{ "best_metric": null, "best_model_checkpoint": null, "epoch": 50.0, "eval_steps": 500, "global_step": 25000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.002, "grad_norm": 106.3351058959961, "learning_rate": 2e-05, "loss": 2.97638154, "step": 1 }, { "epoch": 0.004, "grad_norm": 272.83465576171875, "learning_rate": 2e-05, "loss": 3.5211587, "step": 2 }, { "epoch": 0.006, "grad_norm": 36.330013275146484, "learning_rate": 2e-05, "loss": 1.36666727, "step": 3 }, { "epoch": 0.008, "grad_norm": 25.315696716308594, "learning_rate": 2e-05, "loss": 0.87812793, "step": 4 }, { "epoch": 0.01, "grad_norm": 35.91809844970703, "learning_rate": 2e-05, "loss": 0.68812144, "step": 5 }, { "epoch": 0.012, "grad_norm": 26.905012130737305, "learning_rate": 2e-05, "loss": 0.70144284, "step": 6 }, { "epoch": 0.014, "grad_norm": 33.107261657714844, "learning_rate": 2e-05, "loss": 0.90835726, "step": 7 }, { "epoch": 0.016, "grad_norm": 35.66191482543945, "learning_rate": 2e-05, "loss": 0.74513012, "step": 8 }, { "epoch": 0.018, "grad_norm": 70.35578918457031, "learning_rate": 2e-05, "loss": 0.73760056, "step": 9 }, { "epoch": 0.02, "grad_norm": 40.839927673339844, "learning_rate": 2e-05, "loss": 0.84170687, "step": 10 }, { "epoch": 0.022, "grad_norm": 23.0489559173584, "learning_rate": 2e-05, "loss": 0.73332345, "step": 11 }, { "epoch": 0.024, "grad_norm": 18.695117950439453, "learning_rate": 2e-05, "loss": 0.71223199, "step": 12 }, { "epoch": 0.026, "grad_norm": 44.652923583984375, "learning_rate": 2e-05, "loss": 0.76524973, "step": 13 }, { "epoch": 0.028, "grad_norm": 55.078460693359375, "learning_rate": 2e-05, "loss": 0.61261988, "step": 14 }, { "epoch": 0.03, "grad_norm": 21.46772575378418, "learning_rate": 2e-05, "loss": 0.631437, "step": 15 }, { "epoch": 0.032, "grad_norm": 37.89278793334961, "learning_rate": 2e-05, "loss": 0.7464875, "step": 16 }, { "epoch": 0.034, "grad_norm": 30.019264221191406, "learning_rate": 2e-05, "loss": 0.87719274, "step": 17 }, { "epoch": 0.036, "grad_norm": 36.052101135253906, "learning_rate": 2e-05, "loss": 0.7440601, "step": 18 }, { "epoch": 0.038, "grad_norm": 26.557836532592773, "learning_rate": 2e-05, "loss": 0.79755795, "step": 19 }, { "epoch": 0.04, "grad_norm": 38.445892333984375, "learning_rate": 2e-05, "loss": 0.83467078, "step": 20 }, { "epoch": 0.042, "grad_norm": 292.89532470703125, "learning_rate": 2e-05, "loss": 0.5564701, "step": 21 }, { "epoch": 0.044, "grad_norm": 66.19842529296875, "learning_rate": 2e-05, "loss": 0.71594197, "step": 22 }, { "epoch": 0.046, "grad_norm": 43.98209762573242, "learning_rate": 2e-05, "loss": 0.72038335, "step": 23 }, { "epoch": 0.048, "grad_norm": 34.50160217285156, "learning_rate": 2e-05, "loss": 0.63889897, "step": 24 }, { "epoch": 0.05, "grad_norm": 21.84002685546875, "learning_rate": 2e-05, "loss": 0.56213444, "step": 25 }, { "epoch": 0.052, "grad_norm": 30.0999698638916, "learning_rate": 2e-05, "loss": 0.6529696, "step": 26 }, { "epoch": 0.054, "grad_norm": 9.198137283325195, "learning_rate": 2e-05, "loss": 0.50230336, "step": 27 }, { "epoch": 0.056, "grad_norm": 22.86176109313965, "learning_rate": 2e-05, "loss": 0.70972103, "step": 28 }, { "epoch": 0.058, "grad_norm": 15.194514274597168, "learning_rate": 2e-05, "loss": 0.51951593, "step": 29 }, { "epoch": 0.06, "grad_norm": 8.040966987609863, "learning_rate": 2e-05, "loss": 0.47542316, "step": 30 }, { "epoch": 0.062, "grad_norm": 15.389135360717773, "learning_rate": 2e-05, "loss": 0.58396137, "step": 31 }, { "epoch": 0.064, "grad_norm": 22.020004272460938, "learning_rate": 2e-05, "loss": 0.54637253, "step": 32 }, { "epoch": 0.066, "grad_norm": 42.19824981689453, "learning_rate": 2e-05, "loss": 0.5568074, "step": 33 }, { "epoch": 0.068, "grad_norm": 39.06177520751953, "learning_rate": 2e-05, "loss": 0.6917187, "step": 34 }, { "epoch": 0.07, "grad_norm": 14.390580177307129, "learning_rate": 2e-05, "loss": 0.59219807, "step": 35 }, { "epoch": 0.072, "grad_norm": 26.983028411865234, "learning_rate": 2e-05, "loss": 0.49751258, "step": 36 }, { "epoch": 0.074, "grad_norm": 12.522932052612305, "learning_rate": 2e-05, "loss": 0.53285646, "step": 37 }, { "epoch": 0.076, "grad_norm": 17.231962203979492, "learning_rate": 2e-05, "loss": 0.51553643, "step": 38 }, { "epoch": 0.078, "grad_norm": 6.541717052459717, "learning_rate": 2e-05, "loss": 0.40074992, "step": 39 }, { "epoch": 0.08, "grad_norm": 36.18123245239258, "learning_rate": 2e-05, "loss": 0.5162462, "step": 40 }, { "epoch": 0.082, "grad_norm": 13.25635051727295, "learning_rate": 2e-05, "loss": 0.53972137, "step": 41 }, { "epoch": 0.084, "grad_norm": 18.840490341186523, "learning_rate": 2e-05, "loss": 0.44445139, "step": 42 }, { "epoch": 0.086, "grad_norm": 10.913798332214355, "learning_rate": 2e-05, "loss": 0.49452934, "step": 43 }, { "epoch": 0.088, "grad_norm": 8.581521034240723, "learning_rate": 2e-05, "loss": 0.43107983, "step": 44 }, { "epoch": 0.09, "grad_norm": 9.29199504852295, "learning_rate": 2e-05, "loss": 0.4090324, "step": 45 }, { "epoch": 0.092, "grad_norm": 5.323775768280029, "learning_rate": 2e-05, "loss": 0.39204675, "step": 46 }, { "epoch": 0.094, "grad_norm": 16.615453720092773, "learning_rate": 2e-05, "loss": 0.48691493, "step": 47 }, { "epoch": 0.096, "grad_norm": 44.74150848388672, "learning_rate": 2e-05, "loss": 0.45611793, "step": 48 }, { "epoch": 0.098, "grad_norm": 22.81415367126465, "learning_rate": 2e-05, "loss": 0.44323951, "step": 49 }, { "epoch": 0.1, "grad_norm": 32.3690299987793, "learning_rate": 2e-05, "loss": 0.44699708, "step": 50 }, { "epoch": 0.102, "grad_norm": 7.717865943908691, "learning_rate": 2e-05, "loss": 0.48019993, "step": 51 }, { "epoch": 0.104, "grad_norm": 7.4963555335998535, "learning_rate": 2e-05, "loss": 0.48855418, "step": 52 }, { "epoch": 0.106, "grad_norm": 23.345155715942383, "learning_rate": 2e-05, "loss": 0.44010609, "step": 53 }, { "epoch": 0.108, "grad_norm": 10.180810928344727, "learning_rate": 2e-05, "loss": 0.44849843, "step": 54 }, { "epoch": 0.11, "grad_norm": 3.067502021789551, "learning_rate": 2e-05, "loss": 0.41234958, "step": 55 }, { "epoch": 0.112, "grad_norm": 4.944139003753662, "learning_rate": 2e-05, "loss": 0.45853633, "step": 56 }, { "epoch": 0.114, "grad_norm": 45.07456588745117, "learning_rate": 2e-05, "loss": 0.38530779, "step": 57 }, { "epoch": 0.116, "grad_norm": 15.853984832763672, "learning_rate": 2e-05, "loss": 0.5157482, "step": 58 }, { "epoch": 0.118, "grad_norm": 12.359729766845703, "learning_rate": 2e-05, "loss": 0.44657743, "step": 59 }, { "epoch": 0.12, "grad_norm": 18.414812088012695, "learning_rate": 2e-05, "loss": 0.43552893, "step": 60 }, { "epoch": 0.122, "grad_norm": 299.7832946777344, "learning_rate": 2e-05, "loss": 0.46864027, "step": 61 }, { "epoch": 0.124, "grad_norm": 47.110591888427734, "learning_rate": 2e-05, "loss": 0.54755104, "step": 62 }, { "epoch": 0.126, "grad_norm": 51.655914306640625, "learning_rate": 2e-05, "loss": 0.61471045, "step": 63 }, { "epoch": 0.128, "grad_norm": 34.72273635864258, "learning_rate": 2e-05, "loss": 0.59823078, "step": 64 }, { "epoch": 0.13, "grad_norm": 20.485750198364258, "learning_rate": 2e-05, "loss": 0.57281041, "step": 65 }, { "epoch": 0.132, "grad_norm": 48.85628890991211, "learning_rate": 2e-05, "loss": 0.62450993, "step": 66 }, { "epoch": 0.134, "grad_norm": 41.954463958740234, "learning_rate": 2e-05, "loss": 0.79015625, "step": 67 }, { "epoch": 0.136, "grad_norm": 166.2735137939453, "learning_rate": 2e-05, "loss": 0.93918943, "step": 68 }, { "epoch": 0.138, "grad_norm": 109.09810638427734, "learning_rate": 2e-05, "loss": 0.75576627, "step": 69 }, { "epoch": 0.14, "grad_norm": 69.08746337890625, "learning_rate": 2e-05, "loss": 0.87483096, "step": 70 }, { "epoch": 0.142, "grad_norm": 53.783870697021484, "learning_rate": 2e-05, "loss": 0.80816042, "step": 71 }, { "epoch": 0.144, "grad_norm": 53.78447341918945, "learning_rate": 2e-05, "loss": 0.76368117, "step": 72 }, { "epoch": 0.146, "grad_norm": 62.41336441040039, "learning_rate": 2e-05, "loss": 0.61411834, "step": 73 }, { "epoch": 0.148, "grad_norm": 57.343257904052734, "learning_rate": 2e-05, "loss": 0.59398806, "step": 74 }, { "epoch": 0.15, "grad_norm": 52.208194732666016, "learning_rate": 2e-05, "loss": 0.53552341, "step": 75 }, { "epoch": 0.152, "grad_norm": 130.47926330566406, "learning_rate": 2e-05, "loss": 0.61511314, "step": 76 }, { "epoch": 0.154, "grad_norm": 42.08031463623047, "learning_rate": 2e-05, "loss": 0.55080497, "step": 77 }, { "epoch": 0.156, "grad_norm": 36.434295654296875, "learning_rate": 2e-05, "loss": 0.61036658, "step": 78 }, { "epoch": 0.158, "grad_norm": 26.579214096069336, "learning_rate": 2e-05, "loss": 0.55669212, "step": 79 }, { "epoch": 0.16, "grad_norm": 28.81806182861328, "learning_rate": 2e-05, "loss": 0.54770672, "step": 80 }, { "epoch": 0.162, "grad_norm": 27.80552864074707, "learning_rate": 2e-05, "loss": 0.54727596, "step": 81 }, { "epoch": 0.164, "grad_norm": 246.13504028320312, "learning_rate": 2e-05, "loss": 0.50458884, "step": 82 }, { "epoch": 0.166, "grad_norm": 15.502361297607422, "learning_rate": 2e-05, "loss": 0.53181148, "step": 83 }, { "epoch": 0.168, "grad_norm": 37.407371520996094, "learning_rate": 2e-05, "loss": 0.55894393, "step": 84 }, { "epoch": 0.17, "grad_norm": 55.06547164916992, "learning_rate": 2e-05, "loss": 0.5384931, "step": 85 }, { "epoch": 0.172, "grad_norm": 27.198009490966797, "learning_rate": 2e-05, "loss": 0.57203805, "step": 86 }, { "epoch": 0.174, "grad_norm": 20.451473236083984, "learning_rate": 2e-05, "loss": 0.46600908, "step": 87 }, { "epoch": 0.176, "grad_norm": 13.885315895080566, "learning_rate": 2e-05, "loss": 0.42258817, "step": 88 }, { "epoch": 0.178, "grad_norm": 13.090973854064941, "learning_rate": 2e-05, "loss": 0.47550941, "step": 89 }, { "epoch": 0.18, "grad_norm": 17.438533782958984, "learning_rate": 2e-05, "loss": 0.45427507, "step": 90 }, { "epoch": 0.182, "grad_norm": 15.374178886413574, "learning_rate": 2e-05, "loss": 0.47588485, "step": 91 }, { "epoch": 0.184, "grad_norm": 21.128461837768555, "learning_rate": 2e-05, "loss": 0.44636747, "step": 92 }, { "epoch": 0.186, "grad_norm": 25.663137435913086, "learning_rate": 2e-05, "loss": 0.4346846, "step": 93 }, { "epoch": 0.188, "grad_norm": 24.24207878112793, "learning_rate": 2e-05, "loss": 0.48590896, "step": 94 }, { "epoch": 0.19, "grad_norm": 15.967377662658691, "learning_rate": 2e-05, "loss": 0.49886051, "step": 95 }, { "epoch": 0.192, "grad_norm": 14.418985366821289, "learning_rate": 2e-05, "loss": 0.38748822, "step": 96 }, { "epoch": 0.194, "grad_norm": 17.987184524536133, "learning_rate": 2e-05, "loss": 0.53771186, "step": 97 }, { "epoch": 0.196, "grad_norm": 14.136760711669922, "learning_rate": 2e-05, "loss": 0.44553316, "step": 98 }, { "epoch": 0.198, "grad_norm": 25.80638313293457, "learning_rate": 2e-05, "loss": 0.56278884, "step": 99 }, { "epoch": 0.2, "grad_norm": 36.95318603515625, "learning_rate": 2e-05, "loss": 0.45035309, "step": 100 }, { "epoch": 0.202, "grad_norm": 23.130966186523438, "learning_rate": 2e-05, "loss": 0.49739093, "step": 101 }, { "epoch": 0.204, "grad_norm": 18.668306350708008, "learning_rate": 2e-05, "loss": 0.52438545, "step": 102 }, { "epoch": 0.206, "grad_norm": 17.244892120361328, "learning_rate": 2e-05, "loss": 0.50640076, "step": 103 }, { "epoch": 0.208, "grad_norm": 14.973573684692383, "learning_rate": 2e-05, "loss": 0.46126199, "step": 104 }, { "epoch": 0.21, "grad_norm": 107.84129333496094, "learning_rate": 2e-05, "loss": 0.48328286, "step": 105 }, { "epoch": 0.212, "grad_norm": 12.581948280334473, "learning_rate": 2e-05, "loss": 0.40299821, "step": 106 }, { "epoch": 0.214, "grad_norm": 122.16223907470703, "learning_rate": 2e-05, "loss": 0.49847573, "step": 107 }, { "epoch": 0.216, "grad_norm": 2467.09912109375, "learning_rate": 2e-05, "loss": 0.44365385, "step": 108 }, { "epoch": 0.218, "grad_norm": 20.038747787475586, "learning_rate": 2e-05, "loss": 0.53577441, "step": 109 }, { "epoch": 0.22, "grad_norm": 18.071563720703125, "learning_rate": 2e-05, "loss": 0.53914034, "step": 110 }, { "epoch": 0.222, "grad_norm": 28.213003158569336, "learning_rate": 2e-05, "loss": 0.49175298, "step": 111 }, { "epoch": 0.224, "grad_norm": 12.749397277832031, "learning_rate": 2e-05, "loss": 0.45982122, "step": 112 }, { "epoch": 0.226, "grad_norm": 14.801767349243164, "learning_rate": 2e-05, "loss": 0.45866692, "step": 113 }, { "epoch": 0.228, "grad_norm": 15.233878135681152, "learning_rate": 2e-05, "loss": 0.50253963, "step": 114 }, { "epoch": 0.23, "grad_norm": 16.621849060058594, "learning_rate": 2e-05, "loss": 0.41842964, "step": 115 }, { "epoch": 0.232, "grad_norm": 12.910942077636719, "learning_rate": 2e-05, "loss": 0.48191142, "step": 116 }, { "epoch": 0.234, "grad_norm": 13.818432807922363, "learning_rate": 2e-05, "loss": 0.52697116, "step": 117 }, { "epoch": 0.236, "grad_norm": 573.7451782226562, "learning_rate": 2e-05, "loss": 0.44657373, "step": 118 }, { "epoch": 0.238, "grad_norm": 13.285164833068848, "learning_rate": 2e-05, "loss": 0.47332335, "step": 119 }, { "epoch": 0.24, "grad_norm": 13.838842391967773, "learning_rate": 2e-05, "loss": 0.36773446, "step": 120 }, { "epoch": 0.242, "grad_norm": 15.649510383605957, "learning_rate": 2e-05, "loss": 0.40493056, "step": 121 }, { "epoch": 0.244, "grad_norm": 12.385529518127441, "learning_rate": 2e-05, "loss": 0.43755823, "step": 122 }, { "epoch": 0.246, "grad_norm": 11.90880298614502, "learning_rate": 2e-05, "loss": 0.46447501, "step": 123 }, { "epoch": 0.248, "grad_norm": 19.716524124145508, "learning_rate": 2e-05, "loss": 0.46195307, "step": 124 }, { "epoch": 0.25, "grad_norm": 12.739131927490234, "learning_rate": 2e-05, "loss": 0.43450442, "step": 125 }, { "epoch": 0.252, "grad_norm": 14.855384826660156, "learning_rate": 2e-05, "loss": 0.46269423, "step": 126 }, { "epoch": 0.254, "grad_norm": 11.975515365600586, "learning_rate": 2e-05, "loss": 0.45238233, "step": 127 }, { "epoch": 0.256, "grad_norm": 11.805922508239746, "learning_rate": 2e-05, "loss": 0.44513977, "step": 128 }, { "epoch": 0.258, "grad_norm": 95.38381958007812, "learning_rate": 2e-05, "loss": 0.46242535, "step": 129 }, { "epoch": 0.26, "grad_norm": 13.660318374633789, "learning_rate": 2e-05, "loss": 0.42104715, "step": 130 }, { "epoch": 0.262, "grad_norm": 14.528120040893555, "learning_rate": 2e-05, "loss": 0.45455492, "step": 131 }, { "epoch": 0.264, "grad_norm": 11.612198829650879, "learning_rate": 2e-05, "loss": 0.47833079, "step": 132 }, { "epoch": 0.266, "grad_norm": 12.275348663330078, "learning_rate": 2e-05, "loss": 0.4781037, "step": 133 }, { "epoch": 0.268, "grad_norm": 10.271618843078613, "learning_rate": 2e-05, "loss": 0.41165605, "step": 134 }, { "epoch": 0.27, "grad_norm": 7.801446914672852, "learning_rate": 2e-05, "loss": 0.42807937, "step": 135 }, { "epoch": 0.272, "grad_norm": 14.0069580078125, "learning_rate": 2e-05, "loss": 0.41604611, "step": 136 }, { "epoch": 0.274, "grad_norm": 15.768139839172363, "learning_rate": 2e-05, "loss": 0.39413089, "step": 137 }, { "epoch": 0.276, "grad_norm": 11.007628440856934, "learning_rate": 2e-05, "loss": 0.45819357, "step": 138 }, { "epoch": 0.278, "grad_norm": 9.272838592529297, "learning_rate": 2e-05, "loss": 0.41990653, "step": 139 }, { "epoch": 0.28, "grad_norm": 9.490700721740723, "learning_rate": 2e-05, "loss": 0.44716167, "step": 140 }, { "epoch": 0.282, "grad_norm": 24.454200744628906, "learning_rate": 2e-05, "loss": 0.39211977, "step": 141 }, { "epoch": 0.284, "grad_norm": 11.137938499450684, "learning_rate": 2e-05, "loss": 0.45704901, "step": 142 }, { "epoch": 0.286, "grad_norm": 10.667425155639648, "learning_rate": 2e-05, "loss": 0.48869911, "step": 143 }, { "epoch": 0.288, "grad_norm": 21.238933563232422, "learning_rate": 2e-05, "loss": 0.44154745, "step": 144 }, { "epoch": 0.29, "grad_norm": 9.602286338806152, "learning_rate": 2e-05, "loss": 0.38942307, "step": 145 }, { "epoch": 0.292, "grad_norm": 10.36988353729248, "learning_rate": 2e-05, "loss": 0.44821233, "step": 146 }, { "epoch": 0.294, "grad_norm": 12.892916679382324, "learning_rate": 2e-05, "loss": 0.41553789, "step": 147 }, { "epoch": 0.296, "grad_norm": 13.940518379211426, "learning_rate": 2e-05, "loss": 0.39861506, "step": 148 }, { "epoch": 0.298, "grad_norm": 11.218053817749023, "learning_rate": 2e-05, "loss": 0.40667194, "step": 149 }, { "epoch": 0.3, "grad_norm": 8.908164024353027, "learning_rate": 2e-05, "loss": 0.43225476, "step": 150 }, { "epoch": 0.302, "grad_norm": 10.960769653320312, "learning_rate": 2e-05, "loss": 0.36067069, "step": 151 }, { "epoch": 0.304, "grad_norm": 8.613821983337402, "learning_rate": 2e-05, "loss": 0.43313751, "step": 152 }, { "epoch": 0.306, "grad_norm": 8.52908992767334, "learning_rate": 2e-05, "loss": 0.39631498, "step": 153 }, { "epoch": 0.308, "grad_norm": 9.934247016906738, "learning_rate": 2e-05, "loss": 0.38983732, "step": 154 }, { "epoch": 0.31, "grad_norm": 9.010107040405273, "learning_rate": 2e-05, "loss": 0.42385882, "step": 155 }, { "epoch": 0.312, "grad_norm": 8.398744583129883, "learning_rate": 2e-05, "loss": 0.38752389, "step": 156 }, { "epoch": 0.314, "grad_norm": 9.783997535705566, "learning_rate": 2e-05, "loss": 0.43464756, "step": 157 }, { "epoch": 0.316, "grad_norm": 9.42183780670166, "learning_rate": 2e-05, "loss": 0.47586909, "step": 158 }, { "epoch": 0.318, "grad_norm": 395.812744140625, "learning_rate": 2e-05, "loss": 0.38081068, "step": 159 }, { "epoch": 0.32, "grad_norm": 10.035112380981445, "learning_rate": 2e-05, "loss": 0.37909234, "step": 160 }, { "epoch": 0.322, "grad_norm": 10.719354629516602, "learning_rate": 2e-05, "loss": 0.42697549, "step": 161 }, { "epoch": 0.324, "grad_norm": 9.908405303955078, "learning_rate": 2e-05, "loss": 0.38544208, "step": 162 }, { "epoch": 0.326, "grad_norm": 14.891396522521973, "learning_rate": 2e-05, "loss": 0.43682563, "step": 163 }, { "epoch": 0.328, "grad_norm": 21.249048233032227, "learning_rate": 2e-05, "loss": 0.38770065, "step": 164 }, { "epoch": 0.33, "grad_norm": 9.066083908081055, "learning_rate": 2e-05, "loss": 0.34423929, "step": 165 }, { "epoch": 0.332, "grad_norm": 18.91913604736328, "learning_rate": 2e-05, "loss": 0.38416678, "step": 166 }, { "epoch": 0.334, "grad_norm": 13.15003776550293, "learning_rate": 2e-05, "loss": 0.41496092, "step": 167 }, { "epoch": 0.336, "grad_norm": 11.459242820739746, "learning_rate": 2e-05, "loss": 0.4813475, "step": 168 }, { "epoch": 0.338, "grad_norm": 12.327369689941406, "learning_rate": 2e-05, "loss": 0.43969703, "step": 169 }, { "epoch": 0.34, "grad_norm": 8.72236442565918, "learning_rate": 2e-05, "loss": 0.43106002, "step": 170 }, { "epoch": 0.342, "grad_norm": 10.84215259552002, "learning_rate": 2e-05, "loss": 0.42379761, "step": 171 }, { "epoch": 0.344, "grad_norm": 13.493989944458008, "learning_rate": 2e-05, "loss": 0.42502257, "step": 172 }, { "epoch": 0.346, "grad_norm": 9.381986618041992, "learning_rate": 2e-05, "loss": 0.42881054, "step": 173 }, { "epoch": 0.348, "grad_norm": 10.391239166259766, "learning_rate": 2e-05, "loss": 0.42760158, "step": 174 }, { "epoch": 0.35, "grad_norm": 9.514788627624512, "learning_rate": 2e-05, "loss": 0.4439213, "step": 175 }, { "epoch": 0.352, "grad_norm": 8.54010009765625, "learning_rate": 2e-05, "loss": 0.42852497, "step": 176 }, { "epoch": 0.354, "grad_norm": 9.419174194335938, "learning_rate": 2e-05, "loss": 0.42105347, "step": 177 }, { "epoch": 0.356, "grad_norm": 10.54394817352295, "learning_rate": 2e-05, "loss": 0.42986929, "step": 178 }, { "epoch": 0.358, "grad_norm": 9.84325122833252, "learning_rate": 2e-05, "loss": 0.46499085, "step": 179 }, { "epoch": 0.36, "grad_norm": 13.598509788513184, "learning_rate": 2e-05, "loss": 0.41894197, "step": 180 }, { "epoch": 0.362, "grad_norm": 12.252158164978027, "learning_rate": 2e-05, "loss": 0.43681717, "step": 181 }, { "epoch": 0.364, "grad_norm": 10.102478981018066, "learning_rate": 2e-05, "loss": 0.44088128, "step": 182 }, { "epoch": 0.366, "grad_norm": 10.13042163848877, "learning_rate": 2e-05, "loss": 0.4017719, "step": 183 }, { "epoch": 0.368, "grad_norm": 9.021408081054688, "learning_rate": 2e-05, "loss": 0.41342515, "step": 184 }, { "epoch": 0.37, "grad_norm": 11.439295768737793, "learning_rate": 2e-05, "loss": 0.42541078, "step": 185 }, { "epoch": 0.372, "grad_norm": 11.0103759765625, "learning_rate": 2e-05, "loss": 0.38462064, "step": 186 }, { "epoch": 0.374, "grad_norm": 12.478909492492676, "learning_rate": 2e-05, "loss": 0.48205873, "step": 187 }, { "epoch": 0.376, "grad_norm": 10.006922721862793, "learning_rate": 2e-05, "loss": 0.38860631, "step": 188 }, { "epoch": 0.378, "grad_norm": 9.058837890625, "learning_rate": 2e-05, "loss": 0.44789264, "step": 189 }, { "epoch": 0.38, "grad_norm": 11.576780319213867, "learning_rate": 2e-05, "loss": 0.37894994, "step": 190 }, { "epoch": 0.382, "grad_norm": 9.028461456298828, "learning_rate": 2e-05, "loss": 0.43965554, "step": 191 }, { "epoch": 0.384, "grad_norm": 9.708847999572754, "learning_rate": 2e-05, "loss": 0.40843514, "step": 192 }, { "epoch": 0.386, "grad_norm": 9.772795677185059, "learning_rate": 2e-05, "loss": 0.34363228, "step": 193 }, { "epoch": 0.388, "grad_norm": 9.10479736328125, "learning_rate": 2e-05, "loss": 0.40338659, "step": 194 }, { "epoch": 0.39, "grad_norm": 11.163968086242676, "learning_rate": 2e-05, "loss": 0.44817132, "step": 195 }, { "epoch": 0.392, "grad_norm": 10.096890449523926, "learning_rate": 2e-05, "loss": 0.41826627, "step": 196 }, { "epoch": 0.394, "grad_norm": 9.3131742477417, "learning_rate": 2e-05, "loss": 0.44676968, "step": 197 }, { "epoch": 0.396, "grad_norm": 9.904888153076172, "learning_rate": 2e-05, "loss": 0.43615833, "step": 198 }, { "epoch": 0.398, "grad_norm": 9.178487777709961, "learning_rate": 2e-05, "loss": 0.42178857, "step": 199 }, { "epoch": 0.4, "grad_norm": 9.929298400878906, "learning_rate": 2e-05, "loss": 0.3757419, "step": 200 }, { "epoch": 0.402, "grad_norm": 10.286604881286621, "learning_rate": 2e-05, "loss": 0.447451, "step": 201 }, { "epoch": 0.404, "grad_norm": 12.059324264526367, "learning_rate": 2e-05, "loss": 0.41405517, "step": 202 }, { "epoch": 0.406, "grad_norm": 10.0454683303833, "learning_rate": 2e-05, "loss": 0.47724348, "step": 203 }, { "epoch": 0.408, "grad_norm": 10.583747863769531, "learning_rate": 2e-05, "loss": 0.43451419, "step": 204 }, { "epoch": 0.41, "grad_norm": 9.788122177124023, "learning_rate": 2e-05, "loss": 0.43504053, "step": 205 }, { "epoch": 0.412, "grad_norm": 9.68225383758545, "learning_rate": 2e-05, "loss": 0.43267208, "step": 206 }, { "epoch": 0.414, "grad_norm": 9.53421688079834, "learning_rate": 2e-05, "loss": 0.42052275, "step": 207 }, { "epoch": 0.416, "grad_norm": 11.584818840026855, "learning_rate": 2e-05, "loss": 0.41782227, "step": 208 }, { "epoch": 0.418, "grad_norm": 10.827794075012207, "learning_rate": 2e-05, "loss": 0.39735281, "step": 209 }, { "epoch": 0.42, "grad_norm": 11.823992729187012, "learning_rate": 2e-05, "loss": 0.39076281, "step": 210 }, { "epoch": 0.422, "grad_norm": 14.375153541564941, "learning_rate": 2e-05, "loss": 0.38367137, "step": 211 }, { "epoch": 0.424, "grad_norm": 8.833474159240723, "learning_rate": 2e-05, "loss": 0.39642403, "step": 212 }, { "epoch": 0.426, "grad_norm": 9.101374626159668, "learning_rate": 2e-05, "loss": 0.37987879, "step": 213 }, { "epoch": 0.428, "grad_norm": 10.408185005187988, "learning_rate": 2e-05, "loss": 0.43505937, "step": 214 }, { "epoch": 0.43, "grad_norm": 11.4240083694458, "learning_rate": 2e-05, "loss": 0.42941645, "step": 215 }, { "epoch": 0.432, "grad_norm": 9.909538269042969, "learning_rate": 2e-05, "loss": 0.42284393, "step": 216 }, { "epoch": 0.434, "grad_norm": 9.866671562194824, "learning_rate": 2e-05, "loss": 0.46035844, "step": 217 }, { "epoch": 0.436, "grad_norm": 9.53433895111084, "learning_rate": 2e-05, "loss": 0.4272083, "step": 218 }, { "epoch": 0.438, "grad_norm": 9.214035987854004, "learning_rate": 2e-05, "loss": 0.36724138, "step": 219 }, { "epoch": 0.44, "grad_norm": 8.800764083862305, "learning_rate": 2e-05, "loss": 0.39726126, "step": 220 }, { "epoch": 0.442, "grad_norm": 8.15795612335205, "learning_rate": 2e-05, "loss": 0.42147645, "step": 221 }, { "epoch": 0.444, "grad_norm": 8.553804397583008, "learning_rate": 2e-05, "loss": 0.42537025, "step": 222 }, { "epoch": 0.446, "grad_norm": 7.93506383895874, "learning_rate": 2e-05, "loss": 0.38129061, "step": 223 }, { "epoch": 0.448, "grad_norm": 9.668242454528809, "learning_rate": 2e-05, "loss": 0.37951964, "step": 224 }, { "epoch": 0.45, "grad_norm": 8.776659965515137, "learning_rate": 2e-05, "loss": 0.48104104, "step": 225 }, { "epoch": 0.452, "grad_norm": 9.964150428771973, "learning_rate": 2e-05, "loss": 0.38302588, "step": 226 }, { "epoch": 0.454, "grad_norm": 10.77599811553955, "learning_rate": 2e-05, "loss": 0.42485452, "step": 227 }, { "epoch": 0.456, "grad_norm": 8.449227333068848, "learning_rate": 2e-05, "loss": 0.40950543, "step": 228 }, { "epoch": 0.458, "grad_norm": 9.343338012695312, "learning_rate": 2e-05, "loss": 0.3956787, "step": 229 }, { "epoch": 0.46, "grad_norm": 8.55199909210205, "learning_rate": 2e-05, "loss": 0.38624418, "step": 230 }, { "epoch": 0.462, "grad_norm": 8.833600044250488, "learning_rate": 2e-05, "loss": 0.4036369, "step": 231 }, { "epoch": 0.464, "grad_norm": 9.766615867614746, "learning_rate": 2e-05, "loss": 0.37018943, "step": 232 }, { "epoch": 0.466, "grad_norm": 8.233248710632324, "learning_rate": 2e-05, "loss": 0.33328718, "step": 233 }, { "epoch": 0.468, "grad_norm": 8.763534545898438, "learning_rate": 2e-05, "loss": 0.39497456, "step": 234 }, { "epoch": 0.47, "grad_norm": 9.405655860900879, "learning_rate": 2e-05, "loss": 0.35139692, "step": 235 }, { "epoch": 0.472, "grad_norm": 10.097919464111328, "learning_rate": 2e-05, "loss": 0.37612265, "step": 236 }, { "epoch": 0.474, "grad_norm": 9.268573760986328, "learning_rate": 2e-05, "loss": 0.42285377, "step": 237 }, { "epoch": 0.476, "grad_norm": 10.747326850891113, "learning_rate": 2e-05, "loss": 0.44634187, "step": 238 }, { "epoch": 0.478, "grad_norm": 10.302854537963867, "learning_rate": 2e-05, "loss": 0.33439553, "step": 239 }, { "epoch": 0.48, "grad_norm": 15.384054183959961, "learning_rate": 2e-05, "loss": 0.44443309, "step": 240 }, { "epoch": 0.482, "grad_norm": 7.4963555335998535, "learning_rate": 2e-05, "loss": 0.41940176, "step": 241 }, { "epoch": 0.484, "grad_norm": 7.963672161102295, "learning_rate": 2e-05, "loss": 0.43743706, "step": 242 }, { "epoch": 0.486, "grad_norm": 14.842870712280273, "learning_rate": 2e-05, "loss": 0.3921411, "step": 243 }, { "epoch": 0.488, "grad_norm": 9.910212516784668, "learning_rate": 2e-05, "loss": 0.38945308, "step": 244 }, { "epoch": 0.49, "grad_norm": 10.475669860839844, "learning_rate": 2e-05, "loss": 0.41762626, "step": 245 }, { "epoch": 0.492, "grad_norm": 8.770633697509766, "learning_rate": 2e-05, "loss": 0.37912223, "step": 246 }, { "epoch": 0.494, "grad_norm": 12.145181655883789, "learning_rate": 2e-05, "loss": 0.34887606, "step": 247 }, { "epoch": 0.496, "grad_norm": 12.38477897644043, "learning_rate": 2e-05, "loss": 0.39244926, "step": 248 }, { "epoch": 0.498, "grad_norm": 9.446784973144531, "learning_rate": 2e-05, "loss": 0.43521994, "step": 249 }, { "epoch": 0.5, "grad_norm": 8.600379943847656, "learning_rate": 2e-05, "loss": 0.39659691, "step": 250 }, { "epoch": 0.502, "grad_norm": 8.290218353271484, "learning_rate": 2e-05, "loss": 0.43629667, "step": 251 }, { "epoch": 0.504, "grad_norm": 9.408143997192383, "learning_rate": 2e-05, "loss": 0.43271595, "step": 252 }, { "epoch": 0.506, "grad_norm": 9.678403854370117, "learning_rate": 2e-05, "loss": 0.37272936, "step": 253 }, { "epoch": 0.508, "grad_norm": 8.974466323852539, "learning_rate": 2e-05, "loss": 0.39126772, "step": 254 }, { "epoch": 0.51, "grad_norm": 9.532995223999023, "learning_rate": 2e-05, "loss": 0.39947736, "step": 255 }, { "epoch": 0.512, "grad_norm": 10.349555015563965, "learning_rate": 2e-05, "loss": 0.4188467, "step": 256 }, { "epoch": 0.514, "grad_norm": 8.810914993286133, "learning_rate": 2e-05, "loss": 0.38613635, "step": 257 }, { "epoch": 0.516, "grad_norm": 7.506891250610352, "learning_rate": 2e-05, "loss": 0.36956555, "step": 258 }, { "epoch": 0.518, "grad_norm": 6.91972541809082, "learning_rate": 2e-05, "loss": 0.39109254, "step": 259 }, { "epoch": 0.52, "grad_norm": 12.392112731933594, "learning_rate": 2e-05, "loss": 0.37215352, "step": 260 }, { "epoch": 0.522, "grad_norm": 8.929586410522461, "learning_rate": 2e-05, "loss": 0.42977282, "step": 261 }, { "epoch": 0.524, "grad_norm": 15.293289184570312, "learning_rate": 2e-05, "loss": 0.40764329, "step": 262 }, { "epoch": 0.526, "grad_norm": 9.751267433166504, "learning_rate": 2e-05, "loss": 0.38308442, "step": 263 }, { "epoch": 0.528, "grad_norm": 8.307703971862793, "learning_rate": 2e-05, "loss": 0.38924426, "step": 264 }, { "epoch": 0.53, "grad_norm": 9.36828327178955, "learning_rate": 2e-05, "loss": 0.46595907, "step": 265 }, { "epoch": 0.532, "grad_norm": 8.520101547241211, "learning_rate": 2e-05, "loss": 0.4105033, "step": 266 }, { "epoch": 0.534, "grad_norm": 9.048510551452637, "learning_rate": 2e-05, "loss": 0.33322144, "step": 267 }, { "epoch": 0.536, "grad_norm": 10.200215339660645, "learning_rate": 2e-05, "loss": 0.36372185, "step": 268 }, { "epoch": 0.538, "grad_norm": 9.192442893981934, "learning_rate": 2e-05, "loss": 0.43131557, "step": 269 }, { "epoch": 0.54, "grad_norm": 9.150290489196777, "learning_rate": 2e-05, "loss": 0.36910906, "step": 270 }, { "epoch": 0.542, "grad_norm": 8.008354187011719, "learning_rate": 2e-05, "loss": 0.40895599, "step": 271 }, { "epoch": 0.544, "grad_norm": 7.537794589996338, "learning_rate": 2e-05, "loss": 0.40643951, "step": 272 }, { "epoch": 0.546, "grad_norm": 16.45746421813965, "learning_rate": 2e-05, "loss": 0.43084574, "step": 273 }, { "epoch": 0.548, "grad_norm": 7.935338973999023, "learning_rate": 2e-05, "loss": 0.42419183, "step": 274 }, { "epoch": 0.55, "grad_norm": 8.415511131286621, "learning_rate": 2e-05, "loss": 0.47662529, "step": 275 }, { "epoch": 0.552, "grad_norm": 10.13573169708252, "learning_rate": 2e-05, "loss": 0.36752117, "step": 276 }, { "epoch": 0.554, "grad_norm": 8.261918067932129, "learning_rate": 2e-05, "loss": 0.43175328, "step": 277 }, { "epoch": 0.556, "grad_norm": 9.257563591003418, "learning_rate": 2e-05, "loss": 0.39658278, "step": 278 }, { "epoch": 0.558, "grad_norm": 7.06180477142334, "learning_rate": 2e-05, "loss": 0.33129516, "step": 279 }, { "epoch": 0.56, "grad_norm": 8.67619800567627, "learning_rate": 2e-05, "loss": 0.40633339, "step": 280 }, { "epoch": 0.562, "grad_norm": 9.376317977905273, "learning_rate": 2e-05, "loss": 0.41355729, "step": 281 }, { "epoch": 0.564, "grad_norm": 7.655827045440674, "learning_rate": 2e-05, "loss": 0.37823072, "step": 282 }, { "epoch": 0.566, "grad_norm": 7.874055862426758, "learning_rate": 2e-05, "loss": 0.40991163, "step": 283 }, { "epoch": 0.568, "grad_norm": 9.009245872497559, "learning_rate": 2e-05, "loss": 0.37317026, "step": 284 }, { "epoch": 0.57, "grad_norm": 11.411980628967285, "learning_rate": 2e-05, "loss": 0.40589482, "step": 285 }, { "epoch": 0.572, "grad_norm": 9.214781761169434, "learning_rate": 2e-05, "loss": 0.39573234, "step": 286 }, { "epoch": 0.574, "grad_norm": 10.63353157043457, "learning_rate": 2e-05, "loss": 0.45614642, "step": 287 }, { "epoch": 0.576, "grad_norm": 9.073410987854004, "learning_rate": 2e-05, "loss": 0.35370085, "step": 288 }, { "epoch": 0.578, "grad_norm": 9.990442276000977, "learning_rate": 2e-05, "loss": 0.45788735, "step": 289 }, { "epoch": 0.58, "grad_norm": 11.224091529846191, "learning_rate": 2e-05, "loss": 0.3790397, "step": 290 }, { "epoch": 0.582, "grad_norm": 9.14792251586914, "learning_rate": 2e-05, "loss": 0.41681296, "step": 291 }, { "epoch": 0.584, "grad_norm": 10.190142631530762, "learning_rate": 2e-05, "loss": 0.37804562, "step": 292 }, { "epoch": 0.586, "grad_norm": 8.600480079650879, "learning_rate": 2e-05, "loss": 0.3269605, "step": 293 }, { "epoch": 0.588, "grad_norm": 9.318472862243652, "learning_rate": 2e-05, "loss": 0.37328252, "step": 294 }, { "epoch": 0.59, "grad_norm": 10.855989456176758, "learning_rate": 2e-05, "loss": 0.40683055, "step": 295 }, { "epoch": 0.592, "grad_norm": 9.69549560546875, "learning_rate": 2e-05, "loss": 0.4051528, "step": 296 }, { "epoch": 0.594, "grad_norm": 11.162879943847656, "learning_rate": 2e-05, "loss": 0.36768031, "step": 297 }, { "epoch": 0.596, "grad_norm": 7.795416355133057, "learning_rate": 2e-05, "loss": 0.40369979, "step": 298 }, { "epoch": 0.598, "grad_norm": 9.13132381439209, "learning_rate": 2e-05, "loss": 0.38117248, "step": 299 }, { "epoch": 0.6, "grad_norm": 8.75375747680664, "learning_rate": 2e-05, "loss": 0.36274794, "step": 300 }, { "epoch": 0.602, "grad_norm": 13.761502265930176, "learning_rate": 2e-05, "loss": 0.36813289, "step": 301 }, { "epoch": 0.604, "grad_norm": 8.565608024597168, "learning_rate": 2e-05, "loss": 0.41786134, "step": 302 }, { "epoch": 0.606, "grad_norm": 7.108330249786377, "learning_rate": 2e-05, "loss": 0.37630647, "step": 303 }, { "epoch": 0.608, "grad_norm": 10.36870002746582, "learning_rate": 2e-05, "loss": 0.38532969, "step": 304 }, { "epoch": 0.61, "grad_norm": 8.469847679138184, "learning_rate": 2e-05, "loss": 0.39516446, "step": 305 }, { "epoch": 0.612, "grad_norm": 8.46594524383545, "learning_rate": 2e-05, "loss": 0.37413603, "step": 306 }, { "epoch": 0.614, "grad_norm": 14.589864730834961, "learning_rate": 2e-05, "loss": 0.29644993, "step": 307 }, { "epoch": 0.616, "grad_norm": 8.893792152404785, "learning_rate": 2e-05, "loss": 0.35566697, "step": 308 }, { "epoch": 0.618, "grad_norm": 8.258418083190918, "learning_rate": 2e-05, "loss": 0.3272469, "step": 309 }, { "epoch": 0.62, "grad_norm": 9.293591499328613, "learning_rate": 2e-05, "loss": 0.34731215, "step": 310 }, { "epoch": 0.622, "grad_norm": 14.461922645568848, "learning_rate": 2e-05, "loss": 0.32006049, "step": 311 }, { "epoch": 0.624, "grad_norm": 8.968352317810059, "learning_rate": 2e-05, "loss": 0.35092264, "step": 312 }, { "epoch": 0.626, "grad_norm": 8.938387870788574, "learning_rate": 2e-05, "loss": 0.34511858, "step": 313 }, { "epoch": 0.628, "grad_norm": 8.786062240600586, "learning_rate": 2e-05, "loss": 0.40028524, "step": 314 }, { "epoch": 0.63, "grad_norm": 10.78768253326416, "learning_rate": 2e-05, "loss": 0.38850594, "step": 315 }, { "epoch": 0.632, "grad_norm": 7.829494953155518, "learning_rate": 2e-05, "loss": 0.38516849, "step": 316 }, { "epoch": 0.634, "grad_norm": 9.96316909790039, "learning_rate": 2e-05, "loss": 0.36188728, "step": 317 }, { "epoch": 0.636, "grad_norm": 11.885942459106445, "learning_rate": 2e-05, "loss": 0.34584537, "step": 318 }, { "epoch": 0.638, "grad_norm": 9.275619506835938, "learning_rate": 2e-05, "loss": 0.34251964, "step": 319 }, { "epoch": 0.64, "grad_norm": 9.044888496398926, "learning_rate": 2e-05, "loss": 0.30762786, "step": 320 }, { "epoch": 0.642, "grad_norm": 7.556959629058838, "learning_rate": 2e-05, "loss": 0.30177569, "step": 321 }, { "epoch": 0.644, "grad_norm": 9.568028450012207, "learning_rate": 2e-05, "loss": 0.31075782, "step": 322 }, { "epoch": 0.646, "grad_norm": 8.744812965393066, "learning_rate": 2e-05, "loss": 0.34825951, "step": 323 }, { "epoch": 0.648, "grad_norm": 9.135826110839844, "learning_rate": 2e-05, "loss": 0.33437163, "step": 324 }, { "epoch": 0.65, "grad_norm": 8.573902130126953, "learning_rate": 2e-05, "loss": 0.31455866, "step": 325 }, { "epoch": 0.652, "grad_norm": 12.827311515808105, "learning_rate": 2e-05, "loss": 0.26615748, "step": 326 }, { "epoch": 0.654, "grad_norm": 10.199881553649902, "learning_rate": 2e-05, "loss": 0.29439351, "step": 327 }, { "epoch": 0.656, "grad_norm": 8.730032920837402, "learning_rate": 2e-05, "loss": 0.30788225, "step": 328 }, { "epoch": 0.658, "grad_norm": 9.316007614135742, "learning_rate": 2e-05, "loss": 0.31494516, "step": 329 }, { "epoch": 0.66, "grad_norm": 9.466320037841797, "learning_rate": 2e-05, "loss": 0.33635843, "step": 330 }, { "epoch": 0.662, "grad_norm": 9.028986930847168, "learning_rate": 2e-05, "loss": 0.32101226, "step": 331 }, { "epoch": 0.664, "grad_norm": 11.648183822631836, "learning_rate": 2e-05, "loss": 0.27512947, "step": 332 }, { "epoch": 0.666, "grad_norm": 8.862263679504395, "learning_rate": 2e-05, "loss": 0.30526465, "step": 333 }, { "epoch": 0.668, "grad_norm": 8.109395027160645, "learning_rate": 2e-05, "loss": 0.30785012, "step": 334 }, { "epoch": 0.67, "grad_norm": 8.084332466125488, "learning_rate": 2e-05, "loss": 0.27569765, "step": 335 }, { "epoch": 0.672, "grad_norm": 8.40103530883789, "learning_rate": 2e-05, "loss": 0.31003582, "step": 336 }, { "epoch": 0.674, "grad_norm": 7.589168548583984, "learning_rate": 2e-05, "loss": 0.2965619, "step": 337 }, { "epoch": 0.676, "grad_norm": 8.535799980163574, "learning_rate": 2e-05, "loss": 0.26852351, "step": 338 }, { "epoch": 0.678, "grad_norm": 9.64848804473877, "learning_rate": 2e-05, "loss": 0.28235379, "step": 339 }, { "epoch": 0.68, "grad_norm": 8.375736236572266, "learning_rate": 2e-05, "loss": 0.26247099, "step": 340 }, { "epoch": 0.682, "grad_norm": 8.637928009033203, "learning_rate": 2e-05, "loss": 0.28399277, "step": 341 }, { "epoch": 0.684, "grad_norm": 7.631738185882568, "learning_rate": 2e-05, "loss": 0.26243538, "step": 342 }, { "epoch": 0.686, "grad_norm": 8.186356544494629, "learning_rate": 2e-05, "loss": 0.30757102, "step": 343 }, { "epoch": 0.688, "grad_norm": 8.129582405090332, "learning_rate": 2e-05, "loss": 0.27535224, "step": 344 }, { "epoch": 0.69, "grad_norm": 9.501965522766113, "learning_rate": 2e-05, "loss": 0.27736142, "step": 345 }, { "epoch": 0.692, "grad_norm": 9.081802368164062, "learning_rate": 2e-05, "loss": 0.2557798, "step": 346 }, { "epoch": 0.694, "grad_norm": 8.457703590393066, "learning_rate": 2e-05, "loss": 0.27636623, "step": 347 }, { "epoch": 0.696, "grad_norm": 9.414407730102539, "learning_rate": 2e-05, "loss": 0.22419247, "step": 348 }, { "epoch": 0.698, "grad_norm": 7.689733982086182, "learning_rate": 2e-05, "loss": 0.27330619, "step": 349 }, { "epoch": 0.7, "grad_norm": 9.236947059631348, "learning_rate": 2e-05, "loss": 0.26299375, "step": 350 }, { "epoch": 0.702, "grad_norm": 7.239039897918701, "learning_rate": 2e-05, "loss": 0.2583639, "step": 351 }, { "epoch": 0.704, "grad_norm": 8.369779586791992, "learning_rate": 2e-05, "loss": 0.28978628, "step": 352 }, { "epoch": 0.706, "grad_norm": 7.378870010375977, "learning_rate": 2e-05, "loss": 0.27083173, "step": 353 }, { "epoch": 0.708, "grad_norm": 7.452456951141357, "learning_rate": 2e-05, "loss": 0.23647673, "step": 354 }, { "epoch": 0.71, "grad_norm": 8.29498291015625, "learning_rate": 2e-05, "loss": 0.26737219, "step": 355 }, { "epoch": 0.712, "grad_norm": 7.587810516357422, "learning_rate": 2e-05, "loss": 0.25613093, "step": 356 }, { "epoch": 0.714, "grad_norm": 9.508119583129883, "learning_rate": 2e-05, "loss": 0.28915808, "step": 357 }, { "epoch": 0.716, "grad_norm": 8.8358793258667, "learning_rate": 2e-05, "loss": 0.26786244, "step": 358 }, { "epoch": 0.718, "grad_norm": 8.760651588439941, "learning_rate": 2e-05, "loss": 0.23989725, "step": 359 }, { "epoch": 0.72, "grad_norm": 8.099164009094238, "learning_rate": 2e-05, "loss": 0.2592206, "step": 360 }, { "epoch": 0.722, "grad_norm": 8.189947128295898, "learning_rate": 2e-05, "loss": 0.2426452, "step": 361 }, { "epoch": 0.724, "grad_norm": 8.329895973205566, "learning_rate": 2e-05, "loss": 0.25776201, "step": 362 }, { "epoch": 0.726, "grad_norm": 9.17768669128418, "learning_rate": 2e-05, "loss": 0.26782098, "step": 363 }, { "epoch": 0.728, "grad_norm": 8.237401008605957, "learning_rate": 2e-05, "loss": 0.25413582, "step": 364 }, { "epoch": 0.73, "grad_norm": 8.999911308288574, "learning_rate": 2e-05, "loss": 0.26776153, "step": 365 }, { "epoch": 0.732, "grad_norm": 8.053738594055176, "learning_rate": 2e-05, "loss": 0.25965407, "step": 366 }, { "epoch": 0.734, "grad_norm": 10.274346351623535, "learning_rate": 2e-05, "loss": 0.26627558, "step": 367 }, { "epoch": 0.736, "grad_norm": 7.4600958824157715, "learning_rate": 2e-05, "loss": 0.23696093, "step": 368 }, { "epoch": 0.738, "grad_norm": 10.24486255645752, "learning_rate": 2e-05, "loss": 0.23890413, "step": 369 }, { "epoch": 0.74, "grad_norm": 7.502708911895752, "learning_rate": 2e-05, "loss": 0.28164238, "step": 370 }, { "epoch": 0.742, "grad_norm": 7.764088153839111, "learning_rate": 2e-05, "loss": 0.27442193, "step": 371 }, { "epoch": 0.744, "grad_norm": 11.618070602416992, "learning_rate": 2e-05, "loss": 0.27990225, "step": 372 }, { "epoch": 0.746, "grad_norm": 11.763664245605469, "learning_rate": 2e-05, "loss": 0.25891316, "step": 373 }, { "epoch": 0.748, "grad_norm": 9.323328018188477, "learning_rate": 2e-05, "loss": 0.25076765, "step": 374 }, { "epoch": 0.75, "grad_norm": 9.46410083770752, "learning_rate": 2e-05, "loss": 0.22990048, "step": 375 }, { "epoch": 0.752, "grad_norm": 11.818742752075195, "learning_rate": 2e-05, "loss": 0.29624608, "step": 376 }, { "epoch": 0.754, "grad_norm": 10.320255279541016, "learning_rate": 2e-05, "loss": 0.23145483, "step": 377 }, { "epoch": 0.756, "grad_norm": 7.935068607330322, "learning_rate": 2e-05, "loss": 0.25640503, "step": 378 }, { "epoch": 0.758, "grad_norm": 9.17369556427002, "learning_rate": 2e-05, "loss": 0.26844749, "step": 379 }, { "epoch": 0.76, "grad_norm": 9.25487995147705, "learning_rate": 2e-05, "loss": 0.27233654, "step": 380 }, { "epoch": 0.762, "grad_norm": 9.202385902404785, "learning_rate": 2e-05, "loss": 0.27029282, "step": 381 }, { "epoch": 0.764, "grad_norm": 10.687050819396973, "learning_rate": 2e-05, "loss": 0.22704509, "step": 382 }, { "epoch": 0.766, "grad_norm": 11.430596351623535, "learning_rate": 2e-05, "loss": 0.24819949, "step": 383 }, { "epoch": 0.768, "grad_norm": 9.59858226776123, "learning_rate": 2e-05, "loss": 0.20492283, "step": 384 }, { "epoch": 0.77, "grad_norm": 10.298796653747559, "learning_rate": 2e-05, "loss": 0.23234451, "step": 385 }, { "epoch": 0.772, "grad_norm": 11.105032920837402, "learning_rate": 2e-05, "loss": 0.25131649, "step": 386 }, { "epoch": 0.774, "grad_norm": 7.17149543762207, "learning_rate": 2e-05, "loss": 0.25338146, "step": 387 }, { "epoch": 0.776, "grad_norm": 8.1101655960083, "learning_rate": 2e-05, "loss": 0.2304658, "step": 388 }, { "epoch": 0.778, "grad_norm": 9.913678169250488, "learning_rate": 2e-05, "loss": 0.2471588, "step": 389 }, { "epoch": 0.78, "grad_norm": 7.092897415161133, "learning_rate": 2e-05, "loss": 0.21154702, "step": 390 }, { "epoch": 0.782, "grad_norm": 8.214350700378418, "learning_rate": 2e-05, "loss": 0.23165342, "step": 391 }, { "epoch": 0.784, "grad_norm": 10.43057632446289, "learning_rate": 2e-05, "loss": 0.2593866, "step": 392 }, { "epoch": 0.786, "grad_norm": 8.125869750976562, "learning_rate": 2e-05, "loss": 0.22178486, "step": 393 }, { "epoch": 0.788, "grad_norm": 9.094653129577637, "learning_rate": 2e-05, "loss": 0.2440795, "step": 394 }, { "epoch": 0.79, "grad_norm": 22.677011489868164, "learning_rate": 2e-05, "loss": 0.25330836, "step": 395 }, { "epoch": 0.792, "grad_norm": 8.887162208557129, "learning_rate": 2e-05, "loss": 0.26004073, "step": 396 }, { "epoch": 0.794, "grad_norm": 8.772401809692383, "learning_rate": 2e-05, "loss": 0.24154747, "step": 397 }, { "epoch": 0.796, "grad_norm": 8.075626373291016, "learning_rate": 2e-05, "loss": 0.24130732, "step": 398 }, { "epoch": 0.798, "grad_norm": 9.375115394592285, "learning_rate": 2e-05, "loss": 0.21437789, "step": 399 }, { "epoch": 0.8, "grad_norm": 9.550851821899414, "learning_rate": 2e-05, "loss": 0.22947007, "step": 400 }, { "epoch": 0.802, "grad_norm": 10.5350923538208, "learning_rate": 2e-05, "loss": 0.26220185, "step": 401 }, { "epoch": 0.804, "grad_norm": 10.192209243774414, "learning_rate": 2e-05, "loss": 0.19829628, "step": 402 }, { "epoch": 0.806, "grad_norm": 11.02525520324707, "learning_rate": 2e-05, "loss": 0.24071318, "step": 403 }, { "epoch": 0.808, "grad_norm": 8.990074157714844, "learning_rate": 2e-05, "loss": 0.24007747, "step": 404 }, { "epoch": 0.81, "grad_norm": 8.052278518676758, "learning_rate": 2e-05, "loss": 0.20440187, "step": 405 }, { "epoch": 0.812, "grad_norm": 10.494431495666504, "learning_rate": 2e-05, "loss": 0.29422927, "step": 406 }, { "epoch": 0.814, "grad_norm": 10.418055534362793, "learning_rate": 2e-05, "loss": 0.19100657, "step": 407 }, { "epoch": 0.816, "grad_norm": 7.269287109375, "learning_rate": 2e-05, "loss": 0.2122273, "step": 408 }, { "epoch": 0.818, "grad_norm": 11.642622947692871, "learning_rate": 2e-05, "loss": 0.22119646, "step": 409 }, { "epoch": 0.82, "grad_norm": 9.44233512878418, "learning_rate": 2e-05, "loss": 0.18269446, "step": 410 }, { "epoch": 0.822, "grad_norm": 9.73427677154541, "learning_rate": 2e-05, "loss": 0.24550433, "step": 411 }, { "epoch": 0.824, "grad_norm": 11.774269104003906, "learning_rate": 2e-05, "loss": 0.19573331, "step": 412 }, { "epoch": 0.826, "grad_norm": 12.070868492126465, "learning_rate": 2e-05, "loss": 0.2454766, "step": 413 }, { "epoch": 0.828, "grad_norm": 30.536991119384766, "learning_rate": 2e-05, "loss": 0.28785318, "step": 414 }, { "epoch": 0.83, "grad_norm": 8.952959060668945, "learning_rate": 2e-05, "loss": 0.21242946, "step": 415 }, { "epoch": 0.832, "grad_norm": 12.865764617919922, "learning_rate": 2e-05, "loss": 0.25488707, "step": 416 }, { "epoch": 0.834, "grad_norm": 7.865818977355957, "learning_rate": 2e-05, "loss": 0.22773927, "step": 417 }, { "epoch": 0.836, "grad_norm": 10.616582870483398, "learning_rate": 2e-05, "loss": 0.20832525, "step": 418 }, { "epoch": 0.838, "grad_norm": 13.630742073059082, "learning_rate": 2e-05, "loss": 0.22533262, "step": 419 }, { "epoch": 0.84, "grad_norm": 12.48819637298584, "learning_rate": 2e-05, "loss": 0.2267963, "step": 420 }, { "epoch": 0.842, "grad_norm": 49.00496292114258, "learning_rate": 2e-05, "loss": 0.20933047, "step": 421 }, { "epoch": 0.844, "grad_norm": 10.214642524719238, "learning_rate": 2e-05, "loss": 0.19640952, "step": 422 }, { "epoch": 0.846, "grad_norm": 7.38570499420166, "learning_rate": 2e-05, "loss": 0.208951, "step": 423 }, { "epoch": 0.848, "grad_norm": 7.651857376098633, "learning_rate": 2e-05, "loss": 0.20262444, "step": 424 }, { "epoch": 0.85, "grad_norm": 7.756916046142578, "learning_rate": 2e-05, "loss": 0.19864225, "step": 425 }, { "epoch": 0.852, "grad_norm": 18.758731842041016, "learning_rate": 2e-05, "loss": 0.24699372, "step": 426 }, { "epoch": 0.854, "grad_norm": 15.921015739440918, "learning_rate": 2e-05, "loss": 0.22508204, "step": 427 }, { "epoch": 0.856, "grad_norm": 10.708938598632812, "learning_rate": 2e-05, "loss": 0.19556707, "step": 428 }, { "epoch": 0.858, "grad_norm": 9.651545524597168, "learning_rate": 2e-05, "loss": 0.2463102, "step": 429 }, { "epoch": 0.86, "grad_norm": 11.192805290222168, "learning_rate": 2e-05, "loss": 0.23927864, "step": 430 }, { "epoch": 0.862, "grad_norm": 9.653233528137207, "learning_rate": 2e-05, "loss": 0.18516284, "step": 431 }, { "epoch": 0.864, "grad_norm": 10.25500774383545, "learning_rate": 2e-05, "loss": 0.21994749, "step": 432 }, { "epoch": 0.866, "grad_norm": 9.381180763244629, "learning_rate": 2e-05, "loss": 0.22610214, "step": 433 }, { "epoch": 0.868, "grad_norm": 9.213058471679688, "learning_rate": 2e-05, "loss": 0.22984818, "step": 434 }, { "epoch": 0.87, "grad_norm": 7.3136420249938965, "learning_rate": 2e-05, "loss": 0.20804462, "step": 435 }, { "epoch": 0.872, "grad_norm": 7.3534836769104, "learning_rate": 2e-05, "loss": 0.17623085, "step": 436 }, { "epoch": 0.874, "grad_norm": 7.46639347076416, "learning_rate": 2e-05, "loss": 0.16259909, "step": 437 }, { "epoch": 0.876, "grad_norm": 8.127357482910156, "learning_rate": 2e-05, "loss": 0.19474852, "step": 438 }, { "epoch": 0.878, "grad_norm": 11.9406156539917, "learning_rate": 2e-05, "loss": 0.19439326, "step": 439 }, { "epoch": 0.88, "grad_norm": 11.632986068725586, "learning_rate": 2e-05, "loss": 0.21577129, "step": 440 }, { "epoch": 0.882, "grad_norm": 6.348249435424805, "learning_rate": 2e-05, "loss": 0.19429414, "step": 441 }, { "epoch": 0.884, "grad_norm": 8.807539939880371, "learning_rate": 2e-05, "loss": 0.18690637, "step": 442 }, { "epoch": 0.886, "grad_norm": 7.119425296783447, "learning_rate": 2e-05, "loss": 0.18575813, "step": 443 }, { "epoch": 0.888, "grad_norm": 9.67291259765625, "learning_rate": 2e-05, "loss": 0.1841501, "step": 444 }, { "epoch": 0.89, "grad_norm": 7.581745624542236, "learning_rate": 2e-05, "loss": 0.17849687, "step": 445 }, { "epoch": 0.892, "grad_norm": 8.863199234008789, "learning_rate": 2e-05, "loss": 0.18014331, "step": 446 }, { "epoch": 0.894, "grad_norm": 48.26422882080078, "learning_rate": 2e-05, "loss": 0.19486973, "step": 447 }, { "epoch": 0.896, "grad_norm": 9.63617992401123, "learning_rate": 2e-05, "loss": 0.19428737, "step": 448 }, { "epoch": 0.898, "grad_norm": 225.39251708984375, "learning_rate": 2e-05, "loss": 0.16966523, "step": 449 }, { "epoch": 0.9, "grad_norm": 8.553603172302246, "learning_rate": 2e-05, "loss": 0.20677483, "step": 450 }, { "epoch": 0.902, "grad_norm": 7.960504055023193, "learning_rate": 2e-05, "loss": 0.20762101, "step": 451 }, { "epoch": 0.904, "grad_norm": 7.527916431427002, "learning_rate": 2e-05, "loss": 0.17864805, "step": 452 }, { "epoch": 0.906, "grad_norm": 7.535462379455566, "learning_rate": 2e-05, "loss": 0.17986968, "step": 453 }, { "epoch": 0.908, "grad_norm": 7.1303391456604, "learning_rate": 2e-05, "loss": 0.18420139, "step": 454 }, { "epoch": 0.91, "grad_norm": 9.318174362182617, "learning_rate": 2e-05, "loss": 0.19748238, "step": 455 }, { "epoch": 0.912, "grad_norm": 11.204310417175293, "learning_rate": 2e-05, "loss": 0.18874854, "step": 456 }, { "epoch": 0.914, "grad_norm": 10.74419116973877, "learning_rate": 2e-05, "loss": 0.17254297, "step": 457 }, { "epoch": 0.916, "grad_norm": 7.54042387008667, "learning_rate": 2e-05, "loss": 0.16226724, "step": 458 }, { "epoch": 0.918, "grad_norm": 7.576972484588623, "learning_rate": 2e-05, "loss": 0.16021928, "step": 459 }, { "epoch": 0.92, "grad_norm": 8.07433795928955, "learning_rate": 2e-05, "loss": 0.16289628, "step": 460 }, { "epoch": 0.922, "grad_norm": 8.325987815856934, "learning_rate": 2e-05, "loss": 0.1841329, "step": 461 }, { "epoch": 0.924, "grad_norm": 8.568528175354004, "learning_rate": 2e-05, "loss": 0.17074604, "step": 462 }, { "epoch": 0.926, "grad_norm": 8.838940620422363, "learning_rate": 2e-05, "loss": 0.18597496, "step": 463 }, { "epoch": 0.928, "grad_norm": 17.187196731567383, "learning_rate": 2e-05, "loss": 0.17975765, "step": 464 }, { "epoch": 0.93, "grad_norm": 7.8493475914001465, "learning_rate": 2e-05, "loss": 0.18260917, "step": 465 }, { "epoch": 0.932, "grad_norm": 6.252185344696045, "learning_rate": 2e-05, "loss": 0.16380104, "step": 466 }, { "epoch": 0.934, "grad_norm": 6.650010585784912, "learning_rate": 2e-05, "loss": 0.1917378, "step": 467 }, { "epoch": 0.936, "grad_norm": 7.425052165985107, "learning_rate": 2e-05, "loss": 0.16281684, "step": 468 }, { "epoch": 0.938, "grad_norm": 9.723055839538574, "learning_rate": 2e-05, "loss": 0.19804594, "step": 469 }, { "epoch": 0.94, "grad_norm": 11.09811019897461, "learning_rate": 2e-05, "loss": 0.1871161, "step": 470 }, { "epoch": 0.942, "grad_norm": 6.637765407562256, "learning_rate": 2e-05, "loss": 0.16631864, "step": 471 }, { "epoch": 0.944, "grad_norm": 6.937755584716797, "learning_rate": 2e-05, "loss": 0.14354506, "step": 472 }, { "epoch": 0.946, "grad_norm": 7.605268478393555, "learning_rate": 2e-05, "loss": 0.16136648, "step": 473 }, { "epoch": 0.948, "grad_norm": 8.746881484985352, "learning_rate": 2e-05, "loss": 0.19564337, "step": 474 }, { "epoch": 0.95, "grad_norm": 7.2804388999938965, "learning_rate": 2e-05, "loss": 0.17795783, "step": 475 }, { "epoch": 0.952, "grad_norm": 9.462972640991211, "learning_rate": 2e-05, "loss": 0.15540716, "step": 476 }, { "epoch": 0.954, "grad_norm": 8.656143188476562, "learning_rate": 2e-05, "loss": 0.18736553, "step": 477 }, { "epoch": 0.956, "grad_norm": 7.274444103240967, "learning_rate": 2e-05, "loss": 0.18578169, "step": 478 }, { "epoch": 0.958, "grad_norm": 7.2398152351379395, "learning_rate": 2e-05, "loss": 0.15727285, "step": 479 }, { "epoch": 0.96, "grad_norm": 7.077881336212158, "learning_rate": 2e-05, "loss": 0.16003323, "step": 480 }, { "epoch": 0.962, "grad_norm": 7.389111518859863, "learning_rate": 2e-05, "loss": 0.16985857, "step": 481 }, { "epoch": 0.964, "grad_norm": 7.8312554359436035, "learning_rate": 2e-05, "loss": 0.13501915, "step": 482 }, { "epoch": 0.966, "grad_norm": 6.245406150817871, "learning_rate": 2e-05, "loss": 0.14326826, "step": 483 }, { "epoch": 0.968, "grad_norm": 6.919559001922607, "learning_rate": 2e-05, "loss": 0.1775257, "step": 484 }, { "epoch": 0.97, "grad_norm": 7.477849960327148, "learning_rate": 2e-05, "loss": 0.19931534, "step": 485 }, { "epoch": 0.972, "grad_norm": 6.868001937866211, "learning_rate": 2e-05, "loss": 0.12091704, "step": 486 }, { "epoch": 0.974, "grad_norm": 7.020143985748291, "learning_rate": 2e-05, "loss": 0.18501379, "step": 487 }, { "epoch": 0.976, "grad_norm": 6.665947914123535, "learning_rate": 2e-05, "loss": 0.14134347, "step": 488 }, { "epoch": 0.978, "grad_norm": 10.563578605651855, "learning_rate": 2e-05, "loss": 0.15379462, "step": 489 }, { "epoch": 0.98, "grad_norm": 7.667459964752197, "learning_rate": 2e-05, "loss": 0.19612834, "step": 490 }, { "epoch": 0.982, "grad_norm": 7.303009033203125, "learning_rate": 2e-05, "loss": 0.14922182, "step": 491 }, { "epoch": 0.984, "grad_norm": 6.5201029777526855, "learning_rate": 2e-05, "loss": 0.19149822, "step": 492 }, { "epoch": 0.986, "grad_norm": 8.773056983947754, "learning_rate": 2e-05, "loss": 0.14373818, "step": 493 }, { "epoch": 0.988, "grad_norm": 7.309536457061768, "learning_rate": 2e-05, "loss": 0.13618967, "step": 494 }, { "epoch": 0.99, "grad_norm": 7.026773929595947, "learning_rate": 2e-05, "loss": 0.1740814, "step": 495 }, { "epoch": 0.992, "grad_norm": 7.677907943725586, "learning_rate": 2e-05, "loss": 0.16484037, "step": 496 }, { "epoch": 0.994, "grad_norm": 7.906644821166992, "learning_rate": 2e-05, "loss": 0.18340473, "step": 497 }, { "epoch": 0.996, "grad_norm": 9.86483383178711, "learning_rate": 2e-05, "loss": 0.1523212, "step": 498 }, { "epoch": 0.998, "grad_norm": 7.647320747375488, "learning_rate": 2e-05, "loss": 0.15927653, "step": 499 }, { "epoch": 1.0, "grad_norm": 8.235305786132812, "learning_rate": 2e-05, "loss": 0.18083042, "step": 500 }, { "epoch": 1.0, "eval_performance": { "AngleClassification_1": 0.906, "AngleClassification_2": 0.532, "AngleClassification_3": 0.5109780439121756, "Equal_1": 0.148, "Equal_2": 0.10778443113772455, "Equal_3": 0.12574850299401197, "LineComparison_1": 0.57, "LineComparison_2": 0.6087824351297405, "LineComparison_3": 0.5489021956087824, "Parallel_1": 0.3466933867735471, "Parallel_2": 0.87374749498998, "Parallel_3": 0.236, "Perpendicular_1": 0.434, "Perpendicular_2": 0.164, "Perpendicular_3": 0.1062124248496994, "PointLiesOnCircle_1": 0.8220440881763527, "PointLiesOnCircle_2": 0.4721, "PointLiesOnCircle_3": 0.31226666666666664, "PointLiesOnLine_1": 0.49298597194388777, "PointLiesOnLine_2": 0.3026052104208417, "PointLiesOnLine_3": 0.25948103792415167 }, "eval_runtime": 233.9688, "eval_samples_per_second": 44.878, "eval_steps_per_second": 0.898, "step": 500 }, { "epoch": 1.002, "grad_norm": 7.672991752624512, "learning_rate": 2e-05, "loss": 0.15046903, "step": 501 }, { "epoch": 1.004, "grad_norm": 7.54102087020874, "learning_rate": 2e-05, "loss": 0.14351633, "step": 502 }, { "epoch": 1.006, "grad_norm": 8.034358978271484, "learning_rate": 2e-05, "loss": 0.1461335, "step": 503 }, { "epoch": 1.008, "grad_norm": 6.78055477142334, "learning_rate": 2e-05, "loss": 0.13234013, "step": 504 }, { "epoch": 1.01, "grad_norm": 8.263693809509277, "learning_rate": 2e-05, "loss": 0.16652954, "step": 505 }, { "epoch": 1.012, "grad_norm": 9.360066413879395, "learning_rate": 2e-05, "loss": 0.20651054, "step": 506 }, { "epoch": 1.014, "grad_norm": 7.902182579040527, "learning_rate": 2e-05, "loss": 0.1746522, "step": 507 }, { "epoch": 1.016, "grad_norm": 8.103141784667969, "learning_rate": 2e-05, "loss": 0.18815628, "step": 508 }, { "epoch": 1.018, "grad_norm": 8.566903114318848, "learning_rate": 2e-05, "loss": 0.16128799, "step": 509 }, { "epoch": 1.02, "grad_norm": 9.628020286560059, "learning_rate": 2e-05, "loss": 0.17618746, "step": 510 }, { "epoch": 1.022, "grad_norm": 8.664970397949219, "learning_rate": 2e-05, "loss": 0.17353214, "step": 511 }, { "epoch": 1.024, "grad_norm": 9.319419860839844, "learning_rate": 2e-05, "loss": 0.15416327, "step": 512 }, { "epoch": 1.026, "grad_norm": 8.675823211669922, "learning_rate": 2e-05, "loss": 0.14480302, "step": 513 }, { "epoch": 1.028, "grad_norm": 7.280163288116455, "learning_rate": 2e-05, "loss": 0.14420159, "step": 514 }, { "epoch": 1.03, "grad_norm": 7.530873775482178, "learning_rate": 2e-05, "loss": 0.14790046, "step": 515 }, { "epoch": 1.032, "grad_norm": 9.945538520812988, "learning_rate": 2e-05, "loss": 0.14324212, "step": 516 }, { "epoch": 1.034, "grad_norm": 6.742035865783691, "learning_rate": 2e-05, "loss": 0.19113801, "step": 517 }, { "epoch": 1.036, "grad_norm": 9.154641151428223, "learning_rate": 2e-05, "loss": 0.18051702, "step": 518 }, { "epoch": 1.038, "grad_norm": 7.945432662963867, "learning_rate": 2e-05, "loss": 0.16185431, "step": 519 }, { "epoch": 1.04, "grad_norm": 6.631221294403076, "learning_rate": 2e-05, "loss": 0.17696021, "step": 520 }, { "epoch": 1.042, "grad_norm": 10.772687911987305, "learning_rate": 2e-05, "loss": 0.16086422, "step": 521 }, { "epoch": 1.044, "grad_norm": 9.90352725982666, "learning_rate": 2e-05, "loss": 0.18745291, "step": 522 }, { "epoch": 1.046, "grad_norm": 6.925354480743408, "learning_rate": 2e-05, "loss": 0.16798532, "step": 523 }, { "epoch": 1.048, "grad_norm": 6.8902716636657715, "learning_rate": 2e-05, "loss": 0.14201325, "step": 524 }, { "epoch": 1.05, "grad_norm": 6.715397834777832, "learning_rate": 2e-05, "loss": 0.18864059, "step": 525 }, { "epoch": 1.052, "grad_norm": 6.597648620605469, "learning_rate": 2e-05, "loss": 0.15214109, "step": 526 }, { "epoch": 1.054, "grad_norm": 8.459199905395508, "learning_rate": 2e-05, "loss": 0.10485458, "step": 527 }, { "epoch": 1.056, "grad_norm": 8.003029823303223, "learning_rate": 2e-05, "loss": 0.18169786, "step": 528 }, { "epoch": 1.058, "grad_norm": 6.603515148162842, "learning_rate": 2e-05, "loss": 0.12764937, "step": 529 }, { "epoch": 1.06, "grad_norm": 6.966209411621094, "learning_rate": 2e-05, "loss": 0.16956715, "step": 530 }, { "epoch": 1.062, "grad_norm": 7.1723833084106445, "learning_rate": 2e-05, "loss": 0.14768022, "step": 531 }, { "epoch": 1.064, "grad_norm": 7.107058048248291, "learning_rate": 2e-05, "loss": 0.13689019, "step": 532 }, { "epoch": 1.066, "grad_norm": 6.907801151275635, "learning_rate": 2e-05, "loss": 0.15448931, "step": 533 }, { "epoch": 1.068, "grad_norm": 7.344670295715332, "learning_rate": 2e-05, "loss": 0.11896788, "step": 534 }, { "epoch": 1.07, "grad_norm": 6.394691467285156, "learning_rate": 2e-05, "loss": 0.15560499, "step": 535 }, { "epoch": 1.072, "grad_norm": 7.375394344329834, "learning_rate": 2e-05, "loss": 0.15558317, "step": 536 }, { "epoch": 1.074, "grad_norm": 15.8858003616333, "learning_rate": 2e-05, "loss": 0.17090333, "step": 537 }, { "epoch": 1.076, "grad_norm": 9.197127342224121, "learning_rate": 2e-05, "loss": 0.15567461, "step": 538 }, { "epoch": 1.078, "grad_norm": 9.22225284576416, "learning_rate": 2e-05, "loss": 0.20081103, "step": 539 }, { "epoch": 1.08, "grad_norm": 7.7414960861206055, "learning_rate": 2e-05, "loss": 0.12876059, "step": 540 }, { "epoch": 1.082, "grad_norm": 8.419815063476562, "learning_rate": 2e-05, "loss": 0.16605327, "step": 541 }, { "epoch": 1.084, "grad_norm": 6.084380149841309, "learning_rate": 2e-05, "loss": 0.10173029, "step": 542 }, { "epoch": 1.086, "grad_norm": 7.99392032623291, "learning_rate": 2e-05, "loss": 0.1431815, "step": 543 }, { "epoch": 1.088, "grad_norm": 7.9788498878479, "learning_rate": 2e-05, "loss": 0.14620928, "step": 544 }, { "epoch": 1.09, "grad_norm": 6.837804794311523, "learning_rate": 2e-05, "loss": 0.12597725, "step": 545 }, { "epoch": 1.092, "grad_norm": 7.278705596923828, "learning_rate": 2e-05, "loss": 0.13989165, "step": 546 }, { "epoch": 1.094, "grad_norm": 8.524149894714355, "learning_rate": 2e-05, "loss": 0.15147434, "step": 547 }, { "epoch": 1.096, "grad_norm": 8.31219482421875, "learning_rate": 2e-05, "loss": 0.1382006, "step": 548 }, { "epoch": 1.098, "grad_norm": 7.475712299346924, "learning_rate": 2e-05, "loss": 0.11729804, "step": 549 }, { "epoch": 1.1, "grad_norm": 8.244794845581055, "learning_rate": 2e-05, "loss": 0.19327024, "step": 550 }, { "epoch": 1.102, "grad_norm": 8.934778213500977, "learning_rate": 2e-05, "loss": 0.15421249, "step": 551 }, { "epoch": 1.104, "grad_norm": 7.182073593139648, "learning_rate": 2e-05, "loss": 0.12203865, "step": 552 }, { "epoch": 1.106, "grad_norm": 7.048975944519043, "learning_rate": 2e-05, "loss": 0.13822083, "step": 553 }, { "epoch": 1.108, "grad_norm": 7.831173419952393, "learning_rate": 2e-05, "loss": 0.11979244, "step": 554 }, { "epoch": 1.11, "grad_norm": 10.501673698425293, "learning_rate": 2e-05, "loss": 0.140343, "step": 555 }, { "epoch": 1.112, "grad_norm": 6.838818073272705, "learning_rate": 2e-05, "loss": 0.11415543, "step": 556 }, { "epoch": 1.114, "grad_norm": 7.030392646789551, "learning_rate": 2e-05, "loss": 0.16885363, "step": 557 }, { "epoch": 1.116, "grad_norm": 6.777606010437012, "learning_rate": 2e-05, "loss": 0.15308785, "step": 558 }, { "epoch": 1.1179999999999999, "grad_norm": 365.9435729980469, "learning_rate": 2e-05, "loss": 0.18541735, "step": 559 }, { "epoch": 1.12, "grad_norm": 6.016968727111816, "learning_rate": 2e-05, "loss": 0.14968255, "step": 560 }, { "epoch": 1.1219999999999999, "grad_norm": 8.162703514099121, "learning_rate": 2e-05, "loss": 0.1607656, "step": 561 }, { "epoch": 1.124, "grad_norm": 6.629195213317871, "learning_rate": 2e-05, "loss": 0.13448995, "step": 562 }, { "epoch": 1.126, "grad_norm": 5.913632869720459, "learning_rate": 2e-05, "loss": 0.14810136, "step": 563 }, { "epoch": 1.1280000000000001, "grad_norm": 6.120894908905029, "learning_rate": 2e-05, "loss": 0.13957745, "step": 564 }, { "epoch": 1.13, "grad_norm": 9.728507041931152, "learning_rate": 2e-05, "loss": 0.12335063, "step": 565 }, { "epoch": 1.1320000000000001, "grad_norm": 11.407793998718262, "learning_rate": 2e-05, "loss": 0.13287321, "step": 566 }, { "epoch": 1.134, "grad_norm": 11.956124305725098, "learning_rate": 2e-05, "loss": 0.17672065, "step": 567 }, { "epoch": 1.1360000000000001, "grad_norm": 5.662302494049072, "learning_rate": 2e-05, "loss": 0.12920174, "step": 568 }, { "epoch": 1.138, "grad_norm": 8.951680183410645, "learning_rate": 2e-05, "loss": 0.15979359, "step": 569 }, { "epoch": 1.1400000000000001, "grad_norm": 7.5991926193237305, "learning_rate": 2e-05, "loss": 0.16690001, "step": 570 }, { "epoch": 1.142, "grad_norm": 6.6517767906188965, "learning_rate": 2e-05, "loss": 0.12853871, "step": 571 }, { "epoch": 1.144, "grad_norm": 7.1599578857421875, "learning_rate": 2e-05, "loss": 0.11263332, "step": 572 }, { "epoch": 1.146, "grad_norm": 7.765303134918213, "learning_rate": 2e-05, "loss": 0.1502133, "step": 573 }, { "epoch": 1.148, "grad_norm": 5.828958988189697, "learning_rate": 2e-05, "loss": 0.1053561, "step": 574 }, { "epoch": 1.15, "grad_norm": 7.953757286071777, "learning_rate": 2e-05, "loss": 0.15777536, "step": 575 }, { "epoch": 1.152, "grad_norm": 7.724732875823975, "learning_rate": 2e-05, "loss": 0.13268322, "step": 576 }, { "epoch": 1.154, "grad_norm": 6.852049350738525, "learning_rate": 2e-05, "loss": 0.12776849, "step": 577 }, { "epoch": 1.156, "grad_norm": 6.566437721252441, "learning_rate": 2e-05, "loss": 0.16585195, "step": 578 }, { "epoch": 1.158, "grad_norm": 7.749902248382568, "learning_rate": 2e-05, "loss": 0.14742595, "step": 579 }, { "epoch": 1.16, "grad_norm": 7.920928955078125, "learning_rate": 2e-05, "loss": 0.1637167, "step": 580 }, { "epoch": 1.162, "grad_norm": 6.1189188957214355, "learning_rate": 2e-05, "loss": 0.11474222, "step": 581 }, { "epoch": 1.164, "grad_norm": 7.329008102416992, "learning_rate": 2e-05, "loss": 0.1180867, "step": 582 }, { "epoch": 1.166, "grad_norm": 7.136618614196777, "learning_rate": 2e-05, "loss": 0.11984835, "step": 583 }, { "epoch": 1.168, "grad_norm": 7.411508560180664, "learning_rate": 2e-05, "loss": 0.17667937, "step": 584 }, { "epoch": 1.17, "grad_norm": 7.703707695007324, "learning_rate": 2e-05, "loss": 0.13826272, "step": 585 }, { "epoch": 1.172, "grad_norm": 7.02106237411499, "learning_rate": 2e-05, "loss": 0.13469142, "step": 586 }, { "epoch": 1.174, "grad_norm": 6.505448818206787, "learning_rate": 2e-05, "loss": 0.13529301, "step": 587 }, { "epoch": 1.176, "grad_norm": 6.88942289352417, "learning_rate": 2e-05, "loss": 0.10342747, "step": 588 }, { "epoch": 1.178, "grad_norm": 5.812454700469971, "learning_rate": 2e-05, "loss": 0.11642061, "step": 589 }, { "epoch": 1.18, "grad_norm": 6.910039901733398, "learning_rate": 2e-05, "loss": 0.11613733, "step": 590 }, { "epoch": 1.182, "grad_norm": 6.791270732879639, "learning_rate": 2e-05, "loss": 0.12102244, "step": 591 }, { "epoch": 1.184, "grad_norm": 6.810131549835205, "learning_rate": 2e-05, "loss": 0.11492567, "step": 592 }, { "epoch": 1.186, "grad_norm": 6.020359039306641, "learning_rate": 2e-05, "loss": 0.11758475, "step": 593 }, { "epoch": 1.188, "grad_norm": 7.514242649078369, "learning_rate": 2e-05, "loss": 0.12776956, "step": 594 }, { "epoch": 1.19, "grad_norm": 5.959623336791992, "learning_rate": 2e-05, "loss": 0.12490214, "step": 595 }, { "epoch": 1.192, "grad_norm": 8.227696418762207, "learning_rate": 2e-05, "loss": 0.15466234, "step": 596 }, { "epoch": 1.194, "grad_norm": 6.582882404327393, "learning_rate": 2e-05, "loss": 0.16650522, "step": 597 }, { "epoch": 1.196, "grad_norm": 8.464044570922852, "learning_rate": 2e-05, "loss": 0.13877106, "step": 598 }, { "epoch": 1.198, "grad_norm": 6.40317440032959, "learning_rate": 2e-05, "loss": 0.12645724, "step": 599 }, { "epoch": 1.2, "grad_norm": 7.446037769317627, "learning_rate": 2e-05, "loss": 0.14600489, "step": 600 }, { "epoch": 1.202, "grad_norm": 7.27105188369751, "learning_rate": 2e-05, "loss": 0.14670137, "step": 601 }, { "epoch": 1.204, "grad_norm": 7.501110076904297, "learning_rate": 2e-05, "loss": 0.12703331, "step": 602 }, { "epoch": 1.206, "grad_norm": 7.583506107330322, "learning_rate": 2e-05, "loss": 0.16439816, "step": 603 }, { "epoch": 1.208, "grad_norm": 8.459193229675293, "learning_rate": 2e-05, "loss": 0.13327542, "step": 604 }, { "epoch": 1.21, "grad_norm": 6.829992294311523, "learning_rate": 2e-05, "loss": 0.12586203, "step": 605 }, { "epoch": 1.212, "grad_norm": 9.046102523803711, "learning_rate": 2e-05, "loss": 0.14401428, "step": 606 }, { "epoch": 1.214, "grad_norm": 8.604940414428711, "learning_rate": 2e-05, "loss": 0.15417491, "step": 607 }, { "epoch": 1.216, "grad_norm": 11.232022285461426, "learning_rate": 2e-05, "loss": 0.1403241, "step": 608 }, { "epoch": 1.218, "grad_norm": 7.7439680099487305, "learning_rate": 2e-05, "loss": 0.14281566, "step": 609 }, { "epoch": 1.22, "grad_norm": 5.812641620635986, "learning_rate": 2e-05, "loss": 0.13614374, "step": 610 }, { "epoch": 1.222, "grad_norm": 9.642447471618652, "learning_rate": 2e-05, "loss": 0.11232011, "step": 611 }, { "epoch": 1.224, "grad_norm": 10.144608497619629, "learning_rate": 2e-05, "loss": 0.14832418, "step": 612 }, { "epoch": 1.226, "grad_norm": 8.029473304748535, "learning_rate": 2e-05, "loss": 0.10801431, "step": 613 }, { "epoch": 1.228, "grad_norm": 6.919248580932617, "learning_rate": 2e-05, "loss": 0.10865074, "step": 614 }, { "epoch": 1.23, "grad_norm": 9.7305908203125, "learning_rate": 2e-05, "loss": 0.12264413, "step": 615 }, { "epoch": 1.232, "grad_norm": 6.256381511688232, "learning_rate": 2e-05, "loss": 0.12773332, "step": 616 }, { "epoch": 1.234, "grad_norm": 361.88519287109375, "learning_rate": 2e-05, "loss": 0.14587097, "step": 617 }, { "epoch": 1.236, "grad_norm": 7.750764846801758, "learning_rate": 2e-05, "loss": 0.13632666, "step": 618 }, { "epoch": 1.238, "grad_norm": 8.356324195861816, "learning_rate": 2e-05, "loss": 0.1411123, "step": 619 }, { "epoch": 1.24, "grad_norm": 7.020558834075928, "learning_rate": 2e-05, "loss": 0.14831221, "step": 620 }, { "epoch": 1.242, "grad_norm": 7.680114269256592, "learning_rate": 2e-05, "loss": 0.14265683, "step": 621 }, { "epoch": 1.244, "grad_norm": 6.049409866333008, "learning_rate": 2e-05, "loss": 0.10246728, "step": 622 }, { "epoch": 1.246, "grad_norm": 7.97797155380249, "learning_rate": 2e-05, "loss": 0.12498067, "step": 623 }, { "epoch": 1.248, "grad_norm": 8.866729736328125, "learning_rate": 2e-05, "loss": 0.14478242, "step": 624 }, { "epoch": 1.25, "grad_norm": 7.1009416580200195, "learning_rate": 2e-05, "loss": 0.10981826, "step": 625 }, { "epoch": 1.252, "grad_norm": 11.852424621582031, "learning_rate": 2e-05, "loss": 0.14420906, "step": 626 }, { "epoch": 1.254, "grad_norm": 7.123490810394287, "learning_rate": 2e-05, "loss": 0.12589622, "step": 627 }, { "epoch": 1.256, "grad_norm": 7.414560317993164, "learning_rate": 2e-05, "loss": 0.13033554, "step": 628 }, { "epoch": 1.258, "grad_norm": 6.605888366699219, "learning_rate": 2e-05, "loss": 0.10761927, "step": 629 }, { "epoch": 1.26, "grad_norm": 7.649274826049805, "learning_rate": 2e-05, "loss": 0.14205627, "step": 630 }, { "epoch": 1.262, "grad_norm": 6.519278526306152, "learning_rate": 2e-05, "loss": 0.09576035, "step": 631 }, { "epoch": 1.264, "grad_norm": 12.06980037689209, "learning_rate": 2e-05, "loss": 0.11028983, "step": 632 }, { "epoch": 1.266, "grad_norm": 7.423180103302002, "learning_rate": 2e-05, "loss": 0.14019629, "step": 633 }, { "epoch": 1.268, "grad_norm": 11.850340843200684, "learning_rate": 2e-05, "loss": 0.16270405, "step": 634 }, { "epoch": 1.27, "grad_norm": 8.345808982849121, "learning_rate": 2e-05, "loss": 0.12013553, "step": 635 }, { "epoch": 1.272, "grad_norm": 6.913541316986084, "learning_rate": 2e-05, "loss": 0.12492045, "step": 636 }, { "epoch": 1.274, "grad_norm": 8.417133331298828, "learning_rate": 2e-05, "loss": 0.14936861, "step": 637 }, { "epoch": 1.276, "grad_norm": 6.285032749176025, "learning_rate": 2e-05, "loss": 0.14542431, "step": 638 }, { "epoch": 1.278, "grad_norm": 7.140380859375, "learning_rate": 2e-05, "loss": 0.12978765, "step": 639 }, { "epoch": 1.28, "grad_norm": 9.204744338989258, "learning_rate": 2e-05, "loss": 0.11895553, "step": 640 }, { "epoch": 1.282, "grad_norm": 6.695951461791992, "learning_rate": 2e-05, "loss": 0.12877251, "step": 641 }, { "epoch": 1.284, "grad_norm": 8.183476448059082, "learning_rate": 2e-05, "loss": 0.17350066, "step": 642 }, { "epoch": 1.286, "grad_norm": 6.097690582275391, "learning_rate": 2e-05, "loss": 0.11399058, "step": 643 }, { "epoch": 1.288, "grad_norm": 9.30003833770752, "learning_rate": 2e-05, "loss": 0.17027095, "step": 644 }, { "epoch": 1.29, "grad_norm": 10.765653610229492, "learning_rate": 2e-05, "loss": 0.17434001, "step": 645 }, { "epoch": 1.292, "grad_norm": 5.88740348815918, "learning_rate": 2e-05, "loss": 0.15665102, "step": 646 }, { "epoch": 1.294, "grad_norm": 8.2492036819458, "learning_rate": 2e-05, "loss": 0.12385593, "step": 647 }, { "epoch": 1.296, "grad_norm": 5.881067276000977, "learning_rate": 2e-05, "loss": 0.12011569, "step": 648 }, { "epoch": 1.298, "grad_norm": 8.322135925292969, "learning_rate": 2e-05, "loss": 0.1337342, "step": 649 }, { "epoch": 1.3, "grad_norm": 6.9860124588012695, "learning_rate": 2e-05, "loss": 0.13800102, "step": 650 }, { "epoch": 1.302, "grad_norm": 12.352527618408203, "learning_rate": 2e-05, "loss": 0.15338378, "step": 651 }, { "epoch": 1.304, "grad_norm": 8.948180198669434, "learning_rate": 2e-05, "loss": 0.15160236, "step": 652 }, { "epoch": 1.306, "grad_norm": 7.948204040527344, "learning_rate": 2e-05, "loss": 0.09736265, "step": 653 }, { "epoch": 1.308, "grad_norm": 7.0965471267700195, "learning_rate": 2e-05, "loss": 0.12006413, "step": 654 }, { "epoch": 1.31, "grad_norm": 7.391663074493408, "learning_rate": 2e-05, "loss": 0.13464636, "step": 655 }, { "epoch": 1.312, "grad_norm": 7.110716342926025, "learning_rate": 2e-05, "loss": 0.14254007, "step": 656 }, { "epoch": 1.314, "grad_norm": 8.599250793457031, "learning_rate": 2e-05, "loss": 0.13639411, "step": 657 }, { "epoch": 1.316, "grad_norm": 7.659989833831787, "learning_rate": 2e-05, "loss": 0.12393644, "step": 658 }, { "epoch": 1.318, "grad_norm": 9.000189781188965, "learning_rate": 2e-05, "loss": 0.12246567, "step": 659 }, { "epoch": 1.32, "grad_norm": 7.529514789581299, "learning_rate": 2e-05, "loss": 0.10893363, "step": 660 }, { "epoch": 1.322, "grad_norm": 7.818619251251221, "learning_rate": 2e-05, "loss": 0.11932263, "step": 661 }, { "epoch": 1.324, "grad_norm": 8.059538841247559, "learning_rate": 2e-05, "loss": 0.11955521, "step": 662 }, { "epoch": 1.326, "grad_norm": 10.673921585083008, "learning_rate": 2e-05, "loss": 0.16968741, "step": 663 }, { "epoch": 1.328, "grad_norm": 6.559533596038818, "learning_rate": 2e-05, "loss": 0.12729934, "step": 664 }, { "epoch": 1.33, "grad_norm": 7.830654144287109, "learning_rate": 2e-05, "loss": 0.12289311, "step": 665 }, { "epoch": 1.332, "grad_norm": 7.075840473175049, "learning_rate": 2e-05, "loss": 0.09802037, "step": 666 }, { "epoch": 1.334, "grad_norm": 6.611058235168457, "learning_rate": 2e-05, "loss": 0.12370251, "step": 667 }, { "epoch": 1.336, "grad_norm": 7.394319534301758, "learning_rate": 2e-05, "loss": 0.14683181, "step": 668 }, { "epoch": 1.338, "grad_norm": 7.754279136657715, "learning_rate": 2e-05, "loss": 0.1299786, "step": 669 }, { "epoch": 1.34, "grad_norm": 8.085694313049316, "learning_rate": 2e-05, "loss": 0.11091569, "step": 670 }, { "epoch": 1.342, "grad_norm": 7.589231014251709, "learning_rate": 2e-05, "loss": 0.11727732, "step": 671 }, { "epoch": 1.3439999999999999, "grad_norm": 6.278670787811279, "learning_rate": 2e-05, "loss": 0.14133112, "step": 672 }, { "epoch": 1.346, "grad_norm": 5.896803855895996, "learning_rate": 2e-05, "loss": 0.11285257, "step": 673 }, { "epoch": 1.3479999999999999, "grad_norm": 6.169055461883545, "learning_rate": 2e-05, "loss": 0.10887566, "step": 674 }, { "epoch": 1.35, "grad_norm": 7.765378475189209, "learning_rate": 2e-05, "loss": 0.16116959, "step": 675 }, { "epoch": 1.3519999999999999, "grad_norm": 6.4480438232421875, "learning_rate": 2e-05, "loss": 0.10548604, "step": 676 }, { "epoch": 1.354, "grad_norm": 6.0134782791137695, "learning_rate": 2e-05, "loss": 0.11822873, "step": 677 }, { "epoch": 1.3559999999999999, "grad_norm": 8.026538848876953, "learning_rate": 2e-05, "loss": 0.13639766, "step": 678 }, { "epoch": 1.358, "grad_norm": 6.741827964782715, "learning_rate": 2e-05, "loss": 0.13743089, "step": 679 }, { "epoch": 1.3599999999999999, "grad_norm": 7.600939750671387, "learning_rate": 2e-05, "loss": 0.11072077, "step": 680 }, { "epoch": 1.362, "grad_norm": 6.397022724151611, "learning_rate": 2e-05, "loss": 0.10721733, "step": 681 }, { "epoch": 1.3639999999999999, "grad_norm": 6.8020172119140625, "learning_rate": 2e-05, "loss": 0.13457298, "step": 682 }, { "epoch": 1.366, "grad_norm": 5.296658515930176, "learning_rate": 2e-05, "loss": 0.09861284, "step": 683 }, { "epoch": 1.3679999999999999, "grad_norm": 6.905277729034424, "learning_rate": 2e-05, "loss": 0.14065263, "step": 684 }, { "epoch": 1.37, "grad_norm": 7.811028957366943, "learning_rate": 2e-05, "loss": 0.14804435, "step": 685 }, { "epoch": 1.3719999999999999, "grad_norm": 7.36633825302124, "learning_rate": 2e-05, "loss": 0.13097915, "step": 686 }, { "epoch": 1.374, "grad_norm": 7.699337005615234, "learning_rate": 2e-05, "loss": 0.12762341, "step": 687 }, { "epoch": 1.376, "grad_norm": 6.240560531616211, "learning_rate": 2e-05, "loss": 0.12633774, "step": 688 }, { "epoch": 1.3780000000000001, "grad_norm": 6.404963970184326, "learning_rate": 2e-05, "loss": 0.12723273, "step": 689 }, { "epoch": 1.38, "grad_norm": 7.39198112487793, "learning_rate": 2e-05, "loss": 0.11706609, "step": 690 }, { "epoch": 1.3820000000000001, "grad_norm": 6.012218952178955, "learning_rate": 2e-05, "loss": 0.1400499, "step": 691 }, { "epoch": 1.384, "grad_norm": 6.968564033508301, "learning_rate": 2e-05, "loss": 0.09385957, "step": 692 }, { "epoch": 1.3860000000000001, "grad_norm": 6.732478141784668, "learning_rate": 2e-05, "loss": 0.12916751, "step": 693 }, { "epoch": 1.388, "grad_norm": 7.560279369354248, "learning_rate": 2e-05, "loss": 0.11552735, "step": 694 }, { "epoch": 1.3900000000000001, "grad_norm": 6.573136329650879, "learning_rate": 2e-05, "loss": 0.10633837, "step": 695 }, { "epoch": 1.392, "grad_norm": 6.231831073760986, "learning_rate": 2e-05, "loss": 0.08779217, "step": 696 }, { "epoch": 1.3940000000000001, "grad_norm": 6.229379653930664, "learning_rate": 2e-05, "loss": 0.1283851, "step": 697 }, { "epoch": 1.396, "grad_norm": 6.671759128570557, "learning_rate": 2e-05, "loss": 0.10272512, "step": 698 }, { "epoch": 1.3980000000000001, "grad_norm": 7.803220748901367, "learning_rate": 2e-05, "loss": 0.12400424, "step": 699 }, { "epoch": 1.4, "grad_norm": 7.6913981437683105, "learning_rate": 2e-05, "loss": 0.15030053, "step": 700 }, { "epoch": 1.4020000000000001, "grad_norm": 6.135531425476074, "learning_rate": 2e-05, "loss": 0.09517136, "step": 701 }, { "epoch": 1.404, "grad_norm": 7.549393653869629, "learning_rate": 2e-05, "loss": 0.1235142, "step": 702 }, { "epoch": 1.4060000000000001, "grad_norm": 7.8764729499816895, "learning_rate": 2e-05, "loss": 0.12000819, "step": 703 }, { "epoch": 1.408, "grad_norm": 6.700076580047607, "learning_rate": 2e-05, "loss": 0.10073349, "step": 704 }, { "epoch": 1.41, "grad_norm": 7.190609931945801, "learning_rate": 2e-05, "loss": 0.10502741, "step": 705 }, { "epoch": 1.412, "grad_norm": 5.181861400604248, "learning_rate": 2e-05, "loss": 0.09669125, "step": 706 }, { "epoch": 1.414, "grad_norm": 6.7262983322143555, "learning_rate": 2e-05, "loss": 0.13815945, "step": 707 }, { "epoch": 1.416, "grad_norm": 5.903952121734619, "learning_rate": 2e-05, "loss": 0.11092122, "step": 708 }, { "epoch": 1.418, "grad_norm": 7.125625133514404, "learning_rate": 2e-05, "loss": 0.13397256, "step": 709 }, { "epoch": 1.42, "grad_norm": 5.561816692352295, "learning_rate": 2e-05, "loss": 0.1156281, "step": 710 }, { "epoch": 1.422, "grad_norm": 6.877422332763672, "learning_rate": 2e-05, "loss": 0.10380179, "step": 711 }, { "epoch": 1.424, "grad_norm": 7.971022129058838, "learning_rate": 2e-05, "loss": 0.13770789, "step": 712 }, { "epoch": 1.426, "grad_norm": 6.559322357177734, "learning_rate": 2e-05, "loss": 0.08833808, "step": 713 }, { "epoch": 1.428, "grad_norm": 7.767148017883301, "learning_rate": 2e-05, "loss": 0.08398707, "step": 714 }, { "epoch": 1.43, "grad_norm": 8.450983047485352, "learning_rate": 2e-05, "loss": 0.14797822, "step": 715 }, { "epoch": 1.432, "grad_norm": 6.695959091186523, "learning_rate": 2e-05, "loss": 0.11736782, "step": 716 }, { "epoch": 1.434, "grad_norm": 8.09652042388916, "learning_rate": 2e-05, "loss": 0.13674074, "step": 717 }, { "epoch": 1.436, "grad_norm": 7.060610771179199, "learning_rate": 2e-05, "loss": 0.11990857, "step": 718 }, { "epoch": 1.438, "grad_norm": 7.436458110809326, "learning_rate": 2e-05, "loss": 0.1123884, "step": 719 }, { "epoch": 1.44, "grad_norm": 7.211880683898926, "learning_rate": 2e-05, "loss": 0.11206084, "step": 720 }, { "epoch": 1.442, "grad_norm": 5.833174705505371, "learning_rate": 2e-05, "loss": 0.10575607, "step": 721 }, { "epoch": 1.444, "grad_norm": 6.252803802490234, "learning_rate": 2e-05, "loss": 0.09617543, "step": 722 }, { "epoch": 1.446, "grad_norm": 7.151079177856445, "learning_rate": 2e-05, "loss": 0.11806791, "step": 723 }, { "epoch": 1.448, "grad_norm": 6.42376184463501, "learning_rate": 2e-05, "loss": 0.12720951, "step": 724 }, { "epoch": 1.45, "grad_norm": 5.261049270629883, "learning_rate": 2e-05, "loss": 0.07983638, "step": 725 }, { "epoch": 1.452, "grad_norm": 6.42758846282959, "learning_rate": 2e-05, "loss": 0.09344797, "step": 726 }, { "epoch": 1.454, "grad_norm": 5.817408084869385, "learning_rate": 2e-05, "loss": 0.12633443, "step": 727 }, { "epoch": 1.456, "grad_norm": 6.253754138946533, "learning_rate": 2e-05, "loss": 0.11862963, "step": 728 }, { "epoch": 1.458, "grad_norm": 7.424831390380859, "learning_rate": 2e-05, "loss": 0.13585119, "step": 729 }, { "epoch": 1.46, "grad_norm": 6.451118469238281, "learning_rate": 2e-05, "loss": 0.09739763, "step": 730 }, { "epoch": 1.462, "grad_norm": 7.451110363006592, "learning_rate": 2e-05, "loss": 0.09459317, "step": 731 }, { "epoch": 1.464, "grad_norm": 6.2205095291137695, "learning_rate": 2e-05, "loss": 0.09902443, "step": 732 }, { "epoch": 1.466, "grad_norm": 6.951596260070801, "learning_rate": 2e-05, "loss": 0.10905683, "step": 733 }, { "epoch": 1.468, "grad_norm": 6.248591423034668, "learning_rate": 2e-05, "loss": 0.11875764, "step": 734 }, { "epoch": 1.47, "grad_norm": 6.559650421142578, "learning_rate": 2e-05, "loss": 0.11254105, "step": 735 }, { "epoch": 1.472, "grad_norm": 6.9839630126953125, "learning_rate": 2e-05, "loss": 0.10611336, "step": 736 }, { "epoch": 1.474, "grad_norm": 6.550286293029785, "learning_rate": 2e-05, "loss": 0.11927168, "step": 737 }, { "epoch": 1.476, "grad_norm": 6.05648136138916, "learning_rate": 2e-05, "loss": 0.09478179, "step": 738 }, { "epoch": 1.478, "grad_norm": 6.733935356140137, "learning_rate": 2e-05, "loss": 0.12366334, "step": 739 }, { "epoch": 1.48, "grad_norm": 6.260366439819336, "learning_rate": 2e-05, "loss": 0.09063195, "step": 740 }, { "epoch": 1.482, "grad_norm": 7.614420413970947, "learning_rate": 2e-05, "loss": 0.10779889, "step": 741 }, { "epoch": 1.484, "grad_norm": 6.70510721206665, "learning_rate": 2e-05, "loss": 0.10829385, "step": 742 }, { "epoch": 1.486, "grad_norm": 5.922757625579834, "learning_rate": 2e-05, "loss": 0.10680553, "step": 743 }, { "epoch": 1.488, "grad_norm": 7.903866767883301, "learning_rate": 2e-05, "loss": 0.12156613, "step": 744 }, { "epoch": 1.49, "grad_norm": 8.794750213623047, "learning_rate": 2e-05, "loss": 0.12958995, "step": 745 }, { "epoch": 1.492, "grad_norm": 5.361749172210693, "learning_rate": 2e-05, "loss": 0.09895682, "step": 746 }, { "epoch": 1.494, "grad_norm": 6.881910800933838, "learning_rate": 2e-05, "loss": 0.09354668, "step": 747 }, { "epoch": 1.496, "grad_norm": 7.749849319458008, "learning_rate": 2e-05, "loss": 0.12458353, "step": 748 }, { "epoch": 1.498, "grad_norm": 7.6609368324279785, "learning_rate": 2e-05, "loss": 0.10578149, "step": 749 }, { "epoch": 1.5, "grad_norm": 7.065557479858398, "learning_rate": 2e-05, "loss": 0.09657469, "step": 750 }, { "epoch": 1.502, "grad_norm": 7.005954265594482, "learning_rate": 2e-05, "loss": 0.11572766, "step": 751 }, { "epoch": 1.504, "grad_norm": 8.220402717590332, "learning_rate": 2e-05, "loss": 0.12430849, "step": 752 }, { "epoch": 1.506, "grad_norm": 7.7641921043396, "learning_rate": 2e-05, "loss": 0.13409793, "step": 753 }, { "epoch": 1.508, "grad_norm": 5.373050212860107, "learning_rate": 2e-05, "loss": 0.09191975, "step": 754 }, { "epoch": 1.51, "grad_norm": 7.822051525115967, "learning_rate": 2e-05, "loss": 0.10553467, "step": 755 }, { "epoch": 1.512, "grad_norm": 6.383801460266113, "learning_rate": 2e-05, "loss": 0.10873458, "step": 756 }, { "epoch": 1.514, "grad_norm": 5.93826961517334, "learning_rate": 2e-05, "loss": 0.0889692, "step": 757 }, { "epoch": 1.516, "grad_norm": 11.082666397094727, "learning_rate": 2e-05, "loss": 0.09550989, "step": 758 }, { "epoch": 1.518, "grad_norm": 18.662195205688477, "learning_rate": 2e-05, "loss": 0.09863414, "step": 759 }, { "epoch": 1.52, "grad_norm": 6.628091812133789, "learning_rate": 2e-05, "loss": 0.1067197, "step": 760 }, { "epoch": 1.522, "grad_norm": 6.630215644836426, "learning_rate": 2e-05, "loss": 0.10325408, "step": 761 }, { "epoch": 1.524, "grad_norm": 7.012215614318848, "learning_rate": 2e-05, "loss": 0.09811021, "step": 762 }, { "epoch": 1.526, "grad_norm": 5.480252265930176, "learning_rate": 2e-05, "loss": 0.07472679, "step": 763 }, { "epoch": 1.528, "grad_norm": 5.4422688484191895, "learning_rate": 2e-05, "loss": 0.08200334, "step": 764 }, { "epoch": 1.53, "grad_norm": 5.999731540679932, "learning_rate": 2e-05, "loss": 0.08999962, "step": 765 }, { "epoch": 1.532, "grad_norm": 6.590312480926514, "learning_rate": 2e-05, "loss": 0.07797834, "step": 766 }, { "epoch": 1.534, "grad_norm": 6.310613632202148, "learning_rate": 2e-05, "loss": 0.09896718, "step": 767 }, { "epoch": 1.536, "grad_norm": 6.2471394538879395, "learning_rate": 2e-05, "loss": 0.07709666, "step": 768 }, { "epoch": 1.538, "grad_norm": 7.0532546043396, "learning_rate": 2e-05, "loss": 0.1032034, "step": 769 }, { "epoch": 1.54, "grad_norm": 7.018381595611572, "learning_rate": 2e-05, "loss": 0.11972788, "step": 770 }, { "epoch": 1.542, "grad_norm": 5.642862319946289, "learning_rate": 2e-05, "loss": 0.07764281, "step": 771 }, { "epoch": 1.544, "grad_norm": 6.698234558105469, "learning_rate": 2e-05, "loss": 0.08778076, "step": 772 }, { "epoch": 1.546, "grad_norm": 7.662743091583252, "learning_rate": 2e-05, "loss": 0.11157984, "step": 773 }, { "epoch": 1.548, "grad_norm": 6.0838541984558105, "learning_rate": 2e-05, "loss": 0.06307139, "step": 774 }, { "epoch": 1.55, "grad_norm": 6.993627071380615, "learning_rate": 2e-05, "loss": 0.11414985, "step": 775 }, { "epoch": 1.552, "grad_norm": 6.6558637619018555, "learning_rate": 2e-05, "loss": 0.1116517, "step": 776 }, { "epoch": 1.554, "grad_norm": 6.532042026519775, "learning_rate": 2e-05, "loss": 0.09595405, "step": 777 }, { "epoch": 1.556, "grad_norm": 5.867588043212891, "learning_rate": 2e-05, "loss": 0.0638399, "step": 778 }, { "epoch": 1.558, "grad_norm": 8.715815544128418, "learning_rate": 2e-05, "loss": 0.10308509, "step": 779 }, { "epoch": 1.56, "grad_norm": 8.474085807800293, "learning_rate": 2e-05, "loss": 0.08221825, "step": 780 }, { "epoch": 1.562, "grad_norm": 7.919243335723877, "learning_rate": 2e-05, "loss": 0.10870614, "step": 781 }, { "epoch": 1.564, "grad_norm": 6.559986591339111, "learning_rate": 2e-05, "loss": 0.08578913, "step": 782 }, { "epoch": 1.5659999999999998, "grad_norm": 8.993151664733887, "learning_rate": 2e-05, "loss": 0.1358231, "step": 783 }, { "epoch": 1.568, "grad_norm": 8.25828742980957, "learning_rate": 2e-05, "loss": 0.09963953, "step": 784 }, { "epoch": 1.5699999999999998, "grad_norm": 9.375242233276367, "learning_rate": 2e-05, "loss": 0.11151037, "step": 785 }, { "epoch": 1.572, "grad_norm": 8.190119743347168, "learning_rate": 2e-05, "loss": 0.090362, "step": 786 }, { "epoch": 1.5739999999999998, "grad_norm": 7.379586219787598, "learning_rate": 2e-05, "loss": 0.09313273, "step": 787 }, { "epoch": 1.576, "grad_norm": 7.8609700202941895, "learning_rate": 2e-05, "loss": 0.11915591, "step": 788 }, { "epoch": 1.5779999999999998, "grad_norm": 7.8925323486328125, "learning_rate": 2e-05, "loss": 0.12312968, "step": 789 }, { "epoch": 1.58, "grad_norm": 6.057433605194092, "learning_rate": 2e-05, "loss": 0.08744247, "step": 790 }, { "epoch": 1.5819999999999999, "grad_norm": 7.279991149902344, "learning_rate": 2e-05, "loss": 0.1070512, "step": 791 }, { "epoch": 1.584, "grad_norm": 6.304085731506348, "learning_rate": 2e-05, "loss": 0.06650187, "step": 792 }, { "epoch": 1.5859999999999999, "grad_norm": 6.232430934906006, "learning_rate": 2e-05, "loss": 0.10530697, "step": 793 }, { "epoch": 1.588, "grad_norm": 5.4284348487854, "learning_rate": 2e-05, "loss": 0.0916101, "step": 794 }, { "epoch": 1.5899999999999999, "grad_norm": 6.930412292480469, "learning_rate": 2e-05, "loss": 0.09970197, "step": 795 }, { "epoch": 1.592, "grad_norm": 6.033024311065674, "learning_rate": 2e-05, "loss": 0.11107236, "step": 796 }, { "epoch": 1.5939999999999999, "grad_norm": 6.148260116577148, "learning_rate": 2e-05, "loss": 0.10647646, "step": 797 }, { "epoch": 1.596, "grad_norm": 6.8578314781188965, "learning_rate": 2e-05, "loss": 0.06893492, "step": 798 }, { "epoch": 1.5979999999999999, "grad_norm": 7.9737548828125, "learning_rate": 2e-05, "loss": 0.08726038, "step": 799 }, { "epoch": 1.6, "grad_norm": 6.506727695465088, "learning_rate": 2e-05, "loss": 0.10219696, "step": 800 }, { "epoch": 1.6019999999999999, "grad_norm": 6.682027816772461, "learning_rate": 2e-05, "loss": 0.11973037, "step": 801 }, { "epoch": 1.604, "grad_norm": 6.24838399887085, "learning_rate": 2e-05, "loss": 0.08154324, "step": 802 }, { "epoch": 1.6059999999999999, "grad_norm": 5.957355499267578, "learning_rate": 2e-05, "loss": 0.096405, "step": 803 }, { "epoch": 1.608, "grad_norm": 7.033812999725342, "learning_rate": 2e-05, "loss": 0.09952313, "step": 804 }, { "epoch": 1.6099999999999999, "grad_norm": 7.580658912658691, "learning_rate": 2e-05, "loss": 0.09978154, "step": 805 }, { "epoch": 1.612, "grad_norm": 7.647181034088135, "learning_rate": 2e-05, "loss": 0.12008274, "step": 806 }, { "epoch": 1.6139999999999999, "grad_norm": 6.756109714508057, "learning_rate": 2e-05, "loss": 0.11643834, "step": 807 }, { "epoch": 1.616, "grad_norm": 6.645536422729492, "learning_rate": 2e-05, "loss": 0.09560777, "step": 808 }, { "epoch": 1.6179999999999999, "grad_norm": 6.775121688842773, "learning_rate": 2e-05, "loss": 0.09249366, "step": 809 }, { "epoch": 1.62, "grad_norm": 6.844783782958984, "learning_rate": 2e-05, "loss": 0.1230599, "step": 810 }, { "epoch": 1.6219999999999999, "grad_norm": 6.739988327026367, "learning_rate": 2e-05, "loss": 0.09061924, "step": 811 }, { "epoch": 1.624, "grad_norm": 6.914920330047607, "learning_rate": 2e-05, "loss": 0.0943954, "step": 812 }, { "epoch": 1.626, "grad_norm": 7.730279445648193, "learning_rate": 2e-05, "loss": 0.12506056, "step": 813 }, { "epoch": 1.6280000000000001, "grad_norm": 5.598476409912109, "learning_rate": 2e-05, "loss": 0.0823666, "step": 814 }, { "epoch": 1.63, "grad_norm": 7.6769843101501465, "learning_rate": 2e-05, "loss": 0.11194047, "step": 815 }, { "epoch": 1.6320000000000001, "grad_norm": 6.550734996795654, "learning_rate": 2e-05, "loss": 0.08922696, "step": 816 }, { "epoch": 1.634, "grad_norm": 5.997371673583984, "learning_rate": 2e-05, "loss": 0.09031668, "step": 817 }, { "epoch": 1.6360000000000001, "grad_norm": 6.193819999694824, "learning_rate": 2e-05, "loss": 0.08669251, "step": 818 }, { "epoch": 1.638, "grad_norm": 5.953890800476074, "learning_rate": 2e-05, "loss": 0.07713512, "step": 819 }, { "epoch": 1.6400000000000001, "grad_norm": 7.553543567657471, "learning_rate": 2e-05, "loss": 0.09000906, "step": 820 }, { "epoch": 1.642, "grad_norm": 11.03736400604248, "learning_rate": 2e-05, "loss": 0.08614812, "step": 821 }, { "epoch": 1.6440000000000001, "grad_norm": 6.0494256019592285, "learning_rate": 2e-05, "loss": 0.06890115, "step": 822 }, { "epoch": 1.646, "grad_norm": 6.099550724029541, "learning_rate": 2e-05, "loss": 0.1026978, "step": 823 }, { "epoch": 1.6480000000000001, "grad_norm": 7.1636433601379395, "learning_rate": 2e-05, "loss": 0.08252162, "step": 824 }, { "epoch": 1.65, "grad_norm": 6.723221302032471, "learning_rate": 2e-05, "loss": 0.08033492, "step": 825 }, { "epoch": 1.6520000000000001, "grad_norm": 5.727433204650879, "learning_rate": 2e-05, "loss": 0.08445412, "step": 826 }, { "epoch": 1.654, "grad_norm": 6.050266265869141, "learning_rate": 2e-05, "loss": 0.09819476, "step": 827 }, { "epoch": 1.6560000000000001, "grad_norm": 7.659172534942627, "learning_rate": 2e-05, "loss": 0.10615657, "step": 828 }, { "epoch": 1.658, "grad_norm": 5.446779727935791, "learning_rate": 2e-05, "loss": 0.09162195, "step": 829 }, { "epoch": 1.6600000000000001, "grad_norm": 6.158524513244629, "learning_rate": 2e-05, "loss": 0.08816704, "step": 830 }, { "epoch": 1.662, "grad_norm": 5.787862300872803, "learning_rate": 2e-05, "loss": 0.06790222, "step": 831 }, { "epoch": 1.6640000000000001, "grad_norm": 6.438635349273682, "learning_rate": 2e-05, "loss": 0.10423005, "step": 832 }, { "epoch": 1.666, "grad_norm": 6.338016510009766, "learning_rate": 2e-05, "loss": 0.09123434, "step": 833 }, { "epoch": 1.6680000000000001, "grad_norm": 5.405874729156494, "learning_rate": 2e-05, "loss": 0.0809098, "step": 834 }, { "epoch": 1.67, "grad_norm": 5.616071701049805, "learning_rate": 2e-05, "loss": 0.07267334, "step": 835 }, { "epoch": 1.6720000000000002, "grad_norm": 6.650810718536377, "learning_rate": 2e-05, "loss": 0.13262673, "step": 836 }, { "epoch": 1.674, "grad_norm": 5.87054967880249, "learning_rate": 2e-05, "loss": 0.08202045, "step": 837 }, { "epoch": 1.6760000000000002, "grad_norm": 5.650069713592529, "learning_rate": 2e-05, "loss": 0.07036343, "step": 838 }, { "epoch": 1.678, "grad_norm": 6.341075897216797, "learning_rate": 2e-05, "loss": 0.08050346, "step": 839 }, { "epoch": 1.6800000000000002, "grad_norm": 7.926372051239014, "learning_rate": 2e-05, "loss": 0.09585924, "step": 840 }, { "epoch": 1.682, "grad_norm": 6.60725736618042, "learning_rate": 2e-05, "loss": 0.08254407, "step": 841 }, { "epoch": 1.6840000000000002, "grad_norm": 6.295833587646484, "learning_rate": 2e-05, "loss": 0.10681386, "step": 842 }, { "epoch": 1.686, "grad_norm": 7.604525566101074, "learning_rate": 2e-05, "loss": 0.1167973, "step": 843 }, { "epoch": 1.688, "grad_norm": 5.448155403137207, "learning_rate": 2e-05, "loss": 0.07091927, "step": 844 }, { "epoch": 1.69, "grad_norm": 8.825179100036621, "learning_rate": 2e-05, "loss": 0.08705544, "step": 845 }, { "epoch": 1.692, "grad_norm": 4.532454490661621, "learning_rate": 2e-05, "loss": 0.07031232, "step": 846 }, { "epoch": 1.694, "grad_norm": 6.737591743469238, "learning_rate": 2e-05, "loss": 0.12384254, "step": 847 }, { "epoch": 1.696, "grad_norm": 7.228618621826172, "learning_rate": 2e-05, "loss": 0.10456499, "step": 848 }, { "epoch": 1.698, "grad_norm": 5.6962151527404785, "learning_rate": 2e-05, "loss": 0.06535837, "step": 849 }, { "epoch": 1.7, "grad_norm": 7.648133277893066, "learning_rate": 2e-05, "loss": 0.1042631, "step": 850 }, { "epoch": 1.702, "grad_norm": 5.600685119628906, "learning_rate": 2e-05, "loss": 0.10142366, "step": 851 }, { "epoch": 1.704, "grad_norm": 7.414607524871826, "learning_rate": 2e-05, "loss": 0.09157774, "step": 852 }, { "epoch": 1.706, "grad_norm": 6.639456748962402, "learning_rate": 2e-05, "loss": 0.08576006, "step": 853 }, { "epoch": 1.708, "grad_norm": 6.079015731811523, "learning_rate": 2e-05, "loss": 0.07663426, "step": 854 }, { "epoch": 1.71, "grad_norm": 7.947822570800781, "learning_rate": 2e-05, "loss": 0.10197342, "step": 855 }, { "epoch": 1.712, "grad_norm": 6.243646144866943, "learning_rate": 2e-05, "loss": 0.0723168, "step": 856 }, { "epoch": 1.714, "grad_norm": 6.511617183685303, "learning_rate": 2e-05, "loss": 0.0959603, "step": 857 }, { "epoch": 1.716, "grad_norm": 6.084030628204346, "learning_rate": 2e-05, "loss": 0.08375153, "step": 858 }, { "epoch": 1.718, "grad_norm": 4.823752403259277, "learning_rate": 2e-05, "loss": 0.099583, "step": 859 }, { "epoch": 1.72, "grad_norm": 6.112275123596191, "learning_rate": 2e-05, "loss": 0.07927334, "step": 860 }, { "epoch": 1.722, "grad_norm": 5.232405185699463, "learning_rate": 2e-05, "loss": 0.09133521, "step": 861 }, { "epoch": 1.724, "grad_norm": 8.09445571899414, "learning_rate": 2e-05, "loss": 0.05064913, "step": 862 }, { "epoch": 1.726, "grad_norm": 5.758426189422607, "learning_rate": 2e-05, "loss": 0.05941902, "step": 863 }, { "epoch": 1.728, "grad_norm": 6.230708599090576, "learning_rate": 2e-05, "loss": 0.07696392, "step": 864 }, { "epoch": 1.73, "grad_norm": 9.45265007019043, "learning_rate": 2e-05, "loss": 0.0977644, "step": 865 }, { "epoch": 1.732, "grad_norm": 6.776828765869141, "learning_rate": 2e-05, "loss": 0.0740013, "step": 866 }, { "epoch": 1.734, "grad_norm": 5.9897236824035645, "learning_rate": 2e-05, "loss": 0.10689935, "step": 867 }, { "epoch": 1.736, "grad_norm": 7.968659400939941, "learning_rate": 2e-05, "loss": 0.09492065, "step": 868 }, { "epoch": 1.738, "grad_norm": 7.489259243011475, "learning_rate": 2e-05, "loss": 0.10448711, "step": 869 }, { "epoch": 1.74, "grad_norm": 6.4903130531311035, "learning_rate": 2e-05, "loss": 0.0939475, "step": 870 }, { "epoch": 1.742, "grad_norm": 5.988752365112305, "learning_rate": 2e-05, "loss": 0.08839993, "step": 871 }, { "epoch": 1.744, "grad_norm": 5.980820655822754, "learning_rate": 2e-05, "loss": 0.08525012, "step": 872 }, { "epoch": 1.746, "grad_norm": 5.982888698577881, "learning_rate": 2e-05, "loss": 0.08260086, "step": 873 }, { "epoch": 1.748, "grad_norm": 7.195910453796387, "learning_rate": 2e-05, "loss": 0.08393706, "step": 874 }, { "epoch": 1.75, "grad_norm": 4.47223424911499, "learning_rate": 2e-05, "loss": 0.04688883, "step": 875 }, { "epoch": 1.752, "grad_norm": 7.666622638702393, "learning_rate": 2e-05, "loss": 0.09070026, "step": 876 }, { "epoch": 1.754, "grad_norm": 10.301369667053223, "learning_rate": 2e-05, "loss": 0.11324744, "step": 877 }, { "epoch": 1.756, "grad_norm": 9.853584289550781, "learning_rate": 2e-05, "loss": 0.12401251, "step": 878 }, { "epoch": 1.758, "grad_norm": 8.028107643127441, "learning_rate": 2e-05, "loss": 0.07682119, "step": 879 }, { "epoch": 1.76, "grad_norm": 7.617894649505615, "learning_rate": 2e-05, "loss": 0.11872394, "step": 880 }, { "epoch": 1.762, "grad_norm": 6.536325454711914, "learning_rate": 2e-05, "loss": 0.08617866, "step": 881 }, { "epoch": 1.764, "grad_norm": 6.377828121185303, "learning_rate": 2e-05, "loss": 0.09066105, "step": 882 }, { "epoch": 1.766, "grad_norm": 5.527547359466553, "learning_rate": 2e-05, "loss": 0.07256127, "step": 883 }, { "epoch": 1.768, "grad_norm": 6.013676166534424, "learning_rate": 2e-05, "loss": 0.08411078, "step": 884 }, { "epoch": 1.77, "grad_norm": 6.797071933746338, "learning_rate": 2e-05, "loss": 0.09081668, "step": 885 }, { "epoch": 1.772, "grad_norm": 7.716439723968506, "learning_rate": 2e-05, "loss": 0.12274131, "step": 886 }, { "epoch": 1.774, "grad_norm": 7.224231243133545, "learning_rate": 2e-05, "loss": 0.08545151, "step": 887 }, { "epoch": 1.776, "grad_norm": 6.198343276977539, "learning_rate": 2e-05, "loss": 0.07063843, "step": 888 }, { "epoch": 1.778, "grad_norm": 6.558867931365967, "learning_rate": 2e-05, "loss": 0.08810833, "step": 889 }, { "epoch": 1.78, "grad_norm": 7.798099517822266, "learning_rate": 2e-05, "loss": 0.08349015, "step": 890 }, { "epoch": 1.782, "grad_norm": 6.4789557456970215, "learning_rate": 2e-05, "loss": 0.09349858, "step": 891 }, { "epoch": 1.784, "grad_norm": 6.342051029205322, "learning_rate": 2e-05, "loss": 0.08632416, "step": 892 }, { "epoch": 1.786, "grad_norm": 6.692185401916504, "learning_rate": 2e-05, "loss": 0.09073203, "step": 893 }, { "epoch": 1.788, "grad_norm": 5.751002311706543, "learning_rate": 2e-05, "loss": 0.06307799, "step": 894 }, { "epoch": 1.79, "grad_norm": 5.913455486297607, "learning_rate": 2e-05, "loss": 0.07896129, "step": 895 }, { "epoch": 1.792, "grad_norm": 6.992930889129639, "learning_rate": 2e-05, "loss": 0.11484307, "step": 896 }, { "epoch": 1.794, "grad_norm": 5.7984395027160645, "learning_rate": 2e-05, "loss": 0.0806881, "step": 897 }, { "epoch": 1.796, "grad_norm": 6.629338264465332, "learning_rate": 2e-05, "loss": 0.10427346, "step": 898 }, { "epoch": 1.798, "grad_norm": 7.1573591232299805, "learning_rate": 2e-05, "loss": 0.08466835, "step": 899 }, { "epoch": 1.8, "grad_norm": 6.95459508895874, "learning_rate": 2e-05, "loss": 0.11079477, "step": 900 }, { "epoch": 1.802, "grad_norm": 6.994939804077148, "learning_rate": 2e-05, "loss": 0.11122356, "step": 901 }, { "epoch": 1.804, "grad_norm": 6.411318778991699, "learning_rate": 2e-05, "loss": 0.07929517, "step": 902 }, { "epoch": 1.806, "grad_norm": 7.465093612670898, "learning_rate": 2e-05, "loss": 0.08886904, "step": 903 }, { "epoch": 1.808, "grad_norm": 7.3899946212768555, "learning_rate": 2e-05, "loss": 0.09108968, "step": 904 }, { "epoch": 1.81, "grad_norm": 5.9346394538879395, "learning_rate": 2e-05, "loss": 0.08338928, "step": 905 }, { "epoch": 1.812, "grad_norm": 7.208188533782959, "learning_rate": 2e-05, "loss": 0.10159224, "step": 906 }, { "epoch": 1.814, "grad_norm": 6.63840389251709, "learning_rate": 2e-05, "loss": 0.08786465, "step": 907 }, { "epoch": 1.8159999999999998, "grad_norm": 5.428511619567871, "learning_rate": 2e-05, "loss": 0.08521967, "step": 908 }, { "epoch": 1.818, "grad_norm": 7.23980188369751, "learning_rate": 2e-05, "loss": 0.07308835, "step": 909 }, { "epoch": 1.8199999999999998, "grad_norm": 7.0117645263671875, "learning_rate": 2e-05, "loss": 0.08310041, "step": 910 }, { "epoch": 1.822, "grad_norm": 10.849108695983887, "learning_rate": 2e-05, "loss": 0.1070163, "step": 911 }, { "epoch": 1.8239999999999998, "grad_norm": 8.361151695251465, "learning_rate": 2e-05, "loss": 0.09858213, "step": 912 }, { "epoch": 1.826, "grad_norm": 5.723193645477295, "learning_rate": 2e-05, "loss": 0.07075511, "step": 913 }, { "epoch": 1.8279999999999998, "grad_norm": 7.726888656616211, "learning_rate": 2e-05, "loss": 0.11251749, "step": 914 }, { "epoch": 1.83, "grad_norm": 6.3023223876953125, "learning_rate": 2e-05, "loss": 0.093556, "step": 915 }, { "epoch": 1.8319999999999999, "grad_norm": 6.006319999694824, "learning_rate": 2e-05, "loss": 0.08642451, "step": 916 }, { "epoch": 1.834, "grad_norm": 7.0457377433776855, "learning_rate": 2e-05, "loss": 0.08117503, "step": 917 }, { "epoch": 1.8359999999999999, "grad_norm": 6.439599514007568, "learning_rate": 2e-05, "loss": 0.09040029, "step": 918 }, { "epoch": 1.838, "grad_norm": 5.463101863861084, "learning_rate": 2e-05, "loss": 0.0806648, "step": 919 }, { "epoch": 1.8399999999999999, "grad_norm": 5.786152362823486, "learning_rate": 2e-05, "loss": 0.07837954, "step": 920 }, { "epoch": 1.842, "grad_norm": 5.678594589233398, "learning_rate": 2e-05, "loss": 0.07307442, "step": 921 }, { "epoch": 1.8439999999999999, "grad_norm": 7.171142578125, "learning_rate": 2e-05, "loss": 0.1255897, "step": 922 }, { "epoch": 1.846, "grad_norm": 5.736705303192139, "learning_rate": 2e-05, "loss": 0.09313449, "step": 923 }, { "epoch": 1.8479999999999999, "grad_norm": 5.670688152313232, "learning_rate": 2e-05, "loss": 0.08534977, "step": 924 }, { "epoch": 1.85, "grad_norm": 6.940417766571045, "learning_rate": 2e-05, "loss": 0.10553095, "step": 925 }, { "epoch": 1.8519999999999999, "grad_norm": 5.162463665008545, "learning_rate": 2e-05, "loss": 0.0865514, "step": 926 }, { "epoch": 1.854, "grad_norm": 5.819636821746826, "learning_rate": 2e-05, "loss": 0.0812398, "step": 927 }, { "epoch": 1.8559999999999999, "grad_norm": 5.4693098068237305, "learning_rate": 2e-05, "loss": 0.08950453, "step": 928 }, { "epoch": 1.858, "grad_norm": 5.408148765563965, "learning_rate": 2e-05, "loss": 0.07980577, "step": 929 }, { "epoch": 1.8599999999999999, "grad_norm": 5.197288513183594, "learning_rate": 2e-05, "loss": 0.07636063, "step": 930 }, { "epoch": 1.862, "grad_norm": 5.462472915649414, "learning_rate": 2e-05, "loss": 0.08865924, "step": 931 }, { "epoch": 1.8639999999999999, "grad_norm": 5.021551132202148, "learning_rate": 2e-05, "loss": 0.06055816, "step": 932 }, { "epoch": 1.866, "grad_norm": 5.665159702301025, "learning_rate": 2e-05, "loss": 0.08568858, "step": 933 }, { "epoch": 1.8679999999999999, "grad_norm": 5.89557409286499, "learning_rate": 2e-05, "loss": 0.08510157, "step": 934 }, { "epoch": 1.87, "grad_norm": 6.0552167892456055, "learning_rate": 2e-05, "loss": 0.07803413, "step": 935 }, { "epoch": 1.8719999999999999, "grad_norm": 6.329714298248291, "learning_rate": 2e-05, "loss": 0.06721306, "step": 936 }, { "epoch": 1.874, "grad_norm": 7.109095096588135, "learning_rate": 2e-05, "loss": 0.1098647, "step": 937 }, { "epoch": 1.876, "grad_norm": 6.339148044586182, "learning_rate": 2e-05, "loss": 0.10333439, "step": 938 }, { "epoch": 1.8780000000000001, "grad_norm": 6.413509845733643, "learning_rate": 2e-05, "loss": 0.07200794, "step": 939 }, { "epoch": 1.88, "grad_norm": 5.705143451690674, "learning_rate": 2e-05, "loss": 0.06853146, "step": 940 }, { "epoch": 1.8820000000000001, "grad_norm": 6.176608085632324, "learning_rate": 2e-05, "loss": 0.08500596, "step": 941 }, { "epoch": 1.884, "grad_norm": 5.1187567710876465, "learning_rate": 2e-05, "loss": 0.08432245, "step": 942 }, { "epoch": 1.8860000000000001, "grad_norm": 5.66195821762085, "learning_rate": 2e-05, "loss": 0.08069464, "step": 943 }, { "epoch": 1.888, "grad_norm": 5.817798137664795, "learning_rate": 2e-05, "loss": 0.10365196, "step": 944 }, { "epoch": 1.8900000000000001, "grad_norm": 5.564085483551025, "learning_rate": 2e-05, "loss": 0.07923815, "step": 945 }, { "epoch": 1.892, "grad_norm": 5.592848777770996, "learning_rate": 2e-05, "loss": 0.07895133, "step": 946 }, { "epoch": 1.8940000000000001, "grad_norm": 7.726067066192627, "learning_rate": 2e-05, "loss": 0.09378611, "step": 947 }, { "epoch": 1.896, "grad_norm": 5.238822937011719, "learning_rate": 2e-05, "loss": 0.05780448, "step": 948 }, { "epoch": 1.8980000000000001, "grad_norm": 4.402891635894775, "learning_rate": 2e-05, "loss": 0.06716388, "step": 949 }, { "epoch": 1.9, "grad_norm": 4.693483352661133, "learning_rate": 2e-05, "loss": 0.06642798, "step": 950 }, { "epoch": 1.9020000000000001, "grad_norm": 5.334549903869629, "learning_rate": 2e-05, "loss": 0.08704714, "step": 951 }, { "epoch": 1.904, "grad_norm": 5.377449035644531, "learning_rate": 2e-05, "loss": 0.08278981, "step": 952 }, { "epoch": 1.9060000000000001, "grad_norm": 7.536509037017822, "learning_rate": 2e-05, "loss": 0.11738987, "step": 953 }, { "epoch": 1.908, "grad_norm": 5.507444381713867, "learning_rate": 2e-05, "loss": 0.06531938, "step": 954 }, { "epoch": 1.9100000000000001, "grad_norm": 4.8320417404174805, "learning_rate": 2e-05, "loss": 0.07463697, "step": 955 }, { "epoch": 1.912, "grad_norm": 4.767637729644775, "learning_rate": 2e-05, "loss": 0.06733444, "step": 956 }, { "epoch": 1.9140000000000001, "grad_norm": 5.721960067749023, "learning_rate": 2e-05, "loss": 0.0730796, "step": 957 }, { "epoch": 1.916, "grad_norm": 6.459201335906982, "learning_rate": 2e-05, "loss": 0.08597382, "step": 958 }, { "epoch": 1.9180000000000001, "grad_norm": 6.380370140075684, "learning_rate": 2e-05, "loss": 0.07593976, "step": 959 }, { "epoch": 1.92, "grad_norm": 7.044502258300781, "learning_rate": 2e-05, "loss": 0.08926235, "step": 960 }, { "epoch": 1.9220000000000002, "grad_norm": 6.474540710449219, "learning_rate": 2e-05, "loss": 0.09497936, "step": 961 }, { "epoch": 1.924, "grad_norm": 7.206709384918213, "learning_rate": 2e-05, "loss": 0.07414088, "step": 962 }, { "epoch": 1.9260000000000002, "grad_norm": 5.44899845123291, "learning_rate": 2e-05, "loss": 0.08851054, "step": 963 }, { "epoch": 1.928, "grad_norm": 6.5250420570373535, "learning_rate": 2e-05, "loss": 0.09423383, "step": 964 }, { "epoch": 1.9300000000000002, "grad_norm": 6.720390319824219, "learning_rate": 2e-05, "loss": 0.09722751, "step": 965 }, { "epoch": 1.932, "grad_norm": 6.4455695152282715, "learning_rate": 2e-05, "loss": 0.11846717, "step": 966 }, { "epoch": 1.9340000000000002, "grad_norm": 6.086376667022705, "learning_rate": 2e-05, "loss": 0.09491129, "step": 967 }, { "epoch": 1.936, "grad_norm": 6.304816246032715, "learning_rate": 2e-05, "loss": 0.08396909, "step": 968 }, { "epoch": 1.938, "grad_norm": 8.445196151733398, "learning_rate": 2e-05, "loss": 0.06416641, "step": 969 }, { "epoch": 1.94, "grad_norm": 5.074264049530029, "learning_rate": 2e-05, "loss": 0.05769046, "step": 970 }, { "epoch": 1.942, "grad_norm": 7.1893720626831055, "learning_rate": 2e-05, "loss": 0.09772494, "step": 971 }, { "epoch": 1.944, "grad_norm": 7.159612655639648, "learning_rate": 2e-05, "loss": 0.0916353, "step": 972 }, { "epoch": 1.946, "grad_norm": 6.2384419441223145, "learning_rate": 2e-05, "loss": 0.07486523, "step": 973 }, { "epoch": 1.948, "grad_norm": 7.298469543457031, "learning_rate": 2e-05, "loss": 0.0874768, "step": 974 }, { "epoch": 1.95, "grad_norm": 6.044239044189453, "learning_rate": 2e-05, "loss": 0.08542761, "step": 975 }, { "epoch": 1.952, "grad_norm": 6.651734828948975, "learning_rate": 2e-05, "loss": 0.08901096, "step": 976 }, { "epoch": 1.954, "grad_norm": 6.1580305099487305, "learning_rate": 2e-05, "loss": 0.08394586, "step": 977 }, { "epoch": 1.956, "grad_norm": 6.56711483001709, "learning_rate": 2e-05, "loss": 0.0742816, "step": 978 }, { "epoch": 1.958, "grad_norm": 5.370865821838379, "learning_rate": 2e-05, "loss": 0.0798419, "step": 979 }, { "epoch": 1.96, "grad_norm": 5.589046478271484, "learning_rate": 2e-05, "loss": 0.06897587, "step": 980 }, { "epoch": 1.962, "grad_norm": 7.22554874420166, "learning_rate": 2e-05, "loss": 0.116116, "step": 981 }, { "epoch": 1.964, "grad_norm": 6.257077217102051, "learning_rate": 2e-05, "loss": 0.07505171, "step": 982 }, { "epoch": 1.966, "grad_norm": 6.041742324829102, "learning_rate": 2e-05, "loss": 0.08705196, "step": 983 }, { "epoch": 1.968, "grad_norm": 5.596206188201904, "learning_rate": 2e-05, "loss": 0.09797958, "step": 984 }, { "epoch": 1.97, "grad_norm": 6.748600482940674, "learning_rate": 2e-05, "loss": 0.08495703, "step": 985 }, { "epoch": 1.972, "grad_norm": 6.933077812194824, "learning_rate": 2e-05, "loss": 0.10241848, "step": 986 }, { "epoch": 1.974, "grad_norm": 5.3895158767700195, "learning_rate": 2e-05, "loss": 0.08071596, "step": 987 }, { "epoch": 1.976, "grad_norm": 5.876412868499756, "learning_rate": 2e-05, "loss": 0.05421221, "step": 988 }, { "epoch": 1.978, "grad_norm": 5.811347961425781, "learning_rate": 2e-05, "loss": 0.09953476, "step": 989 }, { "epoch": 1.98, "grad_norm": 5.284328937530518, "learning_rate": 2e-05, "loss": 0.09931732, "step": 990 }, { "epoch": 1.982, "grad_norm": 6.064516544342041, "learning_rate": 2e-05, "loss": 0.10275357, "step": 991 }, { "epoch": 1.984, "grad_norm": 4.727668285369873, "learning_rate": 2e-05, "loss": 0.0730349, "step": 992 }, { "epoch": 1.986, "grad_norm": 4.79754114151001, "learning_rate": 2e-05, "loss": 0.06537021, "step": 993 }, { "epoch": 1.988, "grad_norm": 4.8797173500061035, "learning_rate": 2e-05, "loss": 0.08368631, "step": 994 }, { "epoch": 1.99, "grad_norm": 5.771665096282959, "learning_rate": 2e-05, "loss": 0.09043793, "step": 995 }, { "epoch": 1.992, "grad_norm": 5.609272480010986, "learning_rate": 2e-05, "loss": 0.09305952, "step": 996 }, { "epoch": 1.994, "grad_norm": 5.353261947631836, "learning_rate": 2e-05, "loss": 0.07852168, "step": 997 }, { "epoch": 1.996, "grad_norm": 5.1136474609375, "learning_rate": 2e-05, "loss": 0.0769015, "step": 998 }, { "epoch": 1.998, "grad_norm": 5.327400207519531, "learning_rate": 2e-05, "loss": 0.08302744, "step": 999 }, { "epoch": 2.0, "grad_norm": 5.4508867263793945, "learning_rate": 2e-05, "loss": 0.09140044, "step": 1000 }, { "epoch": 2.0, "eval_performance": { "AngleClassification_1": 0.99, "AngleClassification_2": 0.664, "AngleClassification_3": 0.5189620758483033, "Equal_1": 0.788, "Equal_2": 0.5528942115768463, "Equal_3": 0.5828343313373253, "LineComparison_1": 0.974, "LineComparison_2": 0.9461077844311377, "LineComparison_3": 0.8363273453093812, "Parallel_1": 0.7214428857715431, "Parallel_2": 0.9378757515030061, "Parallel_3": 0.388, "Perpendicular_1": 0.826, "Perpendicular_2": 0.296, "Perpendicular_3": 0.08316633266533066, "PointLiesOnCircle_1": 0.992251169004676, "PointLiesOnCircle_2": 0.9790999999999999, "PointLiesOnCircle_3": 0.7548333333333334, "PointLiesOnLine_1": 0.7354709418837675, "PointLiesOnLine_2": 0.4188376753507014, "PointLiesOnLine_3": 0.2634730538922156 }, "eval_runtime": 224.9939, "eval_samples_per_second": 46.668, "eval_steps_per_second": 0.933, "step": 1000 }, { "epoch": 2.002, "grad_norm": 6.127501487731934, "learning_rate": 2e-05, "loss": 0.09605525, "step": 1001 }, { "epoch": 2.004, "grad_norm": 5.803463935852051, "learning_rate": 2e-05, "loss": 0.08370565, "step": 1002 }, { "epoch": 2.006, "grad_norm": 4.599699974060059, "learning_rate": 2e-05, "loss": 0.07494158, "step": 1003 }, { "epoch": 2.008, "grad_norm": 6.703914642333984, "learning_rate": 2e-05, "loss": 0.0742216, "step": 1004 }, { "epoch": 2.01, "grad_norm": 5.665539741516113, "learning_rate": 2e-05, "loss": 0.06932583, "step": 1005 }, { "epoch": 2.012, "grad_norm": 5.154899597167969, "learning_rate": 2e-05, "loss": 0.08177568, "step": 1006 }, { "epoch": 2.014, "grad_norm": 4.637820720672607, "learning_rate": 2e-05, "loss": 0.06241836, "step": 1007 }, { "epoch": 2.016, "grad_norm": 4.956040382385254, "learning_rate": 2e-05, "loss": 0.04467124, "step": 1008 }, { "epoch": 2.018, "grad_norm": 6.109738826751709, "learning_rate": 2e-05, "loss": 0.08741914, "step": 1009 }, { "epoch": 2.02, "grad_norm": 6.145622253417969, "learning_rate": 2e-05, "loss": 0.08232199, "step": 1010 }, { "epoch": 2.022, "grad_norm": 6.279349327087402, "learning_rate": 2e-05, "loss": 0.09628882, "step": 1011 }, { "epoch": 2.024, "grad_norm": 5.525803089141846, "learning_rate": 2e-05, "loss": 0.08221775, "step": 1012 }, { "epoch": 2.026, "grad_norm": 6.564820289611816, "learning_rate": 2e-05, "loss": 0.1056895, "step": 1013 }, { "epoch": 2.028, "grad_norm": 6.0837225914001465, "learning_rate": 2e-05, "loss": 0.07582446, "step": 1014 }, { "epoch": 2.03, "grad_norm": 5.1909050941467285, "learning_rate": 2e-05, "loss": 0.07464769, "step": 1015 }, { "epoch": 2.032, "grad_norm": 4.655552387237549, "learning_rate": 2e-05, "loss": 0.06795575, "step": 1016 }, { "epoch": 2.034, "grad_norm": 5.959053993225098, "learning_rate": 2e-05, "loss": 0.08964419, "step": 1017 }, { "epoch": 2.036, "grad_norm": 5.0768938064575195, "learning_rate": 2e-05, "loss": 0.07672279, "step": 1018 }, { "epoch": 2.038, "grad_norm": 5.039190769195557, "learning_rate": 2e-05, "loss": 0.07658345, "step": 1019 }, { "epoch": 2.04, "grad_norm": 5.619899272918701, "learning_rate": 2e-05, "loss": 0.08196241, "step": 1020 }, { "epoch": 2.042, "grad_norm": 7.010852336883545, "learning_rate": 2e-05, "loss": 0.08686353, "step": 1021 }, { "epoch": 2.044, "grad_norm": 4.937379837036133, "learning_rate": 2e-05, "loss": 0.07323407, "step": 1022 }, { "epoch": 2.046, "grad_norm": 4.988183498382568, "learning_rate": 2e-05, "loss": 0.06818819, "step": 1023 }, { "epoch": 2.048, "grad_norm": 6.082690238952637, "learning_rate": 2e-05, "loss": 0.09276307, "step": 1024 }, { "epoch": 2.05, "grad_norm": 7.151668071746826, "learning_rate": 2e-05, "loss": 0.12061563, "step": 1025 }, { "epoch": 2.052, "grad_norm": 5.288543701171875, "learning_rate": 2e-05, "loss": 0.08137431, "step": 1026 }, { "epoch": 2.054, "grad_norm": 7.265625476837158, "learning_rate": 2e-05, "loss": 0.1261089, "step": 1027 }, { "epoch": 2.056, "grad_norm": 5.863519191741943, "learning_rate": 2e-05, "loss": 0.06764249, "step": 1028 }, { "epoch": 2.058, "grad_norm": 5.5973734855651855, "learning_rate": 2e-05, "loss": 0.07235534, "step": 1029 }, { "epoch": 2.06, "grad_norm": 5.530547142028809, "learning_rate": 2e-05, "loss": 0.06375699, "step": 1030 }, { "epoch": 2.062, "grad_norm": 5.46246337890625, "learning_rate": 2e-05, "loss": 0.09294739, "step": 1031 }, { "epoch": 2.064, "grad_norm": 5.801873683929443, "learning_rate": 2e-05, "loss": 0.09410387, "step": 1032 }, { "epoch": 2.066, "grad_norm": 4.608308792114258, "learning_rate": 2e-05, "loss": 0.07345197, "step": 1033 }, { "epoch": 2.068, "grad_norm": 4.545248985290527, "learning_rate": 2e-05, "loss": 0.06891242, "step": 1034 }, { "epoch": 2.07, "grad_norm": 3.976973533630371, "learning_rate": 2e-05, "loss": 0.06393366, "step": 1035 }, { "epoch": 2.072, "grad_norm": 5.615103721618652, "learning_rate": 2e-05, "loss": 0.09274217, "step": 1036 }, { "epoch": 2.074, "grad_norm": 5.611616134643555, "learning_rate": 2e-05, "loss": 0.09699512, "step": 1037 }, { "epoch": 2.076, "grad_norm": 4.313354969024658, "learning_rate": 2e-05, "loss": 0.0616723, "step": 1038 }, { "epoch": 2.078, "grad_norm": 5.224069595336914, "learning_rate": 2e-05, "loss": 0.06845634, "step": 1039 }, { "epoch": 2.08, "grad_norm": 4.326113700866699, "learning_rate": 2e-05, "loss": 0.06099868, "step": 1040 }, { "epoch": 2.082, "grad_norm": 7.178597450256348, "learning_rate": 2e-05, "loss": 0.10244808, "step": 1041 }, { "epoch": 2.084, "grad_norm": 5.4881463050842285, "learning_rate": 2e-05, "loss": 0.08736734, "step": 1042 }, { "epoch": 2.086, "grad_norm": 6.4596781730651855, "learning_rate": 2e-05, "loss": 0.10417489, "step": 1043 }, { "epoch": 2.088, "grad_norm": 5.694255828857422, "learning_rate": 2e-05, "loss": 0.08424762, "step": 1044 }, { "epoch": 2.09, "grad_norm": 6.513037204742432, "learning_rate": 2e-05, "loss": 0.08413312, "step": 1045 }, { "epoch": 2.092, "grad_norm": 7.854562282562256, "learning_rate": 2e-05, "loss": 0.06828485, "step": 1046 }, { "epoch": 2.094, "grad_norm": 5.506662368774414, "learning_rate": 2e-05, "loss": 0.07331842, "step": 1047 }, { "epoch": 2.096, "grad_norm": 5.808956623077393, "learning_rate": 2e-05, "loss": 0.08894197, "step": 1048 }, { "epoch": 2.098, "grad_norm": 4.787161350250244, "learning_rate": 2e-05, "loss": 0.07078961, "step": 1049 }, { "epoch": 2.1, "grad_norm": 6.782310962677002, "learning_rate": 2e-05, "loss": 0.07347016, "step": 1050 }, { "epoch": 2.102, "grad_norm": 9.133805274963379, "learning_rate": 2e-05, "loss": 0.12958816, "step": 1051 }, { "epoch": 2.104, "grad_norm": 5.40065336227417, "learning_rate": 2e-05, "loss": 0.08760545, "step": 1052 }, { "epoch": 2.106, "grad_norm": 5.910422325134277, "learning_rate": 2e-05, "loss": 0.09682372, "step": 1053 }, { "epoch": 2.108, "grad_norm": 4.841230392456055, "learning_rate": 2e-05, "loss": 0.0938227, "step": 1054 }, { "epoch": 2.11, "grad_norm": 4.990455627441406, "learning_rate": 2e-05, "loss": 0.06578302, "step": 1055 }, { "epoch": 2.112, "grad_norm": 7.2492218017578125, "learning_rate": 2e-05, "loss": 0.09669866, "step": 1056 }, { "epoch": 2.114, "grad_norm": 5.629972457885742, "learning_rate": 2e-05, "loss": 0.07953146, "step": 1057 }, { "epoch": 2.116, "grad_norm": 5.828209400177002, "learning_rate": 2e-05, "loss": 0.08794236, "step": 1058 }, { "epoch": 2.118, "grad_norm": 6.12166166305542, "learning_rate": 2e-05, "loss": 0.08398376, "step": 1059 }, { "epoch": 2.12, "grad_norm": 5.690420150756836, "learning_rate": 2e-05, "loss": 0.08332887, "step": 1060 }, { "epoch": 2.122, "grad_norm": 5.3961262702941895, "learning_rate": 2e-05, "loss": 0.07512006, "step": 1061 }, { "epoch": 2.124, "grad_norm": 5.752467155456543, "learning_rate": 2e-05, "loss": 0.08231425, "step": 1062 }, { "epoch": 2.126, "grad_norm": 5.985095977783203, "learning_rate": 2e-05, "loss": 0.0777497, "step": 1063 }, { "epoch": 2.128, "grad_norm": 6.34456729888916, "learning_rate": 2e-05, "loss": 0.09492326, "step": 1064 }, { "epoch": 2.13, "grad_norm": 4.722805023193359, "learning_rate": 2e-05, "loss": 0.06763934, "step": 1065 }, { "epoch": 2.132, "grad_norm": 6.623774528503418, "learning_rate": 2e-05, "loss": 0.1013829, "step": 1066 }, { "epoch": 2.134, "grad_norm": 7.571280002593994, "learning_rate": 2e-05, "loss": 0.09418335, "step": 1067 }, { "epoch": 2.136, "grad_norm": 5.225839138031006, "learning_rate": 2e-05, "loss": 0.07921963, "step": 1068 }, { "epoch": 2.138, "grad_norm": 5.1321330070495605, "learning_rate": 2e-05, "loss": 0.08395393, "step": 1069 }, { "epoch": 2.14, "grad_norm": 5.141408443450928, "learning_rate": 2e-05, "loss": 0.07597708, "step": 1070 }, { "epoch": 2.142, "grad_norm": 5.3547492027282715, "learning_rate": 2e-05, "loss": 0.09044183, "step": 1071 }, { "epoch": 2.144, "grad_norm": 5.196559906005859, "learning_rate": 2e-05, "loss": 0.06513615, "step": 1072 }, { "epoch": 2.146, "grad_norm": 5.343927383422852, "learning_rate": 2e-05, "loss": 0.09016806, "step": 1073 }, { "epoch": 2.148, "grad_norm": 4.83909273147583, "learning_rate": 2e-05, "loss": 0.05893473, "step": 1074 }, { "epoch": 2.15, "grad_norm": 4.730642318725586, "learning_rate": 2e-05, "loss": 0.07633822, "step": 1075 }, { "epoch": 2.152, "grad_norm": 5.930988311767578, "learning_rate": 2e-05, "loss": 0.08787534, "step": 1076 }, { "epoch": 2.154, "grad_norm": 5.553809642791748, "learning_rate": 2e-05, "loss": 0.06826856, "step": 1077 }, { "epoch": 2.156, "grad_norm": 5.980209827423096, "learning_rate": 2e-05, "loss": 0.08702347, "step": 1078 }, { "epoch": 2.158, "grad_norm": 7.344969749450684, "learning_rate": 2e-05, "loss": 0.14479248, "step": 1079 }, { "epoch": 2.16, "grad_norm": 5.42683744430542, "learning_rate": 2e-05, "loss": 0.06802665, "step": 1080 }, { "epoch": 2.162, "grad_norm": 4.288047790527344, "learning_rate": 2e-05, "loss": 0.06471375, "step": 1081 }, { "epoch": 2.164, "grad_norm": 5.744663715362549, "learning_rate": 2e-05, "loss": 0.08457594, "step": 1082 }, { "epoch": 2.166, "grad_norm": 6.799096584320068, "learning_rate": 2e-05, "loss": 0.10932826, "step": 1083 }, { "epoch": 2.168, "grad_norm": 5.655609607696533, "learning_rate": 2e-05, "loss": 0.07049353, "step": 1084 }, { "epoch": 2.17, "grad_norm": 5.365757942199707, "learning_rate": 2e-05, "loss": 0.08996421, "step": 1085 }, { "epoch": 2.172, "grad_norm": 6.2932515144348145, "learning_rate": 2e-05, "loss": 0.08896601, "step": 1086 }, { "epoch": 2.174, "grad_norm": 5.727114200592041, "learning_rate": 2e-05, "loss": 0.06082165, "step": 1087 }, { "epoch": 2.176, "grad_norm": 5.433359622955322, "learning_rate": 2e-05, "loss": 0.06335334, "step": 1088 }, { "epoch": 2.178, "grad_norm": 6.771284103393555, "learning_rate": 2e-05, "loss": 0.0858922, "step": 1089 }, { "epoch": 2.18, "grad_norm": 6.822760581970215, "learning_rate": 2e-05, "loss": 0.0847028, "step": 1090 }, { "epoch": 2.182, "grad_norm": 4.728634357452393, "learning_rate": 2e-05, "loss": 0.08418906, "step": 1091 }, { "epoch": 2.184, "grad_norm": 7.832975387573242, "learning_rate": 2e-05, "loss": 0.08424877, "step": 1092 }, { "epoch": 2.186, "grad_norm": 7.904279708862305, "learning_rate": 2e-05, "loss": 0.0663535, "step": 1093 }, { "epoch": 2.188, "grad_norm": 6.61509895324707, "learning_rate": 2e-05, "loss": 0.09131549, "step": 1094 }, { "epoch": 2.19, "grad_norm": 6.704990386962891, "learning_rate": 2e-05, "loss": 0.08428369, "step": 1095 }, { "epoch": 2.192, "grad_norm": 6.394984722137451, "learning_rate": 2e-05, "loss": 0.0803104, "step": 1096 }, { "epoch": 2.194, "grad_norm": 5.163837432861328, "learning_rate": 2e-05, "loss": 0.09020324, "step": 1097 }, { "epoch": 2.196, "grad_norm": 6.586339950561523, "learning_rate": 2e-05, "loss": 0.08435233, "step": 1098 }, { "epoch": 2.198, "grad_norm": 5.437119007110596, "learning_rate": 2e-05, "loss": 0.06368561, "step": 1099 }, { "epoch": 2.2, "grad_norm": 6.680806636810303, "learning_rate": 2e-05, "loss": 0.07174167, "step": 1100 }, { "epoch": 2.202, "grad_norm": 6.230100631713867, "learning_rate": 2e-05, "loss": 0.07952577, "step": 1101 }, { "epoch": 2.204, "grad_norm": 7.268177509307861, "learning_rate": 2e-05, "loss": 0.08972441, "step": 1102 }, { "epoch": 2.206, "grad_norm": 6.04674768447876, "learning_rate": 2e-05, "loss": 0.08055016, "step": 1103 }, { "epoch": 2.208, "grad_norm": 5.880983829498291, "learning_rate": 2e-05, "loss": 0.07890573, "step": 1104 }, { "epoch": 2.21, "grad_norm": 5.15964412689209, "learning_rate": 2e-05, "loss": 0.0822338, "step": 1105 }, { "epoch": 2.212, "grad_norm": 4.933701038360596, "learning_rate": 2e-05, "loss": 0.06214568, "step": 1106 }, { "epoch": 2.214, "grad_norm": 7.201409339904785, "learning_rate": 2e-05, "loss": 0.10794005, "step": 1107 }, { "epoch": 2.216, "grad_norm": 4.709375381469727, "learning_rate": 2e-05, "loss": 0.08287051, "step": 1108 }, { "epoch": 2.218, "grad_norm": 4.8598480224609375, "learning_rate": 2e-05, "loss": 0.07301231, "step": 1109 }, { "epoch": 2.22, "grad_norm": 5.664350509643555, "learning_rate": 2e-05, "loss": 0.07297143, "step": 1110 }, { "epoch": 2.222, "grad_norm": 5.539565086364746, "learning_rate": 2e-05, "loss": 0.07656091, "step": 1111 }, { "epoch": 2.224, "grad_norm": 5.560678482055664, "learning_rate": 2e-05, "loss": 0.09850764, "step": 1112 }, { "epoch": 2.226, "grad_norm": 5.358619213104248, "learning_rate": 2e-05, "loss": 0.08147183, "step": 1113 }, { "epoch": 2.228, "grad_norm": 5.97714376449585, "learning_rate": 2e-05, "loss": 0.08706747, "step": 1114 }, { "epoch": 2.23, "grad_norm": 6.1291327476501465, "learning_rate": 2e-05, "loss": 0.07758181, "step": 1115 }, { "epoch": 2.232, "grad_norm": 6.922235012054443, "learning_rate": 2e-05, "loss": 0.11712295, "step": 1116 }, { "epoch": 2.234, "grad_norm": 4.929329872131348, "learning_rate": 2e-05, "loss": 0.0659353, "step": 1117 }, { "epoch": 2.2359999999999998, "grad_norm": 4.7161993980407715, "learning_rate": 2e-05, "loss": 0.05406016, "step": 1118 }, { "epoch": 2.238, "grad_norm": 6.171803951263428, "learning_rate": 2e-05, "loss": 0.05673445, "step": 1119 }, { "epoch": 2.24, "grad_norm": 7.007102966308594, "learning_rate": 2e-05, "loss": 0.08631399, "step": 1120 }, { "epoch": 2.242, "grad_norm": 5.295566082000732, "learning_rate": 2e-05, "loss": 0.09364566, "step": 1121 }, { "epoch": 2.2439999999999998, "grad_norm": 5.972996234893799, "learning_rate": 2e-05, "loss": 0.06623536, "step": 1122 }, { "epoch": 2.246, "grad_norm": 5.0150299072265625, "learning_rate": 2e-05, "loss": 0.07361893, "step": 1123 }, { "epoch": 2.248, "grad_norm": 5.281479835510254, "learning_rate": 2e-05, "loss": 0.09310797, "step": 1124 }, { "epoch": 2.25, "grad_norm": 5.068182468414307, "learning_rate": 2e-05, "loss": 0.08031745, "step": 1125 }, { "epoch": 2.252, "grad_norm": 4.9923505783081055, "learning_rate": 2e-05, "loss": 0.07722259, "step": 1126 }, { "epoch": 2.254, "grad_norm": 3.902501344680786, "learning_rate": 2e-05, "loss": 0.04702578, "step": 1127 }, { "epoch": 2.2560000000000002, "grad_norm": 5.173608779907227, "learning_rate": 2e-05, "loss": 0.07434365, "step": 1128 }, { "epoch": 2.258, "grad_norm": 4.771775722503662, "learning_rate": 2e-05, "loss": 0.07803064, "step": 1129 }, { "epoch": 2.26, "grad_norm": 6.124810695648193, "learning_rate": 2e-05, "loss": 0.10975792, "step": 1130 }, { "epoch": 2.262, "grad_norm": 4.663224697113037, "learning_rate": 2e-05, "loss": 0.06447563, "step": 1131 }, { "epoch": 2.2640000000000002, "grad_norm": 4.7791218757629395, "learning_rate": 2e-05, "loss": 0.0565554, "step": 1132 }, { "epoch": 2.266, "grad_norm": 4.972031593322754, "learning_rate": 2e-05, "loss": 0.07270075, "step": 1133 }, { "epoch": 2.268, "grad_norm": 5.91547155380249, "learning_rate": 2e-05, "loss": 0.10115614, "step": 1134 }, { "epoch": 2.27, "grad_norm": 5.274033546447754, "learning_rate": 2e-05, "loss": 0.09601148, "step": 1135 }, { "epoch": 2.2720000000000002, "grad_norm": 6.01421594619751, "learning_rate": 2e-05, "loss": 0.07112142, "step": 1136 }, { "epoch": 2.274, "grad_norm": 5.376286029815674, "learning_rate": 2e-05, "loss": 0.07552647, "step": 1137 }, { "epoch": 2.276, "grad_norm": 5.428375244140625, "learning_rate": 2e-05, "loss": 0.06634416, "step": 1138 }, { "epoch": 2.278, "grad_norm": 5.473386764526367, "learning_rate": 2e-05, "loss": 0.09482514, "step": 1139 }, { "epoch": 2.2800000000000002, "grad_norm": 5.675748348236084, "learning_rate": 2e-05, "loss": 0.08074202, "step": 1140 }, { "epoch": 2.282, "grad_norm": 5.547980308532715, "learning_rate": 2e-05, "loss": 0.06691185, "step": 1141 }, { "epoch": 2.284, "grad_norm": 4.706792831420898, "learning_rate": 2e-05, "loss": 0.06850377, "step": 1142 }, { "epoch": 2.286, "grad_norm": 6.7053141593933105, "learning_rate": 2e-05, "loss": 0.08840274, "step": 1143 }, { "epoch": 2.288, "grad_norm": 4.547291278839111, "learning_rate": 2e-05, "loss": 0.06282304, "step": 1144 }, { "epoch": 2.29, "grad_norm": 8.066536903381348, "learning_rate": 2e-05, "loss": 0.05761047, "step": 1145 }, { "epoch": 2.292, "grad_norm": 5.985126972198486, "learning_rate": 2e-05, "loss": 0.08994497, "step": 1146 }, { "epoch": 2.294, "grad_norm": 5.541914463043213, "learning_rate": 2e-05, "loss": 0.08463421, "step": 1147 }, { "epoch": 2.296, "grad_norm": 4.761512279510498, "learning_rate": 2e-05, "loss": 0.06920115, "step": 1148 }, { "epoch": 2.298, "grad_norm": 5.000847339630127, "learning_rate": 2e-05, "loss": 0.07110731, "step": 1149 }, { "epoch": 2.3, "grad_norm": 6.84883975982666, "learning_rate": 2e-05, "loss": 0.10046914, "step": 1150 }, { "epoch": 2.302, "grad_norm": 6.188674449920654, "learning_rate": 2e-05, "loss": 0.0813382, "step": 1151 }, { "epoch": 2.304, "grad_norm": 6.1702399253845215, "learning_rate": 2e-05, "loss": 0.09208418, "step": 1152 }, { "epoch": 2.306, "grad_norm": 4.966809272766113, "learning_rate": 2e-05, "loss": 0.08065267, "step": 1153 }, { "epoch": 2.308, "grad_norm": 6.767495155334473, "learning_rate": 2e-05, "loss": 0.10228308, "step": 1154 }, { "epoch": 2.31, "grad_norm": 4.213733196258545, "learning_rate": 2e-05, "loss": 0.07357322, "step": 1155 }, { "epoch": 2.312, "grad_norm": 5.07329797744751, "learning_rate": 2e-05, "loss": 0.08187807, "step": 1156 }, { "epoch": 2.314, "grad_norm": 6.373464584350586, "learning_rate": 2e-05, "loss": 0.10489684, "step": 1157 }, { "epoch": 2.316, "grad_norm": 6.485351085662842, "learning_rate": 2e-05, "loss": 0.07705772, "step": 1158 }, { "epoch": 2.318, "grad_norm": 5.263455867767334, "learning_rate": 2e-05, "loss": 0.07434805, "step": 1159 }, { "epoch": 2.32, "grad_norm": 5.497191905975342, "learning_rate": 2e-05, "loss": 0.06877284, "step": 1160 }, { "epoch": 2.322, "grad_norm": 4.9819440841674805, "learning_rate": 2e-05, "loss": 0.0858512, "step": 1161 }, { "epoch": 2.324, "grad_norm": 4.758230209350586, "learning_rate": 2e-05, "loss": 0.07791442, "step": 1162 }, { "epoch": 2.326, "grad_norm": 6.072017669677734, "learning_rate": 2e-05, "loss": 0.10620181, "step": 1163 }, { "epoch": 2.328, "grad_norm": 5.8445820808410645, "learning_rate": 2e-05, "loss": 0.0853112, "step": 1164 }, { "epoch": 2.33, "grad_norm": 7.001210689544678, "learning_rate": 2e-05, "loss": 0.09143803, "step": 1165 }, { "epoch": 2.332, "grad_norm": 6.406255722045898, "learning_rate": 2e-05, "loss": 0.09526061, "step": 1166 }, { "epoch": 2.334, "grad_norm": 7.393507480621338, "learning_rate": 2e-05, "loss": 0.07722938, "step": 1167 }, { "epoch": 2.336, "grad_norm": 4.938442707061768, "learning_rate": 2e-05, "loss": 0.0625077, "step": 1168 }, { "epoch": 2.338, "grad_norm": 6.283682346343994, "learning_rate": 2e-05, "loss": 0.09158147, "step": 1169 }, { "epoch": 2.34, "grad_norm": 4.3046135902404785, "learning_rate": 2e-05, "loss": 0.06907889, "step": 1170 }, { "epoch": 2.342, "grad_norm": 5.108433246612549, "learning_rate": 2e-05, "loss": 0.06866778, "step": 1171 }, { "epoch": 2.344, "grad_norm": 5.7269287109375, "learning_rate": 2e-05, "loss": 0.08142249, "step": 1172 }, { "epoch": 2.346, "grad_norm": 6.6444993019104, "learning_rate": 2e-05, "loss": 0.09772035, "step": 1173 }, { "epoch": 2.348, "grad_norm": 4.564742088317871, "learning_rate": 2e-05, "loss": 0.06758714, "step": 1174 }, { "epoch": 2.35, "grad_norm": 10.260642051696777, "learning_rate": 2e-05, "loss": 0.09065817, "step": 1175 }, { "epoch": 2.352, "grad_norm": 6.014193058013916, "learning_rate": 2e-05, "loss": 0.07945534, "step": 1176 }, { "epoch": 2.354, "grad_norm": 6.960853576660156, "learning_rate": 2e-05, "loss": 0.0872013, "step": 1177 }, { "epoch": 2.356, "grad_norm": 4.0830769538879395, "learning_rate": 2e-05, "loss": 0.06472573, "step": 1178 }, { "epoch": 2.358, "grad_norm": 5.20409631729126, "learning_rate": 2e-05, "loss": 0.06385504, "step": 1179 }, { "epoch": 2.36, "grad_norm": 5.178505897521973, "learning_rate": 2e-05, "loss": 0.06840534, "step": 1180 }, { "epoch": 2.362, "grad_norm": 5.554486274719238, "learning_rate": 2e-05, "loss": 0.06780627, "step": 1181 }, { "epoch": 2.364, "grad_norm": 5.020849704742432, "learning_rate": 2e-05, "loss": 0.07858465, "step": 1182 }, { "epoch": 2.366, "grad_norm": 5.24446964263916, "learning_rate": 2e-05, "loss": 0.08223285, "step": 1183 }, { "epoch": 2.368, "grad_norm": 6.806016445159912, "learning_rate": 2e-05, "loss": 0.08063979, "step": 1184 }, { "epoch": 2.37, "grad_norm": 15.481805801391602, "learning_rate": 2e-05, "loss": 0.07941291, "step": 1185 }, { "epoch": 2.372, "grad_norm": 7.141376972198486, "learning_rate": 2e-05, "loss": 0.06634214, "step": 1186 }, { "epoch": 2.374, "grad_norm": 5.498879909515381, "learning_rate": 2e-05, "loss": 0.07255428, "step": 1187 }, { "epoch": 2.376, "grad_norm": 5.054952144622803, "learning_rate": 2e-05, "loss": 0.0813861, "step": 1188 }, { "epoch": 2.378, "grad_norm": 5.9393486976623535, "learning_rate": 2e-05, "loss": 0.09017763, "step": 1189 }, { "epoch": 2.38, "grad_norm": 5.591245174407959, "learning_rate": 2e-05, "loss": 0.09344522, "step": 1190 }, { "epoch": 2.382, "grad_norm": 37.33818054199219, "learning_rate": 2e-05, "loss": 0.07337327, "step": 1191 }, { "epoch": 2.384, "grad_norm": 5.594890594482422, "learning_rate": 2e-05, "loss": 0.06210938, "step": 1192 }, { "epoch": 2.386, "grad_norm": 6.054887294769287, "learning_rate": 2e-05, "loss": 0.0693, "step": 1193 }, { "epoch": 2.388, "grad_norm": 5.290995121002197, "learning_rate": 2e-05, "loss": 0.06914002, "step": 1194 }, { "epoch": 2.39, "grad_norm": 6.306216239929199, "learning_rate": 2e-05, "loss": 0.08888833, "step": 1195 }, { "epoch": 2.392, "grad_norm": 5.278257846832275, "learning_rate": 2e-05, "loss": 0.08751106, "step": 1196 }, { "epoch": 2.394, "grad_norm": 4.105417728424072, "learning_rate": 2e-05, "loss": 0.05546909, "step": 1197 }, { "epoch": 2.396, "grad_norm": 5.260617733001709, "learning_rate": 2e-05, "loss": 0.07649423, "step": 1198 }, { "epoch": 2.398, "grad_norm": 5.761737823486328, "learning_rate": 2e-05, "loss": 0.05329856, "step": 1199 }, { "epoch": 2.4, "grad_norm": 6.4426422119140625, "learning_rate": 2e-05, "loss": 0.07726578, "step": 1200 }, { "epoch": 2.402, "grad_norm": 6.715033531188965, "learning_rate": 2e-05, "loss": 0.06243331, "step": 1201 }, { "epoch": 2.404, "grad_norm": 4.354744911193848, "learning_rate": 2e-05, "loss": 0.05478795, "step": 1202 }, { "epoch": 2.406, "grad_norm": 6.116797924041748, "learning_rate": 2e-05, "loss": 0.06525201, "step": 1203 }, { "epoch": 2.408, "grad_norm": 5.35870361328125, "learning_rate": 2e-05, "loss": 0.07480222, "step": 1204 }, { "epoch": 2.41, "grad_norm": 5.8935723304748535, "learning_rate": 2e-05, "loss": 0.09852718, "step": 1205 }, { "epoch": 2.412, "grad_norm": 4.484679222106934, "learning_rate": 2e-05, "loss": 0.07187925, "step": 1206 }, { "epoch": 2.414, "grad_norm": 5.751153945922852, "learning_rate": 2e-05, "loss": 0.06693185, "step": 1207 }, { "epoch": 2.416, "grad_norm": 5.549046039581299, "learning_rate": 2e-05, "loss": 0.08989806, "step": 1208 }, { "epoch": 2.418, "grad_norm": 5.3534321784973145, "learning_rate": 2e-05, "loss": 0.08330922, "step": 1209 }, { "epoch": 2.42, "grad_norm": 6.561792373657227, "learning_rate": 2e-05, "loss": 0.07936198, "step": 1210 }, { "epoch": 2.422, "grad_norm": 5.024012088775635, "learning_rate": 2e-05, "loss": 0.08394977, "step": 1211 }, { "epoch": 2.424, "grad_norm": 6.4466938972473145, "learning_rate": 2e-05, "loss": 0.08277696, "step": 1212 }, { "epoch": 2.426, "grad_norm": 6.761798858642578, "learning_rate": 2e-05, "loss": 0.06964228, "step": 1213 }, { "epoch": 2.428, "grad_norm": 4.503235340118408, "learning_rate": 2e-05, "loss": 0.0568054, "step": 1214 }, { "epoch": 2.43, "grad_norm": 5.743484020233154, "learning_rate": 2e-05, "loss": 0.06583805, "step": 1215 }, { "epoch": 2.432, "grad_norm": 6.312178611755371, "learning_rate": 2e-05, "loss": 0.07125016, "step": 1216 }, { "epoch": 2.434, "grad_norm": 7.05535888671875, "learning_rate": 2e-05, "loss": 0.09196765, "step": 1217 }, { "epoch": 2.436, "grad_norm": 5.990293025970459, "learning_rate": 2e-05, "loss": 0.10468022, "step": 1218 }, { "epoch": 2.438, "grad_norm": 5.076596260070801, "learning_rate": 2e-05, "loss": 0.07186237, "step": 1219 }, { "epoch": 2.44, "grad_norm": 5.002408027648926, "learning_rate": 2e-05, "loss": 0.0641041, "step": 1220 }, { "epoch": 2.442, "grad_norm": 5.5737624168396, "learning_rate": 2e-05, "loss": 0.05410431, "step": 1221 }, { "epoch": 2.444, "grad_norm": 8.078147888183594, "learning_rate": 2e-05, "loss": 0.08526271, "step": 1222 }, { "epoch": 2.446, "grad_norm": 7.746601104736328, "learning_rate": 2e-05, "loss": 0.08832434, "step": 1223 }, { "epoch": 2.448, "grad_norm": 4.811911106109619, "learning_rate": 2e-05, "loss": 0.07613131, "step": 1224 }, { "epoch": 2.45, "grad_norm": 4.417200565338135, "learning_rate": 2e-05, "loss": 0.05493128, "step": 1225 }, { "epoch": 2.452, "grad_norm": 4.264017581939697, "learning_rate": 2e-05, "loss": 0.05327323, "step": 1226 }, { "epoch": 2.454, "grad_norm": 9.60299015045166, "learning_rate": 2e-05, "loss": 0.09563737, "step": 1227 }, { "epoch": 2.456, "grad_norm": 5.353407382965088, "learning_rate": 2e-05, "loss": 0.0802802, "step": 1228 }, { "epoch": 2.458, "grad_norm": 7.351835250854492, "learning_rate": 2e-05, "loss": 0.08160001, "step": 1229 }, { "epoch": 2.46, "grad_norm": 7.252406597137451, "learning_rate": 2e-05, "loss": 0.06769105, "step": 1230 }, { "epoch": 2.462, "grad_norm": 5.489690780639648, "learning_rate": 2e-05, "loss": 0.08315643, "step": 1231 }, { "epoch": 2.464, "grad_norm": 6.582641124725342, "learning_rate": 2e-05, "loss": 0.09513499, "step": 1232 }, { "epoch": 2.466, "grad_norm": 6.51853609085083, "learning_rate": 2e-05, "loss": 0.10061066, "step": 1233 }, { "epoch": 2.468, "grad_norm": 4.916713714599609, "learning_rate": 2e-05, "loss": 0.04970203, "step": 1234 }, { "epoch": 2.4699999999999998, "grad_norm": 5.960850715637207, "learning_rate": 2e-05, "loss": 0.07904314, "step": 1235 }, { "epoch": 2.472, "grad_norm": 4.7265729904174805, "learning_rate": 2e-05, "loss": 0.05169798, "step": 1236 }, { "epoch": 2.474, "grad_norm": 5.009814739227295, "learning_rate": 2e-05, "loss": 0.05214093, "step": 1237 }, { "epoch": 2.476, "grad_norm": 5.183404445648193, "learning_rate": 2e-05, "loss": 0.08365726, "step": 1238 }, { "epoch": 2.4779999999999998, "grad_norm": 4.317318439483643, "learning_rate": 2e-05, "loss": 0.05602934, "step": 1239 }, { "epoch": 2.48, "grad_norm": 4.67846155166626, "learning_rate": 2e-05, "loss": 0.07362764, "step": 1240 }, { "epoch": 2.482, "grad_norm": 3.8671252727508545, "learning_rate": 2e-05, "loss": 0.06022756, "step": 1241 }, { "epoch": 2.484, "grad_norm": 5.294753074645996, "learning_rate": 2e-05, "loss": 0.10089048, "step": 1242 }, { "epoch": 2.4859999999999998, "grad_norm": 5.837879657745361, "learning_rate": 2e-05, "loss": 0.07914748, "step": 1243 }, { "epoch": 2.488, "grad_norm": 5.373979568481445, "learning_rate": 2e-05, "loss": 0.08071019, "step": 1244 }, { "epoch": 2.49, "grad_norm": 6.142536640167236, "learning_rate": 2e-05, "loss": 0.07054859, "step": 1245 }, { "epoch": 2.492, "grad_norm": 4.367980480194092, "learning_rate": 2e-05, "loss": 0.04863194, "step": 1246 }, { "epoch": 2.4939999999999998, "grad_norm": 6.110530853271484, "learning_rate": 2e-05, "loss": 0.06037983, "step": 1247 }, { "epoch": 2.496, "grad_norm": 4.9649457931518555, "learning_rate": 2e-05, "loss": 0.07024585, "step": 1248 }, { "epoch": 2.498, "grad_norm": 6.242032527923584, "learning_rate": 2e-05, "loss": 0.07558655, "step": 1249 }, { "epoch": 2.5, "grad_norm": 7.605246543884277, "learning_rate": 2e-05, "loss": 0.10647859, "step": 1250 }, { "epoch": 2.502, "grad_norm": 6.1875691413879395, "learning_rate": 2e-05, "loss": 0.10073202, "step": 1251 }, { "epoch": 2.504, "grad_norm": 4.690088748931885, "learning_rate": 2e-05, "loss": 0.06636994, "step": 1252 }, { "epoch": 2.5060000000000002, "grad_norm": 5.160874843597412, "learning_rate": 2e-05, "loss": 0.07866879, "step": 1253 }, { "epoch": 2.508, "grad_norm": 6.658988952636719, "learning_rate": 2e-05, "loss": 0.10458536, "step": 1254 }, { "epoch": 2.51, "grad_norm": 6.365893840789795, "learning_rate": 2e-05, "loss": 0.08236815, "step": 1255 }, { "epoch": 2.512, "grad_norm": 7.188866138458252, "learning_rate": 2e-05, "loss": 0.10720773, "step": 1256 }, { "epoch": 2.5140000000000002, "grad_norm": 6.304254055023193, "learning_rate": 2e-05, "loss": 0.07806879, "step": 1257 }, { "epoch": 2.516, "grad_norm": 5.643954277038574, "learning_rate": 2e-05, "loss": 0.08640802, "step": 1258 }, { "epoch": 2.518, "grad_norm": 6.35936975479126, "learning_rate": 2e-05, "loss": 0.09682062, "step": 1259 }, { "epoch": 2.52, "grad_norm": 5.179384231567383, "learning_rate": 2e-05, "loss": 0.08847062, "step": 1260 }, { "epoch": 2.5220000000000002, "grad_norm": 5.6360764503479, "learning_rate": 2e-05, "loss": 0.08188983, "step": 1261 }, { "epoch": 2.524, "grad_norm": 5.515124320983887, "learning_rate": 2e-05, "loss": 0.070769, "step": 1262 }, { "epoch": 2.526, "grad_norm": 5.865851402282715, "learning_rate": 2e-05, "loss": 0.06173115, "step": 1263 }, { "epoch": 2.528, "grad_norm": 4.06578254699707, "learning_rate": 2e-05, "loss": 0.0883168, "step": 1264 }, { "epoch": 2.5300000000000002, "grad_norm": 4.592488765716553, "learning_rate": 2e-05, "loss": 0.08153678, "step": 1265 }, { "epoch": 2.532, "grad_norm": 6.047873497009277, "learning_rate": 2e-05, "loss": 0.10035791, "step": 1266 }, { "epoch": 2.534, "grad_norm": 6.966950416564941, "learning_rate": 2e-05, "loss": 0.0934031, "step": 1267 }, { "epoch": 2.536, "grad_norm": 5.9313883781433105, "learning_rate": 2e-05, "loss": 0.07072015, "step": 1268 }, { "epoch": 2.5380000000000003, "grad_norm": 5.758888244628906, "learning_rate": 2e-05, "loss": 0.07625753, "step": 1269 }, { "epoch": 2.54, "grad_norm": 6.424624919891357, "learning_rate": 2e-05, "loss": 0.08509238, "step": 1270 }, { "epoch": 2.542, "grad_norm": 5.09171199798584, "learning_rate": 2e-05, "loss": 0.07427575, "step": 1271 }, { "epoch": 2.544, "grad_norm": 5.691224575042725, "learning_rate": 2e-05, "loss": 0.07022868, "step": 1272 }, { "epoch": 2.5460000000000003, "grad_norm": 6.550955772399902, "learning_rate": 2e-05, "loss": 0.07997157, "step": 1273 }, { "epoch": 2.548, "grad_norm": 4.806466579437256, "learning_rate": 2e-05, "loss": 0.06719106, "step": 1274 }, { "epoch": 2.55, "grad_norm": 5.4354448318481445, "learning_rate": 2e-05, "loss": 0.08597517, "step": 1275 }, { "epoch": 2.552, "grad_norm": 3.655636787414551, "learning_rate": 2e-05, "loss": 0.06977253, "step": 1276 }, { "epoch": 2.5540000000000003, "grad_norm": 4.464087963104248, "learning_rate": 2e-05, "loss": 0.0659724, "step": 1277 }, { "epoch": 2.556, "grad_norm": 4.551310062408447, "learning_rate": 2e-05, "loss": 0.07688382, "step": 1278 }, { "epoch": 2.558, "grad_norm": 4.5061516761779785, "learning_rate": 2e-05, "loss": 0.06800941, "step": 1279 }, { "epoch": 2.56, "grad_norm": 6.325785160064697, "learning_rate": 2e-05, "loss": 0.07439247, "step": 1280 }, { "epoch": 2.5620000000000003, "grad_norm": 4.956794261932373, "learning_rate": 2e-05, "loss": 0.07928403, "step": 1281 }, { "epoch": 2.564, "grad_norm": 5.484737873077393, "learning_rate": 2e-05, "loss": 0.09598506, "step": 1282 }, { "epoch": 2.566, "grad_norm": 4.947423934936523, "learning_rate": 2e-05, "loss": 0.06288248, "step": 1283 }, { "epoch": 2.568, "grad_norm": 3.828274726867676, "learning_rate": 2e-05, "loss": 0.0543214, "step": 1284 }, { "epoch": 2.57, "grad_norm": 4.15639066696167, "learning_rate": 2e-05, "loss": 0.05491457, "step": 1285 }, { "epoch": 2.572, "grad_norm": 4.405662536621094, "learning_rate": 2e-05, "loss": 0.05447604, "step": 1286 }, { "epoch": 2.574, "grad_norm": 5.216904640197754, "learning_rate": 2e-05, "loss": 0.06671653, "step": 1287 }, { "epoch": 2.576, "grad_norm": 5.616031169891357, "learning_rate": 2e-05, "loss": 0.09019951, "step": 1288 }, { "epoch": 2.578, "grad_norm": 5.422451972961426, "learning_rate": 2e-05, "loss": 0.08179002, "step": 1289 }, { "epoch": 2.58, "grad_norm": 5.5299577713012695, "learning_rate": 2e-05, "loss": 0.08797802, "step": 1290 }, { "epoch": 2.582, "grad_norm": 7.264585971832275, "learning_rate": 2e-05, "loss": 0.07468256, "step": 1291 }, { "epoch": 2.584, "grad_norm": 4.143320083618164, "learning_rate": 2e-05, "loss": 0.05867775, "step": 1292 }, { "epoch": 2.586, "grad_norm": 5.1407389640808105, "learning_rate": 2e-05, "loss": 0.0726826, "step": 1293 }, { "epoch": 2.588, "grad_norm": 5.972986698150635, "learning_rate": 2e-05, "loss": 0.06919511, "step": 1294 }, { "epoch": 2.59, "grad_norm": 5.99080228805542, "learning_rate": 2e-05, "loss": 0.07770181, "step": 1295 }, { "epoch": 2.592, "grad_norm": 4.994774341583252, "learning_rate": 2e-05, "loss": 0.07375708, "step": 1296 }, { "epoch": 2.594, "grad_norm": 6.169222831726074, "learning_rate": 2e-05, "loss": 0.07211353, "step": 1297 }, { "epoch": 2.596, "grad_norm": 4.878756046295166, "learning_rate": 2e-05, "loss": 0.06312254, "step": 1298 }, { "epoch": 2.598, "grad_norm": 5.18170690536499, "learning_rate": 2e-05, "loss": 0.0804698, "step": 1299 }, { "epoch": 2.6, "grad_norm": 6.046295166015625, "learning_rate": 2e-05, "loss": 0.05458752, "step": 1300 }, { "epoch": 2.602, "grad_norm": 4.170097827911377, "learning_rate": 2e-05, "loss": 0.07297536, "step": 1301 }, { "epoch": 2.604, "grad_norm": 5.846515655517578, "learning_rate": 2e-05, "loss": 0.07929747, "step": 1302 }, { "epoch": 2.606, "grad_norm": 4.205134391784668, "learning_rate": 2e-05, "loss": 0.05458839, "step": 1303 }, { "epoch": 2.608, "grad_norm": 5.617702007293701, "learning_rate": 2e-05, "loss": 0.09402043, "step": 1304 }, { "epoch": 2.61, "grad_norm": 5.576301574707031, "learning_rate": 2e-05, "loss": 0.07889977, "step": 1305 }, { "epoch": 2.612, "grad_norm": 7.310355186462402, "learning_rate": 2e-05, "loss": 0.11928101, "step": 1306 }, { "epoch": 2.614, "grad_norm": 5.422811031341553, "learning_rate": 2e-05, "loss": 0.07145589, "step": 1307 }, { "epoch": 2.616, "grad_norm": 4.83586311340332, "learning_rate": 2e-05, "loss": 0.06326032, "step": 1308 }, { "epoch": 2.618, "grad_norm": 5.717964172363281, "learning_rate": 2e-05, "loss": 0.06816898, "step": 1309 }, { "epoch": 2.62, "grad_norm": 5.125365257263184, "learning_rate": 2e-05, "loss": 0.05895907, "step": 1310 }, { "epoch": 2.622, "grad_norm": 4.338197231292725, "learning_rate": 2e-05, "loss": 0.06730356, "step": 1311 }, { "epoch": 2.624, "grad_norm": 4.345890522003174, "learning_rate": 2e-05, "loss": 0.05717725, "step": 1312 }, { "epoch": 2.626, "grad_norm": 4.829798698425293, "learning_rate": 2e-05, "loss": 0.0682925, "step": 1313 }, { "epoch": 2.628, "grad_norm": 6.491635322570801, "learning_rate": 2e-05, "loss": 0.07650462, "step": 1314 }, { "epoch": 2.63, "grad_norm": 5.380314826965332, "learning_rate": 2e-05, "loss": 0.07516222, "step": 1315 }, { "epoch": 2.632, "grad_norm": 4.747267723083496, "learning_rate": 2e-05, "loss": 0.06479242, "step": 1316 }, { "epoch": 2.634, "grad_norm": 5.502625465393066, "learning_rate": 2e-05, "loss": 0.0927922, "step": 1317 }, { "epoch": 2.636, "grad_norm": 6.715336322784424, "learning_rate": 2e-05, "loss": 0.09325027, "step": 1318 }, { "epoch": 2.638, "grad_norm": 4.112151622772217, "learning_rate": 2e-05, "loss": 0.05589619, "step": 1319 }, { "epoch": 2.64, "grad_norm": 4.051852703094482, "learning_rate": 2e-05, "loss": 0.04420281, "step": 1320 }, { "epoch": 2.642, "grad_norm": 4.909099578857422, "learning_rate": 2e-05, "loss": 0.05575426, "step": 1321 }, { "epoch": 2.644, "grad_norm": 6.179635047912598, "learning_rate": 2e-05, "loss": 0.09131166, "step": 1322 }, { "epoch": 2.646, "grad_norm": 4.497567653656006, "learning_rate": 2e-05, "loss": 0.07597355, "step": 1323 }, { "epoch": 2.648, "grad_norm": 4.5020318031311035, "learning_rate": 2e-05, "loss": 0.08069295, "step": 1324 }, { "epoch": 2.65, "grad_norm": 4.305017471313477, "learning_rate": 2e-05, "loss": 0.06162541, "step": 1325 }, { "epoch": 2.652, "grad_norm": 6.5832343101501465, "learning_rate": 2e-05, "loss": 0.07771528, "step": 1326 }, { "epoch": 2.654, "grad_norm": 4.385313987731934, "learning_rate": 2e-05, "loss": 0.09039738, "step": 1327 }, { "epoch": 2.656, "grad_norm": 6.124626636505127, "learning_rate": 2e-05, "loss": 0.07884485, "step": 1328 }, { "epoch": 2.658, "grad_norm": 3.5760817527770996, "learning_rate": 2e-05, "loss": 0.05744233, "step": 1329 }, { "epoch": 2.66, "grad_norm": 5.044926643371582, "learning_rate": 2e-05, "loss": 0.08581438, "step": 1330 }, { "epoch": 2.662, "grad_norm": 5.170158386230469, "learning_rate": 2e-05, "loss": 0.077742, "step": 1331 }, { "epoch": 2.664, "grad_norm": 6.509402751922607, "learning_rate": 2e-05, "loss": 0.11366113, "step": 1332 }, { "epoch": 2.666, "grad_norm": 4.188140392303467, "learning_rate": 2e-05, "loss": 0.06148365, "step": 1333 }, { "epoch": 2.668, "grad_norm": 5.087245464324951, "learning_rate": 2e-05, "loss": 0.08526592, "step": 1334 }, { "epoch": 2.67, "grad_norm": 4.59771728515625, "learning_rate": 2e-05, "loss": 0.07254293, "step": 1335 }, { "epoch": 2.672, "grad_norm": 4.312002658843994, "learning_rate": 2e-05, "loss": 0.0762312, "step": 1336 }, { "epoch": 2.674, "grad_norm": 4.7877278327941895, "learning_rate": 2e-05, "loss": 0.05891936, "step": 1337 }, { "epoch": 2.676, "grad_norm": 4.1538262367248535, "learning_rate": 2e-05, "loss": 0.0565079, "step": 1338 }, { "epoch": 2.678, "grad_norm": 4.586218357086182, "learning_rate": 2e-05, "loss": 0.06201287, "step": 1339 }, { "epoch": 2.68, "grad_norm": 4.457334995269775, "learning_rate": 2e-05, "loss": 0.07935358, "step": 1340 }, { "epoch": 2.682, "grad_norm": 4.879036903381348, "learning_rate": 2e-05, "loss": 0.06873338, "step": 1341 }, { "epoch": 2.684, "grad_norm": 3.673203945159912, "learning_rate": 2e-05, "loss": 0.06258509, "step": 1342 }, { "epoch": 2.686, "grad_norm": 4.876821994781494, "learning_rate": 2e-05, "loss": 0.08811098, "step": 1343 }, { "epoch": 2.6879999999999997, "grad_norm": 4.253979682922363, "learning_rate": 2e-05, "loss": 0.08000984, "step": 1344 }, { "epoch": 2.69, "grad_norm": 4.685088634490967, "learning_rate": 2e-05, "loss": 0.07478537, "step": 1345 }, { "epoch": 2.692, "grad_norm": 4.919564723968506, "learning_rate": 2e-05, "loss": 0.08269052, "step": 1346 }, { "epoch": 2.694, "grad_norm": 4.0247931480407715, "learning_rate": 2e-05, "loss": 0.04647397, "step": 1347 }, { "epoch": 2.6959999999999997, "grad_norm": 4.788113117218018, "learning_rate": 2e-05, "loss": 0.06723179, "step": 1348 }, { "epoch": 2.698, "grad_norm": 4.487485408782959, "learning_rate": 2e-05, "loss": 0.05801991, "step": 1349 }, { "epoch": 2.7, "grad_norm": 4.474399089813232, "learning_rate": 2e-05, "loss": 0.05765761, "step": 1350 }, { "epoch": 2.702, "grad_norm": 3.7378993034362793, "learning_rate": 2e-05, "loss": 0.06673734, "step": 1351 }, { "epoch": 2.7039999999999997, "grad_norm": 5.631976127624512, "learning_rate": 2e-05, "loss": 0.05933935, "step": 1352 }, { "epoch": 2.706, "grad_norm": 5.648502349853516, "learning_rate": 2e-05, "loss": 0.0844043, "step": 1353 }, { "epoch": 2.708, "grad_norm": 6.377685546875, "learning_rate": 2e-05, "loss": 0.09165794, "step": 1354 }, { "epoch": 2.71, "grad_norm": 4.606356620788574, "learning_rate": 2e-05, "loss": 0.06095506, "step": 1355 }, { "epoch": 2.7119999999999997, "grad_norm": 5.599758625030518, "learning_rate": 2e-05, "loss": 0.06079054, "step": 1356 }, { "epoch": 2.714, "grad_norm": 5.73459529876709, "learning_rate": 2e-05, "loss": 0.09509335, "step": 1357 }, { "epoch": 2.716, "grad_norm": 3.690424919128418, "learning_rate": 2e-05, "loss": 0.05736064, "step": 1358 }, { "epoch": 2.718, "grad_norm": 4.053906440734863, "learning_rate": 2e-05, "loss": 0.05108993, "step": 1359 }, { "epoch": 2.7199999999999998, "grad_norm": 4.809811115264893, "learning_rate": 2e-05, "loss": 0.07024194, "step": 1360 }, { "epoch": 2.722, "grad_norm": 5.437577247619629, "learning_rate": 2e-05, "loss": 0.0698881, "step": 1361 }, { "epoch": 2.724, "grad_norm": 5.100543022155762, "learning_rate": 2e-05, "loss": 0.06575071, "step": 1362 }, { "epoch": 2.726, "grad_norm": 4.769706726074219, "learning_rate": 2e-05, "loss": 0.06985468, "step": 1363 }, { "epoch": 2.7279999999999998, "grad_norm": 5.803448677062988, "learning_rate": 2e-05, "loss": 0.07427995, "step": 1364 }, { "epoch": 2.73, "grad_norm": 4.377640247344971, "learning_rate": 2e-05, "loss": 0.06729538, "step": 1365 }, { "epoch": 2.732, "grad_norm": 5.4514617919921875, "learning_rate": 2e-05, "loss": 0.06171414, "step": 1366 }, { "epoch": 2.734, "grad_norm": 4.662461757659912, "learning_rate": 2e-05, "loss": 0.06353465, "step": 1367 }, { "epoch": 2.7359999999999998, "grad_norm": 5.443431854248047, "learning_rate": 2e-05, "loss": 0.07992643, "step": 1368 }, { "epoch": 2.738, "grad_norm": 5.707746982574463, "learning_rate": 2e-05, "loss": 0.09474852, "step": 1369 }, { "epoch": 2.74, "grad_norm": 4.546706199645996, "learning_rate": 2e-05, "loss": 0.05039814, "step": 1370 }, { "epoch": 2.742, "grad_norm": 5.38310432434082, "learning_rate": 2e-05, "loss": 0.0700554, "step": 1371 }, { "epoch": 2.7439999999999998, "grad_norm": 4.6162519454956055, "learning_rate": 2e-05, "loss": 0.04815501, "step": 1372 }, { "epoch": 2.746, "grad_norm": 5.699137210845947, "learning_rate": 2e-05, "loss": 0.08332913, "step": 1373 }, { "epoch": 2.748, "grad_norm": 4.213466644287109, "learning_rate": 2e-05, "loss": 0.06077214, "step": 1374 }, { "epoch": 2.75, "grad_norm": 4.650712490081787, "learning_rate": 2e-05, "loss": 0.0767339, "step": 1375 }, { "epoch": 2.752, "grad_norm": 5.643423080444336, "learning_rate": 2e-05, "loss": 0.07488929, "step": 1376 }, { "epoch": 2.754, "grad_norm": 4.3261308670043945, "learning_rate": 2e-05, "loss": 0.05642552, "step": 1377 }, { "epoch": 2.7560000000000002, "grad_norm": 5.553139686584473, "learning_rate": 2e-05, "loss": 0.05927372, "step": 1378 }, { "epoch": 2.758, "grad_norm": 5.019532203674316, "learning_rate": 2e-05, "loss": 0.07993215, "step": 1379 }, { "epoch": 2.76, "grad_norm": 5.1430230140686035, "learning_rate": 2e-05, "loss": 0.06921209, "step": 1380 }, { "epoch": 2.762, "grad_norm": 5.408932685852051, "learning_rate": 2e-05, "loss": 0.0604035, "step": 1381 }, { "epoch": 2.7640000000000002, "grad_norm": 5.721793174743652, "learning_rate": 2e-05, "loss": 0.07238592, "step": 1382 }, { "epoch": 2.766, "grad_norm": 4.857784748077393, "learning_rate": 2e-05, "loss": 0.08240078, "step": 1383 }, { "epoch": 2.768, "grad_norm": 5.017916202545166, "learning_rate": 2e-05, "loss": 0.0663954, "step": 1384 }, { "epoch": 2.77, "grad_norm": 4.7667975425720215, "learning_rate": 2e-05, "loss": 0.06147703, "step": 1385 }, { "epoch": 2.7720000000000002, "grad_norm": 5.663891792297363, "learning_rate": 2e-05, "loss": 0.07591307, "step": 1386 }, { "epoch": 2.774, "grad_norm": 5.284800052642822, "learning_rate": 2e-05, "loss": 0.06185038, "step": 1387 }, { "epoch": 2.776, "grad_norm": 3.8098089694976807, "learning_rate": 2e-05, "loss": 0.04929839, "step": 1388 }, { "epoch": 2.778, "grad_norm": 6.143580913543701, "learning_rate": 2e-05, "loss": 0.0871262, "step": 1389 }, { "epoch": 2.7800000000000002, "grad_norm": 5.837085723876953, "learning_rate": 2e-05, "loss": 0.10942918, "step": 1390 }, { "epoch": 2.782, "grad_norm": 5.232043743133545, "learning_rate": 2e-05, "loss": 0.08751905, "step": 1391 }, { "epoch": 2.784, "grad_norm": 4.226070404052734, "learning_rate": 2e-05, "loss": 0.05725982, "step": 1392 }, { "epoch": 2.786, "grad_norm": 4.514472007751465, "learning_rate": 2e-05, "loss": 0.07189968, "step": 1393 }, { "epoch": 2.7880000000000003, "grad_norm": 5.932820796966553, "learning_rate": 2e-05, "loss": 0.08903076, "step": 1394 }, { "epoch": 2.79, "grad_norm": 5.2347798347473145, "learning_rate": 2e-05, "loss": 0.07420037, "step": 1395 }, { "epoch": 2.792, "grad_norm": 5.1436896324157715, "learning_rate": 2e-05, "loss": 0.05075792, "step": 1396 }, { "epoch": 2.794, "grad_norm": 6.220791339874268, "learning_rate": 2e-05, "loss": 0.07871823, "step": 1397 }, { "epoch": 2.7960000000000003, "grad_norm": 4.491309642791748, "learning_rate": 2e-05, "loss": 0.04779669, "step": 1398 }, { "epoch": 2.798, "grad_norm": 4.795475006103516, "learning_rate": 2e-05, "loss": 0.07495861, "step": 1399 }, { "epoch": 2.8, "grad_norm": 4.925161838531494, "learning_rate": 2e-05, "loss": 0.0664306, "step": 1400 }, { "epoch": 2.802, "grad_norm": 4.5475664138793945, "learning_rate": 2e-05, "loss": 0.06985246, "step": 1401 }, { "epoch": 2.8040000000000003, "grad_norm": 4.399571418762207, "learning_rate": 2e-05, "loss": 0.06108593, "step": 1402 }, { "epoch": 2.806, "grad_norm": 7.562832355499268, "learning_rate": 2e-05, "loss": 0.06582917, "step": 1403 }, { "epoch": 2.808, "grad_norm": 4.223455905914307, "learning_rate": 2e-05, "loss": 0.05525767, "step": 1404 }, { "epoch": 2.81, "grad_norm": 5.335864067077637, "learning_rate": 2e-05, "loss": 0.05909792, "step": 1405 }, { "epoch": 2.8120000000000003, "grad_norm": 5.182897567749023, "learning_rate": 2e-05, "loss": 0.08114278, "step": 1406 }, { "epoch": 2.814, "grad_norm": 5.041176795959473, "learning_rate": 2e-05, "loss": 0.06600139, "step": 1407 }, { "epoch": 2.816, "grad_norm": 9.034296035766602, "learning_rate": 2e-05, "loss": 0.07311696, "step": 1408 }, { "epoch": 2.818, "grad_norm": 3.879432201385498, "learning_rate": 2e-05, "loss": 0.05282513, "step": 1409 }, { "epoch": 2.82, "grad_norm": 4.10652494430542, "learning_rate": 2e-05, "loss": 0.05345678, "step": 1410 }, { "epoch": 2.822, "grad_norm": 5.674065589904785, "learning_rate": 2e-05, "loss": 0.06592247, "step": 1411 }, { "epoch": 2.824, "grad_norm": 4.661259651184082, "learning_rate": 2e-05, "loss": 0.0883222, "step": 1412 }, { "epoch": 2.826, "grad_norm": 5.296480655670166, "learning_rate": 2e-05, "loss": 0.06214865, "step": 1413 }, { "epoch": 2.828, "grad_norm": 4.969492435455322, "learning_rate": 2e-05, "loss": 0.06010087, "step": 1414 }, { "epoch": 2.83, "grad_norm": 4.775017261505127, "learning_rate": 2e-05, "loss": 0.07331644, "step": 1415 }, { "epoch": 2.832, "grad_norm": 5.684389114379883, "learning_rate": 2e-05, "loss": 0.09975757, "step": 1416 }, { "epoch": 2.834, "grad_norm": 4.832347393035889, "learning_rate": 2e-05, "loss": 0.05441197, "step": 1417 }, { "epoch": 2.836, "grad_norm": 4.790868282318115, "learning_rate": 2e-05, "loss": 0.05767885, "step": 1418 }, { "epoch": 2.838, "grad_norm": 5.186351776123047, "learning_rate": 2e-05, "loss": 0.05575167, "step": 1419 }, { "epoch": 2.84, "grad_norm": 4.546217441558838, "learning_rate": 2e-05, "loss": 0.05732023, "step": 1420 }, { "epoch": 2.842, "grad_norm": 6.1227240562438965, "learning_rate": 2e-05, "loss": 0.07982071, "step": 1421 }, { "epoch": 2.844, "grad_norm": 4.2666401863098145, "learning_rate": 2e-05, "loss": 0.05464654, "step": 1422 }, { "epoch": 2.846, "grad_norm": 6.266542911529541, "learning_rate": 2e-05, "loss": 0.06304607, "step": 1423 }, { "epoch": 2.848, "grad_norm": 5.274692535400391, "learning_rate": 2e-05, "loss": 0.08358287, "step": 1424 }, { "epoch": 2.85, "grad_norm": 4.728453636169434, "learning_rate": 2e-05, "loss": 0.07299101, "step": 1425 }, { "epoch": 2.852, "grad_norm": 4.427935600280762, "learning_rate": 2e-05, "loss": 0.06664719, "step": 1426 }, { "epoch": 2.854, "grad_norm": 5.089621067047119, "learning_rate": 2e-05, "loss": 0.063522, "step": 1427 }, { "epoch": 2.856, "grad_norm": 4.909590244293213, "learning_rate": 2e-05, "loss": 0.04818332, "step": 1428 }, { "epoch": 2.858, "grad_norm": 4.207398414611816, "learning_rate": 2e-05, "loss": 0.06794796, "step": 1429 }, { "epoch": 2.86, "grad_norm": 4.997939586639404, "learning_rate": 2e-05, "loss": 0.07877656, "step": 1430 }, { "epoch": 2.862, "grad_norm": 5.016386032104492, "learning_rate": 2e-05, "loss": 0.08531716, "step": 1431 }, { "epoch": 2.864, "grad_norm": 4.332927227020264, "learning_rate": 2e-05, "loss": 0.07253157, "step": 1432 }, { "epoch": 2.866, "grad_norm": 3.6805167198181152, "learning_rate": 2e-05, "loss": 0.0489752, "step": 1433 }, { "epoch": 2.868, "grad_norm": 5.073846340179443, "learning_rate": 2e-05, "loss": 0.06045406, "step": 1434 }, { "epoch": 2.87, "grad_norm": 4.925167083740234, "learning_rate": 2e-05, "loss": 0.06546582, "step": 1435 }, { "epoch": 2.872, "grad_norm": 4.428837776184082, "learning_rate": 2e-05, "loss": 0.07281128, "step": 1436 }, { "epoch": 2.874, "grad_norm": 5.2609028816223145, "learning_rate": 2e-05, "loss": 0.04872175, "step": 1437 }, { "epoch": 2.876, "grad_norm": 4.899155139923096, "learning_rate": 2e-05, "loss": 0.09381916, "step": 1438 }, { "epoch": 2.878, "grad_norm": 5.950469493865967, "learning_rate": 2e-05, "loss": 0.07695569, "step": 1439 }, { "epoch": 2.88, "grad_norm": 5.539236068725586, "learning_rate": 2e-05, "loss": 0.06980915, "step": 1440 }, { "epoch": 2.882, "grad_norm": 5.101701736450195, "learning_rate": 2e-05, "loss": 0.05860341, "step": 1441 }, { "epoch": 2.884, "grad_norm": 3.4922256469726562, "learning_rate": 2e-05, "loss": 0.0549218, "step": 1442 }, { "epoch": 2.886, "grad_norm": 4.499914646148682, "learning_rate": 2e-05, "loss": 0.07425135, "step": 1443 }, { "epoch": 2.888, "grad_norm": 5.7068400382995605, "learning_rate": 2e-05, "loss": 0.06604729, "step": 1444 }, { "epoch": 2.89, "grad_norm": 4.692481994628906, "learning_rate": 2e-05, "loss": 0.05810877, "step": 1445 }, { "epoch": 2.892, "grad_norm": 6.431883335113525, "learning_rate": 2e-05, "loss": 0.05421295, "step": 1446 }, { "epoch": 2.894, "grad_norm": 5.984167098999023, "learning_rate": 2e-05, "loss": 0.10578743, "step": 1447 }, { "epoch": 2.896, "grad_norm": 5.361537456512451, "learning_rate": 2e-05, "loss": 0.06565633, "step": 1448 }, { "epoch": 2.898, "grad_norm": 4.809846878051758, "learning_rate": 2e-05, "loss": 0.06923097, "step": 1449 }, { "epoch": 2.9, "grad_norm": 3.9343526363372803, "learning_rate": 2e-05, "loss": 0.05750292, "step": 1450 }, { "epoch": 2.902, "grad_norm": 5.054121971130371, "learning_rate": 2e-05, "loss": 0.06803121, "step": 1451 }, { "epoch": 2.904, "grad_norm": 4.666118621826172, "learning_rate": 2e-05, "loss": 0.06145911, "step": 1452 }, { "epoch": 2.906, "grad_norm": 4.033207893371582, "learning_rate": 2e-05, "loss": 0.05481056, "step": 1453 }, { "epoch": 2.908, "grad_norm": 4.553110122680664, "learning_rate": 2e-05, "loss": 0.04086772, "step": 1454 }, { "epoch": 2.91, "grad_norm": 4.863442897796631, "learning_rate": 2e-05, "loss": 0.05618111, "step": 1455 }, { "epoch": 2.912, "grad_norm": 4.423730850219727, "learning_rate": 2e-05, "loss": 0.05856941, "step": 1456 }, { "epoch": 2.914, "grad_norm": 5.669756889343262, "learning_rate": 2e-05, "loss": 0.07215314, "step": 1457 }, { "epoch": 2.916, "grad_norm": 4.186042308807373, "learning_rate": 2e-05, "loss": 0.06091508, "step": 1458 }, { "epoch": 2.918, "grad_norm": 5.494351863861084, "learning_rate": 2e-05, "loss": 0.0714608, "step": 1459 }, { "epoch": 2.92, "grad_norm": 4.638820648193359, "learning_rate": 2e-05, "loss": 0.05548988, "step": 1460 }, { "epoch": 2.922, "grad_norm": 4.526041030883789, "learning_rate": 2e-05, "loss": 0.06632301, "step": 1461 }, { "epoch": 2.924, "grad_norm": 4.842534065246582, "learning_rate": 2e-05, "loss": 0.05705705, "step": 1462 }, { "epoch": 2.926, "grad_norm": 4.799449920654297, "learning_rate": 2e-05, "loss": 0.072478, "step": 1463 }, { "epoch": 2.928, "grad_norm": 6.218446254730225, "learning_rate": 2e-05, "loss": 0.07640161, "step": 1464 }, { "epoch": 2.93, "grad_norm": 4.530174732208252, "learning_rate": 2e-05, "loss": 0.0634205, "step": 1465 }, { "epoch": 2.932, "grad_norm": 4.903495788574219, "learning_rate": 2e-05, "loss": 0.0531101, "step": 1466 }, { "epoch": 2.934, "grad_norm": 5.096945285797119, "learning_rate": 2e-05, "loss": 0.08750156, "step": 1467 }, { "epoch": 2.936, "grad_norm": 6.575497150421143, "learning_rate": 2e-05, "loss": 0.08634335, "step": 1468 }, { "epoch": 2.9379999999999997, "grad_norm": 4.025604724884033, "learning_rate": 2e-05, "loss": 0.05391017, "step": 1469 }, { "epoch": 2.94, "grad_norm": 4.625178337097168, "learning_rate": 2e-05, "loss": 0.06895852, "step": 1470 }, { "epoch": 2.942, "grad_norm": 5.109052658081055, "learning_rate": 2e-05, "loss": 0.06009777, "step": 1471 }, { "epoch": 2.944, "grad_norm": 4.281373023986816, "learning_rate": 2e-05, "loss": 0.0423565, "step": 1472 }, { "epoch": 2.9459999999999997, "grad_norm": 5.836949348449707, "learning_rate": 2e-05, "loss": 0.07466985, "step": 1473 }, { "epoch": 2.948, "grad_norm": 4.548582553863525, "learning_rate": 2e-05, "loss": 0.0661356, "step": 1474 }, { "epoch": 2.95, "grad_norm": 4.724868297576904, "learning_rate": 2e-05, "loss": 0.05822549, "step": 1475 }, { "epoch": 2.952, "grad_norm": 3.5884885787963867, "learning_rate": 2e-05, "loss": 0.05247332, "step": 1476 }, { "epoch": 2.9539999999999997, "grad_norm": 4.979867458343506, "learning_rate": 2e-05, "loss": 0.07437153, "step": 1477 }, { "epoch": 2.956, "grad_norm": 5.237345218658447, "learning_rate": 2e-05, "loss": 0.08136871, "step": 1478 }, { "epoch": 2.958, "grad_norm": 4.860877990722656, "learning_rate": 2e-05, "loss": 0.05308364, "step": 1479 }, { "epoch": 2.96, "grad_norm": 3.881074905395508, "learning_rate": 2e-05, "loss": 0.04458453, "step": 1480 }, { "epoch": 2.9619999999999997, "grad_norm": 4.207967758178711, "learning_rate": 2e-05, "loss": 0.04740631, "step": 1481 }, { "epoch": 2.964, "grad_norm": 5.108795642852783, "learning_rate": 2e-05, "loss": 0.06322924, "step": 1482 }, { "epoch": 2.966, "grad_norm": 5.611663341522217, "learning_rate": 2e-05, "loss": 0.06308508, "step": 1483 }, { "epoch": 2.968, "grad_norm": 3.6882166862487793, "learning_rate": 2e-05, "loss": 0.047226, "step": 1484 }, { "epoch": 2.9699999999999998, "grad_norm": 4.848994731903076, "learning_rate": 2e-05, "loss": 0.06418704, "step": 1485 }, { "epoch": 2.972, "grad_norm": 3.7295162677764893, "learning_rate": 2e-05, "loss": 0.02893624, "step": 1486 }, { "epoch": 2.974, "grad_norm": 4.756717205047607, "learning_rate": 2e-05, "loss": 0.04974237, "step": 1487 }, { "epoch": 2.976, "grad_norm": 5.72264289855957, "learning_rate": 2e-05, "loss": 0.09116939, "step": 1488 }, { "epoch": 2.9779999999999998, "grad_norm": 4.6187543869018555, "learning_rate": 2e-05, "loss": 0.04494797, "step": 1489 }, { "epoch": 2.98, "grad_norm": 4.823479652404785, "learning_rate": 2e-05, "loss": 0.06975439, "step": 1490 }, { "epoch": 2.982, "grad_norm": 7.214723587036133, "learning_rate": 2e-05, "loss": 0.11026973, "step": 1491 }, { "epoch": 2.984, "grad_norm": 4.527521133422852, "learning_rate": 2e-05, "loss": 0.05313881, "step": 1492 }, { "epoch": 2.9859999999999998, "grad_norm": 4.968887805938721, "learning_rate": 2e-05, "loss": 0.07511236, "step": 1493 }, { "epoch": 2.988, "grad_norm": 5.004446029663086, "learning_rate": 2e-05, "loss": 0.07327528, "step": 1494 }, { "epoch": 2.99, "grad_norm": 4.170538425445557, "learning_rate": 2e-05, "loss": 0.06513852, "step": 1495 }, { "epoch": 2.992, "grad_norm": 5.604336738586426, "learning_rate": 2e-05, "loss": 0.0688771, "step": 1496 }, { "epoch": 2.9939999999999998, "grad_norm": 4.699580192565918, "learning_rate": 2e-05, "loss": 0.0790859, "step": 1497 }, { "epoch": 2.996, "grad_norm": 4.232110023498535, "learning_rate": 2e-05, "loss": 0.05717592, "step": 1498 }, { "epoch": 2.998, "grad_norm": 3.1699609756469727, "learning_rate": 2e-05, "loss": 0.03968649, "step": 1499 }, { "epoch": 3.0, "grad_norm": 3.702878475189209, "learning_rate": 2e-05, "loss": 0.06434544, "step": 1500 }, { "epoch": 3.0, "eval_performance": { "AngleClassification_1": 0.992, "AngleClassification_2": 0.702, "AngleClassification_3": 0.5349301397205589, "Equal_1": 0.89, "Equal_2": 0.7145708582834331, "Equal_3": 0.6467065868263473, "LineComparison_1": 0.984, "LineComparison_2": 0.9401197604790419, "LineComparison_3": 0.874251497005988, "Parallel_1": 0.8657314629258517, "Parallel_2": 0.9519038076152304, "Parallel_3": 0.466, "Perpendicular_1": 0.862, "Perpendicular_2": 0.368, "Perpendicular_3": 0.1523046092184369, "PointLiesOnCircle_1": 0.9938543754175017, "PointLiesOnCircle_2": 0.9949333333333334, "PointLiesOnCircle_3": 0.8057333333333333, "PointLiesOnLine_1": 0.9298597194388778, "PointLiesOnLine_2": 0.4148296593186373, "PointLiesOnLine_3": 0.2435129740518962 }, "eval_runtime": 225.7097, "eval_samples_per_second": 46.52, "eval_steps_per_second": 0.93, "step": 1500 }, { "epoch": 3.002, "grad_norm": 5.954226493835449, "learning_rate": 2e-05, "loss": 0.08397508, "step": 1501 }, { "epoch": 3.004, "grad_norm": 5.859375953674316, "learning_rate": 2e-05, "loss": 0.08713303, "step": 1502 }, { "epoch": 3.006, "grad_norm": 5.438998222351074, "learning_rate": 2e-05, "loss": 0.08872537, "step": 1503 }, { "epoch": 3.008, "grad_norm": 5.106701374053955, "learning_rate": 2e-05, "loss": 0.09446557, "step": 1504 }, { "epoch": 3.01, "grad_norm": 4.43510103225708, "learning_rate": 2e-05, "loss": 0.05183261, "step": 1505 }, { "epoch": 3.012, "grad_norm": 4.304328918457031, "learning_rate": 2e-05, "loss": 0.0607527, "step": 1506 }, { "epoch": 3.014, "grad_norm": 3.329540491104126, "learning_rate": 2e-05, "loss": 0.06685504, "step": 1507 }, { "epoch": 3.016, "grad_norm": 4.029257774353027, "learning_rate": 2e-05, "loss": 0.06452954, "step": 1508 }, { "epoch": 3.018, "grad_norm": 4.3747758865356445, "learning_rate": 2e-05, "loss": 0.06529158, "step": 1509 }, { "epoch": 3.02, "grad_norm": 4.853371620178223, "learning_rate": 2e-05, "loss": 0.06863082, "step": 1510 }, { "epoch": 3.022, "grad_norm": 6.031400203704834, "learning_rate": 2e-05, "loss": 0.10701236, "step": 1511 }, { "epoch": 3.024, "grad_norm": 5.60347318649292, "learning_rate": 2e-05, "loss": 0.08216612, "step": 1512 }, { "epoch": 3.026, "grad_norm": 4.837069034576416, "learning_rate": 2e-05, "loss": 0.0862173, "step": 1513 }, { "epoch": 3.028, "grad_norm": 4.855534076690674, "learning_rate": 2e-05, "loss": 0.0656378, "step": 1514 }, { "epoch": 3.03, "grad_norm": 4.831255912780762, "learning_rate": 2e-05, "loss": 0.05892777, "step": 1515 }, { "epoch": 3.032, "grad_norm": 5.541748523712158, "learning_rate": 2e-05, "loss": 0.07752127, "step": 1516 }, { "epoch": 3.034, "grad_norm": 5.966110706329346, "learning_rate": 2e-05, "loss": 0.0752213, "step": 1517 }, { "epoch": 3.036, "grad_norm": 5.791103363037109, "learning_rate": 2e-05, "loss": 0.10443483, "step": 1518 }, { "epoch": 3.038, "grad_norm": 4.604087829589844, "learning_rate": 2e-05, "loss": 0.06097297, "step": 1519 }, { "epoch": 3.04, "grad_norm": 5.643918514251709, "learning_rate": 2e-05, "loss": 0.10067171, "step": 1520 }, { "epoch": 3.042, "grad_norm": 4.306736946105957, "learning_rate": 2e-05, "loss": 0.08415349, "step": 1521 }, { "epoch": 3.044, "grad_norm": 5.17617130279541, "learning_rate": 2e-05, "loss": 0.08436443, "step": 1522 }, { "epoch": 3.046, "grad_norm": 4.809046268463135, "learning_rate": 2e-05, "loss": 0.07708272, "step": 1523 }, { "epoch": 3.048, "grad_norm": 5.1770853996276855, "learning_rate": 2e-05, "loss": 0.06107355, "step": 1524 }, { "epoch": 3.05, "grad_norm": 5.781944751739502, "learning_rate": 2e-05, "loss": 0.07232336, "step": 1525 }, { "epoch": 3.052, "grad_norm": 6.496420860290527, "learning_rate": 2e-05, "loss": 0.08006763, "step": 1526 }, { "epoch": 3.054, "grad_norm": 7.277970790863037, "learning_rate": 2e-05, "loss": 0.09997952, "step": 1527 }, { "epoch": 3.056, "grad_norm": 4.5335845947265625, "learning_rate": 2e-05, "loss": 0.06629207, "step": 1528 }, { "epoch": 3.058, "grad_norm": 5.207417011260986, "learning_rate": 2e-05, "loss": 0.07902159, "step": 1529 }, { "epoch": 3.06, "grad_norm": 4.861142158508301, "learning_rate": 2e-05, "loss": 0.09225446, "step": 1530 }, { "epoch": 3.062, "grad_norm": 5.014509201049805, "learning_rate": 2e-05, "loss": 0.0645707, "step": 1531 }, { "epoch": 3.064, "grad_norm": 6.220990180969238, "learning_rate": 2e-05, "loss": 0.07108879, "step": 1532 }, { "epoch": 3.066, "grad_norm": 4.648353099822998, "learning_rate": 2e-05, "loss": 0.06391213, "step": 1533 }, { "epoch": 3.068, "grad_norm": 5.097534656524658, "learning_rate": 2e-05, "loss": 0.07811401, "step": 1534 }, { "epoch": 3.07, "grad_norm": 5.329542636871338, "learning_rate": 2e-05, "loss": 0.07603204, "step": 1535 }, { "epoch": 3.072, "grad_norm": 4.7543416023254395, "learning_rate": 2e-05, "loss": 0.07662658, "step": 1536 }, { "epoch": 3.074, "grad_norm": 3.9226162433624268, "learning_rate": 2e-05, "loss": 0.05806372, "step": 1537 }, { "epoch": 3.076, "grad_norm": 5.044088840484619, "learning_rate": 2e-05, "loss": 0.09256425, "step": 1538 }, { "epoch": 3.078, "grad_norm": 5.151412487030029, "learning_rate": 2e-05, "loss": 0.0674841, "step": 1539 }, { "epoch": 3.08, "grad_norm": 6.428744316101074, "learning_rate": 2e-05, "loss": 0.08118065, "step": 1540 }, { "epoch": 3.082, "grad_norm": 5.810997009277344, "learning_rate": 2e-05, "loss": 0.07830968, "step": 1541 }, { "epoch": 3.084, "grad_norm": 5.090029239654541, "learning_rate": 2e-05, "loss": 0.08867124, "step": 1542 }, { "epoch": 3.086, "grad_norm": 4.052005290985107, "learning_rate": 2e-05, "loss": 0.06247907, "step": 1543 }, { "epoch": 3.088, "grad_norm": 4.8386054039001465, "learning_rate": 2e-05, "loss": 0.05970058, "step": 1544 }, { "epoch": 3.09, "grad_norm": 4.873901844024658, "learning_rate": 2e-05, "loss": 0.08232678, "step": 1545 }, { "epoch": 3.092, "grad_norm": 8.644964218139648, "learning_rate": 2e-05, "loss": 0.08377228, "step": 1546 }, { "epoch": 3.094, "grad_norm": 7.475004196166992, "learning_rate": 2e-05, "loss": 0.07825454, "step": 1547 }, { "epoch": 3.096, "grad_norm": 5.3234992027282715, "learning_rate": 2e-05, "loss": 0.07040339, "step": 1548 }, { "epoch": 3.098, "grad_norm": 5.931180477142334, "learning_rate": 2e-05, "loss": 0.06917783, "step": 1549 }, { "epoch": 3.1, "grad_norm": 4.985809326171875, "learning_rate": 2e-05, "loss": 0.08475724, "step": 1550 }, { "epoch": 3.102, "grad_norm": 3.560638904571533, "learning_rate": 2e-05, "loss": 0.06026206, "step": 1551 }, { "epoch": 3.104, "grad_norm": 6.008710861206055, "learning_rate": 2e-05, "loss": 0.07897329, "step": 1552 }, { "epoch": 3.106, "grad_norm": 6.159405708312988, "learning_rate": 2e-05, "loss": 0.0787832, "step": 1553 }, { "epoch": 3.108, "grad_norm": 6.762143611907959, "learning_rate": 2e-05, "loss": 0.06129395, "step": 1554 }, { "epoch": 3.11, "grad_norm": 7.1317667961120605, "learning_rate": 2e-05, "loss": 0.13212141, "step": 1555 }, { "epoch": 3.112, "grad_norm": 6.544918537139893, "learning_rate": 2e-05, "loss": 0.06716696, "step": 1556 }, { "epoch": 3.114, "grad_norm": 5.677177429199219, "learning_rate": 2e-05, "loss": 0.09318746, "step": 1557 }, { "epoch": 3.116, "grad_norm": 5.078478813171387, "learning_rate": 2e-05, "loss": 0.06996292, "step": 1558 }, { "epoch": 3.118, "grad_norm": 4.675025939941406, "learning_rate": 2e-05, "loss": 0.06918916, "step": 1559 }, { "epoch": 3.12, "grad_norm": 5.975096225738525, "learning_rate": 2e-05, "loss": 0.08619317, "step": 1560 }, { "epoch": 3.122, "grad_norm": 7.4363627433776855, "learning_rate": 2e-05, "loss": 0.09147607, "step": 1561 }, { "epoch": 3.124, "grad_norm": 5.758176326751709, "learning_rate": 2e-05, "loss": 0.08612087, "step": 1562 }, { "epoch": 3.126, "grad_norm": 5.386693954467773, "learning_rate": 2e-05, "loss": 0.05459427, "step": 1563 }, { "epoch": 3.128, "grad_norm": 7.655271530151367, "learning_rate": 2e-05, "loss": 0.06947727, "step": 1564 }, { "epoch": 3.13, "grad_norm": 5.116163730621338, "learning_rate": 2e-05, "loss": 0.07873774, "step": 1565 }, { "epoch": 3.132, "grad_norm": 4.474585056304932, "learning_rate": 2e-05, "loss": 0.06994206, "step": 1566 }, { "epoch": 3.134, "grad_norm": 5.926779270172119, "learning_rate": 2e-05, "loss": 0.07481986, "step": 1567 }, { "epoch": 3.136, "grad_norm": 5.130211353302002, "learning_rate": 2e-05, "loss": 0.07095046, "step": 1568 }, { "epoch": 3.138, "grad_norm": 5.6830573081970215, "learning_rate": 2e-05, "loss": 0.07300852, "step": 1569 }, { "epoch": 3.14, "grad_norm": 5.302335739135742, "learning_rate": 2e-05, "loss": 0.08457458, "step": 1570 }, { "epoch": 3.142, "grad_norm": 5.709600925445557, "learning_rate": 2e-05, "loss": 0.06819507, "step": 1571 }, { "epoch": 3.144, "grad_norm": 3.0586142539978027, "learning_rate": 2e-05, "loss": 0.03806618, "step": 1572 }, { "epoch": 3.146, "grad_norm": 4.7708635330200195, "learning_rate": 2e-05, "loss": 0.07436333, "step": 1573 }, { "epoch": 3.148, "grad_norm": 5.26861047744751, "learning_rate": 2e-05, "loss": 0.08809039, "step": 1574 }, { "epoch": 3.15, "grad_norm": 5.7075886726379395, "learning_rate": 2e-05, "loss": 0.06798029, "step": 1575 }, { "epoch": 3.152, "grad_norm": 6.39401912689209, "learning_rate": 2e-05, "loss": 0.08304724, "step": 1576 }, { "epoch": 3.154, "grad_norm": 4.57726526260376, "learning_rate": 2e-05, "loss": 0.07189007, "step": 1577 }, { "epoch": 3.156, "grad_norm": 4.641340255737305, "learning_rate": 2e-05, "loss": 0.05821983, "step": 1578 }, { "epoch": 3.158, "grad_norm": 4.50847864151001, "learning_rate": 2e-05, "loss": 0.05514162, "step": 1579 }, { "epoch": 3.16, "grad_norm": 5.507416725158691, "learning_rate": 2e-05, "loss": 0.08047371, "step": 1580 }, { "epoch": 3.162, "grad_norm": 4.193258285522461, "learning_rate": 2e-05, "loss": 0.07650521, "step": 1581 }, { "epoch": 3.164, "grad_norm": 5.037667274475098, "learning_rate": 2e-05, "loss": 0.08918469, "step": 1582 }, { "epoch": 3.166, "grad_norm": 5.019763946533203, "learning_rate": 2e-05, "loss": 0.06686656, "step": 1583 }, { "epoch": 3.168, "grad_norm": 4.365486145019531, "learning_rate": 2e-05, "loss": 0.06898786, "step": 1584 }, { "epoch": 3.17, "grad_norm": 5.2584075927734375, "learning_rate": 2e-05, "loss": 0.05402671, "step": 1585 }, { "epoch": 3.172, "grad_norm": 6.5365166664123535, "learning_rate": 2e-05, "loss": 0.09304294, "step": 1586 }, { "epoch": 3.174, "grad_norm": 4.743391036987305, "learning_rate": 2e-05, "loss": 0.07228262, "step": 1587 }, { "epoch": 3.176, "grad_norm": 4.7011284828186035, "learning_rate": 2e-05, "loss": 0.06443236, "step": 1588 }, { "epoch": 3.178, "grad_norm": 4.008903503417969, "learning_rate": 2e-05, "loss": 0.05923404, "step": 1589 }, { "epoch": 3.18, "grad_norm": 4.374273300170898, "learning_rate": 2e-05, "loss": 0.0564433, "step": 1590 }, { "epoch": 3.182, "grad_norm": 4.013404846191406, "learning_rate": 2e-05, "loss": 0.05544138, "step": 1591 }, { "epoch": 3.184, "grad_norm": 5.444378852844238, "learning_rate": 2e-05, "loss": 0.08450194, "step": 1592 }, { "epoch": 3.186, "grad_norm": 4.506785869598389, "learning_rate": 2e-05, "loss": 0.06170888, "step": 1593 }, { "epoch": 3.188, "grad_norm": 5.583600997924805, "learning_rate": 2e-05, "loss": 0.07984038, "step": 1594 }, { "epoch": 3.19, "grad_norm": 6.442187786102295, "learning_rate": 2e-05, "loss": 0.08083382, "step": 1595 }, { "epoch": 3.192, "grad_norm": 9.991189002990723, "learning_rate": 2e-05, "loss": 0.08919778, "step": 1596 }, { "epoch": 3.194, "grad_norm": 6.273192405700684, "learning_rate": 2e-05, "loss": 0.0880629, "step": 1597 }, { "epoch": 3.196, "grad_norm": 7.458893299102783, "learning_rate": 2e-05, "loss": 0.10174426, "step": 1598 }, { "epoch": 3.198, "grad_norm": 4.327280521392822, "learning_rate": 2e-05, "loss": 0.08019219, "step": 1599 }, { "epoch": 3.2, "grad_norm": 4.314011573791504, "learning_rate": 2e-05, "loss": 0.0660051, "step": 1600 }, { "epoch": 3.202, "grad_norm": 4.017706871032715, "learning_rate": 2e-05, "loss": 0.08018381, "step": 1601 }, { "epoch": 3.204, "grad_norm": 5.938168525695801, "learning_rate": 2e-05, "loss": 0.07712246, "step": 1602 }, { "epoch": 3.206, "grad_norm": 5.327660083770752, "learning_rate": 2e-05, "loss": 0.11605055, "step": 1603 }, { "epoch": 3.208, "grad_norm": 5.277698516845703, "learning_rate": 2e-05, "loss": 0.06672525, "step": 1604 }, { "epoch": 3.21, "grad_norm": 4.545539379119873, "learning_rate": 2e-05, "loss": 0.05899373, "step": 1605 }, { "epoch": 3.212, "grad_norm": 6.349032402038574, "learning_rate": 2e-05, "loss": 0.07606003, "step": 1606 }, { "epoch": 3.214, "grad_norm": 5.223143100738525, "learning_rate": 2e-05, "loss": 0.04937024, "step": 1607 }, { "epoch": 3.216, "grad_norm": 4.479587078094482, "learning_rate": 2e-05, "loss": 0.07636291, "step": 1608 }, { "epoch": 3.218, "grad_norm": 5.984289169311523, "learning_rate": 2e-05, "loss": 0.06960738, "step": 1609 }, { "epoch": 3.22, "grad_norm": 6.154695510864258, "learning_rate": 2e-05, "loss": 0.07032734, "step": 1610 }, { "epoch": 3.222, "grad_norm": 4.798097133636475, "learning_rate": 2e-05, "loss": 0.06485439, "step": 1611 }, { "epoch": 3.224, "grad_norm": 3.712341070175171, "learning_rate": 2e-05, "loss": 0.05674484, "step": 1612 }, { "epoch": 3.226, "grad_norm": 5.300927639007568, "learning_rate": 2e-05, "loss": 0.07900244, "step": 1613 }, { "epoch": 3.228, "grad_norm": 4.378547668457031, "learning_rate": 2e-05, "loss": 0.06720096, "step": 1614 }, { "epoch": 3.23, "grad_norm": 4.028100967407227, "learning_rate": 2e-05, "loss": 0.05512834, "step": 1615 }, { "epoch": 3.232, "grad_norm": 4.349555492401123, "learning_rate": 2e-05, "loss": 0.06042667, "step": 1616 }, { "epoch": 3.234, "grad_norm": 4.838351726531982, "learning_rate": 2e-05, "loss": 0.06473093, "step": 1617 }, { "epoch": 3.2359999999999998, "grad_norm": 5.635011672973633, "learning_rate": 2e-05, "loss": 0.06772245, "step": 1618 }, { "epoch": 3.238, "grad_norm": 6.90197229385376, "learning_rate": 2e-05, "loss": 0.04455863, "step": 1619 }, { "epoch": 3.24, "grad_norm": 4.897036552429199, "learning_rate": 2e-05, "loss": 0.05769681, "step": 1620 }, { "epoch": 3.242, "grad_norm": 6.395400524139404, "learning_rate": 2e-05, "loss": 0.10632591, "step": 1621 }, { "epoch": 3.2439999999999998, "grad_norm": 5.31171989440918, "learning_rate": 2e-05, "loss": 0.09546394, "step": 1622 }, { "epoch": 3.246, "grad_norm": 4.419897556304932, "learning_rate": 2e-05, "loss": 0.06219834, "step": 1623 }, { "epoch": 3.248, "grad_norm": 4.96039342880249, "learning_rate": 2e-05, "loss": 0.07020237, "step": 1624 }, { "epoch": 3.25, "grad_norm": 4.977518081665039, "learning_rate": 2e-05, "loss": 0.07033151, "step": 1625 }, { "epoch": 3.252, "grad_norm": 6.532220840454102, "learning_rate": 2e-05, "loss": 0.08773537, "step": 1626 }, { "epoch": 3.254, "grad_norm": 6.337274551391602, "learning_rate": 2e-05, "loss": 0.06871706, "step": 1627 }, { "epoch": 3.2560000000000002, "grad_norm": 6.053676128387451, "learning_rate": 2e-05, "loss": 0.09032695, "step": 1628 }, { "epoch": 3.258, "grad_norm": 4.475891590118408, "learning_rate": 2e-05, "loss": 0.06095361, "step": 1629 }, { "epoch": 3.26, "grad_norm": 4.899603366851807, "learning_rate": 2e-05, "loss": 0.06877486, "step": 1630 }, { "epoch": 3.262, "grad_norm": 5.638507843017578, "learning_rate": 2e-05, "loss": 0.07213061, "step": 1631 }, { "epoch": 3.2640000000000002, "grad_norm": 6.0205888748168945, "learning_rate": 2e-05, "loss": 0.08599904, "step": 1632 }, { "epoch": 3.266, "grad_norm": 5.220811367034912, "learning_rate": 2e-05, "loss": 0.0707929, "step": 1633 }, { "epoch": 3.268, "grad_norm": 5.5049004554748535, "learning_rate": 2e-05, "loss": 0.07827805, "step": 1634 }, { "epoch": 3.27, "grad_norm": 4.353661060333252, "learning_rate": 2e-05, "loss": 0.05477132, "step": 1635 }, { "epoch": 3.2720000000000002, "grad_norm": 5.212408065795898, "learning_rate": 2e-05, "loss": 0.07843813, "step": 1636 }, { "epoch": 3.274, "grad_norm": 5.542116641998291, "learning_rate": 2e-05, "loss": 0.0565015, "step": 1637 }, { "epoch": 3.276, "grad_norm": 4.376880645751953, "learning_rate": 2e-05, "loss": 0.06609736, "step": 1638 }, { "epoch": 3.278, "grad_norm": 4.414494514465332, "learning_rate": 2e-05, "loss": 0.0685962, "step": 1639 }, { "epoch": 3.2800000000000002, "grad_norm": 3.461251735687256, "learning_rate": 2e-05, "loss": 0.04715932, "step": 1640 }, { "epoch": 3.282, "grad_norm": 4.443127155303955, "learning_rate": 2e-05, "loss": 0.05397797, "step": 1641 }, { "epoch": 3.284, "grad_norm": 4.153637409210205, "learning_rate": 2e-05, "loss": 0.06840443, "step": 1642 }, { "epoch": 3.286, "grad_norm": 6.415086269378662, "learning_rate": 2e-05, "loss": 0.06870232, "step": 1643 }, { "epoch": 3.288, "grad_norm": 4.458816051483154, "learning_rate": 2e-05, "loss": 0.04688898, "step": 1644 }, { "epoch": 3.29, "grad_norm": 3.7386648654937744, "learning_rate": 2e-05, "loss": 0.05524424, "step": 1645 }, { "epoch": 3.292, "grad_norm": 8.138068199157715, "learning_rate": 2e-05, "loss": 0.06836177, "step": 1646 }, { "epoch": 3.294, "grad_norm": 5.494998931884766, "learning_rate": 2e-05, "loss": 0.07295921, "step": 1647 }, { "epoch": 3.296, "grad_norm": 4.673689842224121, "learning_rate": 2e-05, "loss": 0.06607654, "step": 1648 }, { "epoch": 3.298, "grad_norm": 4.4951629638671875, "learning_rate": 2e-05, "loss": 0.07534494, "step": 1649 }, { "epoch": 3.3, "grad_norm": 5.354193210601807, "learning_rate": 2e-05, "loss": 0.0705111, "step": 1650 }, { "epoch": 3.302, "grad_norm": 5.630919456481934, "learning_rate": 2e-05, "loss": 0.07412417, "step": 1651 }, { "epoch": 3.304, "grad_norm": 5.883426189422607, "learning_rate": 2e-05, "loss": 0.06265645, "step": 1652 }, { "epoch": 3.306, "grad_norm": 4.677311420440674, "learning_rate": 2e-05, "loss": 0.05801614, "step": 1653 }, { "epoch": 3.308, "grad_norm": 4.798431873321533, "learning_rate": 2e-05, "loss": 0.05916409, "step": 1654 }, { "epoch": 3.31, "grad_norm": 7.1742706298828125, "learning_rate": 2e-05, "loss": 0.10347211, "step": 1655 }, { "epoch": 3.312, "grad_norm": 4.982572555541992, "learning_rate": 2e-05, "loss": 0.05943507, "step": 1656 }, { "epoch": 3.314, "grad_norm": 7.458788871765137, "learning_rate": 2e-05, "loss": 0.07478951, "step": 1657 }, { "epoch": 3.316, "grad_norm": 5.338444232940674, "learning_rate": 2e-05, "loss": 0.06565958, "step": 1658 }, { "epoch": 3.318, "grad_norm": 7.690387725830078, "learning_rate": 2e-05, "loss": 0.05389735, "step": 1659 }, { "epoch": 3.32, "grad_norm": 6.303702354431152, "learning_rate": 2e-05, "loss": 0.06961641, "step": 1660 }, { "epoch": 3.322, "grad_norm": 6.0456671714782715, "learning_rate": 2e-05, "loss": 0.07168688, "step": 1661 }, { "epoch": 3.324, "grad_norm": 4.8931803703308105, "learning_rate": 2e-05, "loss": 0.05348688, "step": 1662 }, { "epoch": 3.326, "grad_norm": 6.183485984802246, "learning_rate": 2e-05, "loss": 0.06598251, "step": 1663 }, { "epoch": 3.328, "grad_norm": 6.510396957397461, "learning_rate": 2e-05, "loss": 0.05691151, "step": 1664 }, { "epoch": 3.33, "grad_norm": 9.114551544189453, "learning_rate": 2e-05, "loss": 0.06679255, "step": 1665 }, { "epoch": 3.332, "grad_norm": 6.5299553871154785, "learning_rate": 2e-05, "loss": 0.08125523, "step": 1666 }, { "epoch": 3.334, "grad_norm": 5.970821857452393, "learning_rate": 2e-05, "loss": 0.05996719, "step": 1667 }, { "epoch": 3.336, "grad_norm": 6.405941009521484, "learning_rate": 2e-05, "loss": 0.06937401, "step": 1668 }, { "epoch": 3.338, "grad_norm": 7.213796138763428, "learning_rate": 2e-05, "loss": 0.07022522, "step": 1669 }, { "epoch": 3.34, "grad_norm": 5.0200324058532715, "learning_rate": 2e-05, "loss": 0.08739061, "step": 1670 }, { "epoch": 3.342, "grad_norm": 5.487778186798096, "learning_rate": 2e-05, "loss": 0.05963816, "step": 1671 }, { "epoch": 3.344, "grad_norm": 5.5599470138549805, "learning_rate": 2e-05, "loss": 0.07380339, "step": 1672 }, { "epoch": 3.346, "grad_norm": 5.317736625671387, "learning_rate": 2e-05, "loss": 0.06851473, "step": 1673 }, { "epoch": 3.348, "grad_norm": 4.263504505157471, "learning_rate": 2e-05, "loss": 0.05120286, "step": 1674 }, { "epoch": 3.35, "grad_norm": 4.854349136352539, "learning_rate": 2e-05, "loss": 0.06475846, "step": 1675 }, { "epoch": 3.352, "grad_norm": 6.129194259643555, "learning_rate": 2e-05, "loss": 0.10335606, "step": 1676 }, { "epoch": 3.354, "grad_norm": 4.815998077392578, "learning_rate": 2e-05, "loss": 0.06392691, "step": 1677 }, { "epoch": 3.356, "grad_norm": 5.386107444763184, "learning_rate": 2e-05, "loss": 0.08992685, "step": 1678 }, { "epoch": 3.358, "grad_norm": 7.159002780914307, "learning_rate": 2e-05, "loss": 0.08083749, "step": 1679 }, { "epoch": 3.36, "grad_norm": 5.314292907714844, "learning_rate": 2e-05, "loss": 0.05072993, "step": 1680 }, { "epoch": 3.362, "grad_norm": 6.290984630584717, "learning_rate": 2e-05, "loss": 0.08971697, "step": 1681 }, { "epoch": 3.364, "grad_norm": 5.321578025817871, "learning_rate": 2e-05, "loss": 0.07951367, "step": 1682 }, { "epoch": 3.366, "grad_norm": 5.2393479347229, "learning_rate": 2e-05, "loss": 0.07147329, "step": 1683 }, { "epoch": 3.368, "grad_norm": 4.591468334197998, "learning_rate": 2e-05, "loss": 0.05839431, "step": 1684 }, { "epoch": 3.37, "grad_norm": 4.750542640686035, "learning_rate": 2e-05, "loss": 0.06677815, "step": 1685 }, { "epoch": 3.372, "grad_norm": 5.063066005706787, "learning_rate": 2e-05, "loss": 0.05512688, "step": 1686 }, { "epoch": 3.374, "grad_norm": 4.843785762786865, "learning_rate": 2e-05, "loss": 0.05982836, "step": 1687 }, { "epoch": 3.376, "grad_norm": 8.426904678344727, "learning_rate": 2e-05, "loss": 0.06350225, "step": 1688 }, { "epoch": 3.378, "grad_norm": 4.701199531555176, "learning_rate": 2e-05, "loss": 0.05508811, "step": 1689 }, { "epoch": 3.38, "grad_norm": 4.0099992752075195, "learning_rate": 2e-05, "loss": 0.06111015, "step": 1690 }, { "epoch": 3.382, "grad_norm": 4.423235893249512, "learning_rate": 2e-05, "loss": 0.0505396, "step": 1691 }, { "epoch": 3.384, "grad_norm": 3.7509517669677734, "learning_rate": 2e-05, "loss": 0.05791307, "step": 1692 }, { "epoch": 3.386, "grad_norm": 5.640910625457764, "learning_rate": 2e-05, "loss": 0.05881345, "step": 1693 }, { "epoch": 3.388, "grad_norm": 4.152505874633789, "learning_rate": 2e-05, "loss": 0.04724699, "step": 1694 }, { "epoch": 3.39, "grad_norm": 7.958624362945557, "learning_rate": 2e-05, "loss": 0.07904568, "step": 1695 }, { "epoch": 3.392, "grad_norm": 5.5192131996154785, "learning_rate": 2e-05, "loss": 0.07025786, "step": 1696 }, { "epoch": 3.394, "grad_norm": 5.938628673553467, "learning_rate": 2e-05, "loss": 0.07867709, "step": 1697 }, { "epoch": 3.396, "grad_norm": 5.275633335113525, "learning_rate": 2e-05, "loss": 0.05038371, "step": 1698 }, { "epoch": 3.398, "grad_norm": 5.831193447113037, "learning_rate": 2e-05, "loss": 0.06785424, "step": 1699 }, { "epoch": 3.4, "grad_norm": 5.3761491775512695, "learning_rate": 2e-05, "loss": 0.07065414, "step": 1700 }, { "epoch": 3.402, "grad_norm": 5.092771530151367, "learning_rate": 2e-05, "loss": 0.06623159, "step": 1701 }, { "epoch": 3.404, "grad_norm": 4.969609260559082, "learning_rate": 2e-05, "loss": 0.06064595, "step": 1702 }, { "epoch": 3.406, "grad_norm": 5.350279331207275, "learning_rate": 2e-05, "loss": 0.07781999, "step": 1703 }, { "epoch": 3.408, "grad_norm": 4.419923782348633, "learning_rate": 2e-05, "loss": 0.05432414, "step": 1704 }, { "epoch": 3.41, "grad_norm": 6.060790538787842, "learning_rate": 2e-05, "loss": 0.06576952, "step": 1705 }, { "epoch": 3.412, "grad_norm": 5.440786361694336, "learning_rate": 2e-05, "loss": 0.04410215, "step": 1706 }, { "epoch": 3.414, "grad_norm": 6.081235408782959, "learning_rate": 2e-05, "loss": 0.07224669, "step": 1707 }, { "epoch": 3.416, "grad_norm": 4.939371585845947, "learning_rate": 2e-05, "loss": 0.04239143, "step": 1708 }, { "epoch": 3.418, "grad_norm": 4.43849515914917, "learning_rate": 2e-05, "loss": 0.05761955, "step": 1709 }, { "epoch": 3.42, "grad_norm": 4.581660747528076, "learning_rate": 2e-05, "loss": 0.06314417, "step": 1710 }, { "epoch": 3.422, "grad_norm": 5.456342697143555, "learning_rate": 2e-05, "loss": 0.06284143, "step": 1711 }, { "epoch": 3.424, "grad_norm": 5.812044620513916, "learning_rate": 2e-05, "loss": 0.07971214, "step": 1712 }, { "epoch": 3.426, "grad_norm": 5.905458927154541, "learning_rate": 2e-05, "loss": 0.0699833, "step": 1713 }, { "epoch": 3.428, "grad_norm": 6.458158493041992, "learning_rate": 2e-05, "loss": 0.06248938, "step": 1714 }, { "epoch": 3.43, "grad_norm": 4.967984199523926, "learning_rate": 2e-05, "loss": 0.07101528, "step": 1715 }, { "epoch": 3.432, "grad_norm": 4.676603317260742, "learning_rate": 2e-05, "loss": 0.06720611, "step": 1716 }, { "epoch": 3.434, "grad_norm": 3.4943292140960693, "learning_rate": 2e-05, "loss": 0.0450407, "step": 1717 }, { "epoch": 3.436, "grad_norm": 10.04814624786377, "learning_rate": 2e-05, "loss": 0.08007574, "step": 1718 }, { "epoch": 3.438, "grad_norm": 5.465684413909912, "learning_rate": 2e-05, "loss": 0.08119729, "step": 1719 }, { "epoch": 3.44, "grad_norm": 4.657307147979736, "learning_rate": 2e-05, "loss": 0.07420374, "step": 1720 }, { "epoch": 3.442, "grad_norm": 5.0350141525268555, "learning_rate": 2e-05, "loss": 0.05539635, "step": 1721 }, { "epoch": 3.444, "grad_norm": 5.637006759643555, "learning_rate": 2e-05, "loss": 0.07031047, "step": 1722 }, { "epoch": 3.446, "grad_norm": 4.752379417419434, "learning_rate": 2e-05, "loss": 0.05745683, "step": 1723 }, { "epoch": 3.448, "grad_norm": 5.215643882751465, "learning_rate": 2e-05, "loss": 0.04886284, "step": 1724 }, { "epoch": 3.45, "grad_norm": 5.1540846824646, "learning_rate": 2e-05, "loss": 0.06128926, "step": 1725 }, { "epoch": 3.452, "grad_norm": 7.059103965759277, "learning_rate": 2e-05, "loss": 0.08950848, "step": 1726 }, { "epoch": 3.454, "grad_norm": 5.681922435760498, "learning_rate": 2e-05, "loss": 0.07655244, "step": 1727 }, { "epoch": 3.456, "grad_norm": 4.991319179534912, "learning_rate": 2e-05, "loss": 0.07455518, "step": 1728 }, { "epoch": 3.458, "grad_norm": 5.37566614151001, "learning_rate": 2e-05, "loss": 0.04690168, "step": 1729 }, { "epoch": 3.46, "grad_norm": 4.395920753479004, "learning_rate": 2e-05, "loss": 0.04873832, "step": 1730 }, { "epoch": 3.462, "grad_norm": 4.816198348999023, "learning_rate": 2e-05, "loss": 0.06051288, "step": 1731 }, { "epoch": 3.464, "grad_norm": 4.409997940063477, "learning_rate": 2e-05, "loss": 0.04993621, "step": 1732 }, { "epoch": 3.466, "grad_norm": 5.228540897369385, "learning_rate": 2e-05, "loss": 0.07056217, "step": 1733 }, { "epoch": 3.468, "grad_norm": 5.794247627258301, "learning_rate": 2e-05, "loss": 0.06457858, "step": 1734 }, { "epoch": 3.4699999999999998, "grad_norm": 4.696569442749023, "learning_rate": 2e-05, "loss": 0.04306588, "step": 1735 }, { "epoch": 3.472, "grad_norm": 5.713759899139404, "learning_rate": 2e-05, "loss": 0.06699243, "step": 1736 }, { "epoch": 3.474, "grad_norm": 5.665881633758545, "learning_rate": 2e-05, "loss": 0.04974756, "step": 1737 }, { "epoch": 3.476, "grad_norm": 6.250374794006348, "learning_rate": 2e-05, "loss": 0.08398715, "step": 1738 }, { "epoch": 3.4779999999999998, "grad_norm": 6.42642068862915, "learning_rate": 2e-05, "loss": 0.08046314, "step": 1739 }, { "epoch": 3.48, "grad_norm": 4.439337253570557, "learning_rate": 2e-05, "loss": 0.05921698, "step": 1740 }, { "epoch": 3.482, "grad_norm": 5.514899253845215, "learning_rate": 2e-05, "loss": 0.07660642, "step": 1741 }, { "epoch": 3.484, "grad_norm": 4.16121768951416, "learning_rate": 2e-05, "loss": 0.04689361, "step": 1742 }, { "epoch": 3.4859999999999998, "grad_norm": 4.863931655883789, "learning_rate": 2e-05, "loss": 0.06494551, "step": 1743 }, { "epoch": 3.488, "grad_norm": 5.758450984954834, "learning_rate": 2e-05, "loss": 0.07361363, "step": 1744 }, { "epoch": 3.49, "grad_norm": 5.498981475830078, "learning_rate": 2e-05, "loss": 0.06379999, "step": 1745 }, { "epoch": 3.492, "grad_norm": 4.282353401184082, "learning_rate": 2e-05, "loss": 0.05358336, "step": 1746 }, { "epoch": 3.4939999999999998, "grad_norm": 4.685094833374023, "learning_rate": 2e-05, "loss": 0.05786474, "step": 1747 }, { "epoch": 3.496, "grad_norm": 4.620589733123779, "learning_rate": 2e-05, "loss": 0.06134249, "step": 1748 }, { "epoch": 3.498, "grad_norm": 3.806101083755493, "learning_rate": 2e-05, "loss": 0.0496875, "step": 1749 }, { "epoch": 3.5, "grad_norm": 4.7345356941223145, "learning_rate": 2e-05, "loss": 0.05185562, "step": 1750 }, { "epoch": 3.502, "grad_norm": 4.450376987457275, "learning_rate": 2e-05, "loss": 0.0533569, "step": 1751 }, { "epoch": 3.504, "grad_norm": 3.6496384143829346, "learning_rate": 2e-05, "loss": 0.04863947, "step": 1752 }, { "epoch": 3.5060000000000002, "grad_norm": 6.651088714599609, "learning_rate": 2e-05, "loss": 0.06572546, "step": 1753 }, { "epoch": 3.508, "grad_norm": 6.319100856781006, "learning_rate": 2e-05, "loss": 0.06977685, "step": 1754 }, { "epoch": 3.51, "grad_norm": 5.176578521728516, "learning_rate": 2e-05, "loss": 0.07149006, "step": 1755 }, { "epoch": 3.512, "grad_norm": 6.367733478546143, "learning_rate": 2e-05, "loss": 0.07133694, "step": 1756 }, { "epoch": 3.5140000000000002, "grad_norm": 5.177986145019531, "learning_rate": 2e-05, "loss": 0.09079783, "step": 1757 }, { "epoch": 3.516, "grad_norm": 5.0475358963012695, "learning_rate": 2e-05, "loss": 0.06932561, "step": 1758 }, { "epoch": 3.518, "grad_norm": 5.949672222137451, "learning_rate": 2e-05, "loss": 0.06835262, "step": 1759 }, { "epoch": 3.52, "grad_norm": 6.7915849685668945, "learning_rate": 2e-05, "loss": 0.09965676, "step": 1760 }, { "epoch": 3.5220000000000002, "grad_norm": 5.371845245361328, "learning_rate": 2e-05, "loss": 0.05601543, "step": 1761 }, { "epoch": 3.524, "grad_norm": 4.242400646209717, "learning_rate": 2e-05, "loss": 0.04662568, "step": 1762 }, { "epoch": 3.526, "grad_norm": 6.643728256225586, "learning_rate": 2e-05, "loss": 0.05600107, "step": 1763 }, { "epoch": 3.528, "grad_norm": 5.702877998352051, "learning_rate": 2e-05, "loss": 0.0757223, "step": 1764 }, { "epoch": 3.5300000000000002, "grad_norm": 4.987585067749023, "learning_rate": 2e-05, "loss": 0.07110192, "step": 1765 }, { "epoch": 3.532, "grad_norm": 6.223247528076172, "learning_rate": 2e-05, "loss": 0.06167902, "step": 1766 }, { "epoch": 3.534, "grad_norm": 5.3057169914245605, "learning_rate": 2e-05, "loss": 0.0541944, "step": 1767 }, { "epoch": 3.536, "grad_norm": 5.0586724281311035, "learning_rate": 2e-05, "loss": 0.0513264, "step": 1768 }, { "epoch": 3.5380000000000003, "grad_norm": 6.53510856628418, "learning_rate": 2e-05, "loss": 0.04996534, "step": 1769 }, { "epoch": 3.54, "grad_norm": 4.8632659912109375, "learning_rate": 2e-05, "loss": 0.06607803, "step": 1770 }, { "epoch": 3.542, "grad_norm": 4.24329948425293, "learning_rate": 2e-05, "loss": 0.05569205, "step": 1771 }, { "epoch": 3.544, "grad_norm": 6.1338934898376465, "learning_rate": 2e-05, "loss": 0.08024924, "step": 1772 }, { "epoch": 3.5460000000000003, "grad_norm": 4.805015563964844, "learning_rate": 2e-05, "loss": 0.05180664, "step": 1773 }, { "epoch": 3.548, "grad_norm": 4.744563579559326, "learning_rate": 2e-05, "loss": 0.06555496, "step": 1774 }, { "epoch": 3.55, "grad_norm": 4.458217620849609, "learning_rate": 2e-05, "loss": 0.05802102, "step": 1775 }, { "epoch": 3.552, "grad_norm": 5.351348876953125, "learning_rate": 2e-05, "loss": 0.0593631, "step": 1776 }, { "epoch": 3.5540000000000003, "grad_norm": 4.894783973693848, "learning_rate": 2e-05, "loss": 0.06226364, "step": 1777 }, { "epoch": 3.556, "grad_norm": 4.559128761291504, "learning_rate": 2e-05, "loss": 0.03713622, "step": 1778 }, { "epoch": 3.558, "grad_norm": 5.421731948852539, "learning_rate": 2e-05, "loss": 0.06925963, "step": 1779 }, { "epoch": 3.56, "grad_norm": 3.9011318683624268, "learning_rate": 2e-05, "loss": 0.04258703, "step": 1780 }, { "epoch": 3.5620000000000003, "grad_norm": 4.323372840881348, "learning_rate": 2e-05, "loss": 0.05222411, "step": 1781 }, { "epoch": 3.564, "grad_norm": 5.651595115661621, "learning_rate": 2e-05, "loss": 0.06465665, "step": 1782 }, { "epoch": 3.566, "grad_norm": 4.120786666870117, "learning_rate": 2e-05, "loss": 0.04076482, "step": 1783 }, { "epoch": 3.568, "grad_norm": 4.468491077423096, "learning_rate": 2e-05, "loss": 0.05721128, "step": 1784 }, { "epoch": 3.57, "grad_norm": 5.080191135406494, "learning_rate": 2e-05, "loss": 0.06350212, "step": 1785 }, { "epoch": 3.572, "grad_norm": 5.071846961975098, "learning_rate": 2e-05, "loss": 0.05118635, "step": 1786 }, { "epoch": 3.574, "grad_norm": 4.999566555023193, "learning_rate": 2e-05, "loss": 0.06367949, "step": 1787 }, { "epoch": 3.576, "grad_norm": 7.397152900695801, "learning_rate": 2e-05, "loss": 0.04144837, "step": 1788 }, { "epoch": 3.578, "grad_norm": 5.748132705688477, "learning_rate": 2e-05, "loss": 0.0691671, "step": 1789 }, { "epoch": 3.58, "grad_norm": 6.07114315032959, "learning_rate": 2e-05, "loss": 0.07683832, "step": 1790 }, { "epoch": 3.582, "grad_norm": 6.4092488288879395, "learning_rate": 2e-05, "loss": 0.08102214, "step": 1791 }, { "epoch": 3.584, "grad_norm": 6.289010524749756, "learning_rate": 2e-05, "loss": 0.04834786, "step": 1792 }, { "epoch": 3.586, "grad_norm": 5.349119663238525, "learning_rate": 2e-05, "loss": 0.06233012, "step": 1793 }, { "epoch": 3.588, "grad_norm": 5.073043346405029, "learning_rate": 2e-05, "loss": 0.06713357, "step": 1794 }, { "epoch": 3.59, "grad_norm": 6.109503269195557, "learning_rate": 2e-05, "loss": 0.08102196, "step": 1795 }, { "epoch": 3.592, "grad_norm": 5.553793907165527, "learning_rate": 2e-05, "loss": 0.07561623, "step": 1796 }, { "epoch": 3.594, "grad_norm": 4.862972736358643, "learning_rate": 2e-05, "loss": 0.05075088, "step": 1797 }, { "epoch": 3.596, "grad_norm": 4.819360733032227, "learning_rate": 2e-05, "loss": 0.0940045, "step": 1798 }, { "epoch": 3.598, "grad_norm": 5.868276596069336, "learning_rate": 2e-05, "loss": 0.07605451, "step": 1799 }, { "epoch": 3.6, "grad_norm": 5.577599048614502, "learning_rate": 2e-05, "loss": 0.07491241, "step": 1800 }, { "epoch": 3.602, "grad_norm": 4.03432559967041, "learning_rate": 2e-05, "loss": 0.05573434, "step": 1801 }, { "epoch": 3.604, "grad_norm": 4.659179210662842, "learning_rate": 2e-05, "loss": 0.05912884, "step": 1802 }, { "epoch": 3.606, "grad_norm": 5.629663944244385, "learning_rate": 2e-05, "loss": 0.07126447, "step": 1803 }, { "epoch": 3.608, "grad_norm": 6.103148937225342, "learning_rate": 2e-05, "loss": 0.08218233, "step": 1804 }, { "epoch": 3.61, "grad_norm": 5.10606575012207, "learning_rate": 2e-05, "loss": 0.0637899, "step": 1805 }, { "epoch": 3.612, "grad_norm": 4.186132431030273, "learning_rate": 2e-05, "loss": 0.07303086, "step": 1806 }, { "epoch": 3.614, "grad_norm": 5.684262752532959, "learning_rate": 2e-05, "loss": 0.05378779, "step": 1807 }, { "epoch": 3.616, "grad_norm": 4.826086521148682, "learning_rate": 2e-05, "loss": 0.06850722, "step": 1808 }, { "epoch": 3.618, "grad_norm": 3.701488494873047, "learning_rate": 2e-05, "loss": 0.05088731, "step": 1809 }, { "epoch": 3.62, "grad_norm": 5.375479698181152, "learning_rate": 2e-05, "loss": 0.07195827, "step": 1810 }, { "epoch": 3.622, "grad_norm": 5.262441635131836, "learning_rate": 2e-05, "loss": 0.07890816, "step": 1811 }, { "epoch": 3.624, "grad_norm": 4.6438798904418945, "learning_rate": 2e-05, "loss": 0.07192764, "step": 1812 }, { "epoch": 3.626, "grad_norm": 4.534289836883545, "learning_rate": 2e-05, "loss": 0.06571546, "step": 1813 }, { "epoch": 3.628, "grad_norm": 4.626264572143555, "learning_rate": 2e-05, "loss": 0.07110818, "step": 1814 }, { "epoch": 3.63, "grad_norm": 3.316432476043701, "learning_rate": 2e-05, "loss": 0.04901554, "step": 1815 }, { "epoch": 3.632, "grad_norm": 4.882558822631836, "learning_rate": 2e-05, "loss": 0.09450966, "step": 1816 }, { "epoch": 3.634, "grad_norm": 4.3959126472473145, "learning_rate": 2e-05, "loss": 0.04594856, "step": 1817 }, { "epoch": 3.636, "grad_norm": 4.866504192352295, "learning_rate": 2e-05, "loss": 0.06387977, "step": 1818 }, { "epoch": 3.638, "grad_norm": 5.601128578186035, "learning_rate": 2e-05, "loss": 0.0548332, "step": 1819 }, { "epoch": 3.64, "grad_norm": 6.948201656341553, "learning_rate": 2e-05, "loss": 0.06673107, "step": 1820 }, { "epoch": 3.642, "grad_norm": 4.005955696105957, "learning_rate": 2e-05, "loss": 0.05999362, "step": 1821 }, { "epoch": 3.644, "grad_norm": 6.054996967315674, "learning_rate": 2e-05, "loss": 0.07066963, "step": 1822 }, { "epoch": 3.646, "grad_norm": 6.234635829925537, "learning_rate": 2e-05, "loss": 0.06943196, "step": 1823 }, { "epoch": 3.648, "grad_norm": 4.118714809417725, "learning_rate": 2e-05, "loss": 0.06223981, "step": 1824 }, { "epoch": 3.65, "grad_norm": 4.225829601287842, "learning_rate": 2e-05, "loss": 0.05416602, "step": 1825 }, { "epoch": 3.652, "grad_norm": 4.567686080932617, "learning_rate": 2e-05, "loss": 0.05276055, "step": 1826 }, { "epoch": 3.654, "grad_norm": 5.145322799682617, "learning_rate": 2e-05, "loss": 0.06149843, "step": 1827 }, { "epoch": 3.656, "grad_norm": 5.035682201385498, "learning_rate": 2e-05, "loss": 0.06718871, "step": 1828 }, { "epoch": 3.658, "grad_norm": 4.825553894042969, "learning_rate": 2e-05, "loss": 0.06222571, "step": 1829 }, { "epoch": 3.66, "grad_norm": 6.3249192237854, "learning_rate": 2e-05, "loss": 0.08377388, "step": 1830 }, { "epoch": 3.662, "grad_norm": 5.515347480773926, "learning_rate": 2e-05, "loss": 0.06473257, "step": 1831 }, { "epoch": 3.664, "grad_norm": 3.9980335235595703, "learning_rate": 2e-05, "loss": 0.06992444, "step": 1832 }, { "epoch": 3.666, "grad_norm": 3.916435480117798, "learning_rate": 2e-05, "loss": 0.04474148, "step": 1833 }, { "epoch": 3.668, "grad_norm": 5.297980308532715, "learning_rate": 2e-05, "loss": 0.0630901, "step": 1834 }, { "epoch": 3.67, "grad_norm": 5.502199649810791, "learning_rate": 2e-05, "loss": 0.07296306, "step": 1835 }, { "epoch": 3.672, "grad_norm": 4.691346168518066, "learning_rate": 2e-05, "loss": 0.07221904, "step": 1836 }, { "epoch": 3.674, "grad_norm": 5.042135238647461, "learning_rate": 2e-05, "loss": 0.06640635, "step": 1837 }, { "epoch": 3.676, "grad_norm": 3.767299175262451, "learning_rate": 2e-05, "loss": 0.03254046, "step": 1838 }, { "epoch": 3.678, "grad_norm": 4.414189338684082, "learning_rate": 2e-05, "loss": 0.06866148, "step": 1839 }, { "epoch": 3.68, "grad_norm": 3.3473238945007324, "learning_rate": 2e-05, "loss": 0.03685539, "step": 1840 }, { "epoch": 3.682, "grad_norm": 4.266433238983154, "learning_rate": 2e-05, "loss": 0.04886635, "step": 1841 }, { "epoch": 3.684, "grad_norm": 3.803457260131836, "learning_rate": 2e-05, "loss": 0.04779492, "step": 1842 }, { "epoch": 3.686, "grad_norm": 4.409362316131592, "learning_rate": 2e-05, "loss": 0.06374957, "step": 1843 }, { "epoch": 3.6879999999999997, "grad_norm": 5.625827789306641, "learning_rate": 2e-05, "loss": 0.05488, "step": 1844 }, { "epoch": 3.69, "grad_norm": 5.774028778076172, "learning_rate": 2e-05, "loss": 0.07197587, "step": 1845 }, { "epoch": 3.692, "grad_norm": 5.374508380889893, "learning_rate": 2e-05, "loss": 0.0584962, "step": 1846 }, { "epoch": 3.694, "grad_norm": 5.762722969055176, "learning_rate": 2e-05, "loss": 0.07995509, "step": 1847 }, { "epoch": 3.6959999999999997, "grad_norm": 5.163980484008789, "learning_rate": 2e-05, "loss": 0.08283812, "step": 1848 }, { "epoch": 3.698, "grad_norm": 5.876678943634033, "learning_rate": 2e-05, "loss": 0.06852521, "step": 1849 }, { "epoch": 3.7, "grad_norm": 5.324711799621582, "learning_rate": 2e-05, "loss": 0.06199085, "step": 1850 }, { "epoch": 3.702, "grad_norm": 4.7356343269348145, "learning_rate": 2e-05, "loss": 0.05141234, "step": 1851 }, { "epoch": 3.7039999999999997, "grad_norm": 4.333901405334473, "learning_rate": 2e-05, "loss": 0.04964181, "step": 1852 }, { "epoch": 3.706, "grad_norm": 3.659374713897705, "learning_rate": 2e-05, "loss": 0.0351469, "step": 1853 }, { "epoch": 3.708, "grad_norm": 4.408047199249268, "learning_rate": 2e-05, "loss": 0.05563649, "step": 1854 }, { "epoch": 3.71, "grad_norm": 4.120344638824463, "learning_rate": 2e-05, "loss": 0.04419883, "step": 1855 }, { "epoch": 3.7119999999999997, "grad_norm": 3.182614326477051, "learning_rate": 2e-05, "loss": 0.04067455, "step": 1856 }, { "epoch": 3.714, "grad_norm": 4.517913341522217, "learning_rate": 2e-05, "loss": 0.05360608, "step": 1857 }, { "epoch": 3.716, "grad_norm": 3.6149818897247314, "learning_rate": 2e-05, "loss": 0.0430776, "step": 1858 }, { "epoch": 3.718, "grad_norm": 5.15364933013916, "learning_rate": 2e-05, "loss": 0.05113246, "step": 1859 }, { "epoch": 3.7199999999999998, "grad_norm": 3.8621222972869873, "learning_rate": 2e-05, "loss": 0.06012932, "step": 1860 }, { "epoch": 3.722, "grad_norm": 4.592406749725342, "learning_rate": 2e-05, "loss": 0.04537953, "step": 1861 }, { "epoch": 3.724, "grad_norm": 4.797220706939697, "learning_rate": 2e-05, "loss": 0.04905158, "step": 1862 }, { "epoch": 3.726, "grad_norm": 3.8775906562805176, "learning_rate": 2e-05, "loss": 0.03888229, "step": 1863 }, { "epoch": 3.7279999999999998, "grad_norm": 4.664579391479492, "learning_rate": 2e-05, "loss": 0.08063454, "step": 1864 }, { "epoch": 3.73, "grad_norm": 5.089929580688477, "learning_rate": 2e-05, "loss": 0.07466167, "step": 1865 }, { "epoch": 3.732, "grad_norm": 4.7327165603637695, "learning_rate": 2e-05, "loss": 0.06409083, "step": 1866 }, { "epoch": 3.734, "grad_norm": 6.583540916442871, "learning_rate": 2e-05, "loss": 0.05853298, "step": 1867 }, { "epoch": 3.7359999999999998, "grad_norm": 4.511338233947754, "learning_rate": 2e-05, "loss": 0.07788299, "step": 1868 }, { "epoch": 3.738, "grad_norm": 5.245903491973877, "learning_rate": 2e-05, "loss": 0.0680366, "step": 1869 }, { "epoch": 3.74, "grad_norm": 4.297535419464111, "learning_rate": 2e-05, "loss": 0.06112674, "step": 1870 }, { "epoch": 3.742, "grad_norm": 4.879220008850098, "learning_rate": 2e-05, "loss": 0.07366498, "step": 1871 }, { "epoch": 3.7439999999999998, "grad_norm": 5.118008613586426, "learning_rate": 2e-05, "loss": 0.04615158, "step": 1872 }, { "epoch": 3.746, "grad_norm": 3.975435733795166, "learning_rate": 2e-05, "loss": 0.04737507, "step": 1873 }, { "epoch": 3.748, "grad_norm": 4.455811023712158, "learning_rate": 2e-05, "loss": 0.05743422, "step": 1874 }, { "epoch": 3.75, "grad_norm": 5.500579357147217, "learning_rate": 2e-05, "loss": 0.04762407, "step": 1875 }, { "epoch": 3.752, "grad_norm": 4.774362564086914, "learning_rate": 2e-05, "loss": 0.05708167, "step": 1876 }, { "epoch": 3.754, "grad_norm": 3.0380311012268066, "learning_rate": 2e-05, "loss": 0.03373966, "step": 1877 }, { "epoch": 3.7560000000000002, "grad_norm": 4.439557075500488, "learning_rate": 2e-05, "loss": 0.06649361, "step": 1878 }, { "epoch": 3.758, "grad_norm": 5.849845886230469, "learning_rate": 2e-05, "loss": 0.05086464, "step": 1879 }, { "epoch": 3.76, "grad_norm": 6.178467750549316, "learning_rate": 2e-05, "loss": 0.06493635, "step": 1880 }, { "epoch": 3.762, "grad_norm": 5.690544128417969, "learning_rate": 2e-05, "loss": 0.07903777, "step": 1881 }, { "epoch": 3.7640000000000002, "grad_norm": 4.368018627166748, "learning_rate": 2e-05, "loss": 0.06753272, "step": 1882 }, { "epoch": 3.766, "grad_norm": 4.410281658172607, "learning_rate": 2e-05, "loss": 0.05095603, "step": 1883 }, { "epoch": 3.768, "grad_norm": 4.456419467926025, "learning_rate": 2e-05, "loss": 0.06240604, "step": 1884 }, { "epoch": 3.77, "grad_norm": 4.7514328956604, "learning_rate": 2e-05, "loss": 0.07521722, "step": 1885 }, { "epoch": 3.7720000000000002, "grad_norm": 5.130519390106201, "learning_rate": 2e-05, "loss": 0.07404801, "step": 1886 }, { "epoch": 3.774, "grad_norm": 4.217730522155762, "learning_rate": 2e-05, "loss": 0.05422241, "step": 1887 }, { "epoch": 3.776, "grad_norm": 5.346364974975586, "learning_rate": 2e-05, "loss": 0.06845973, "step": 1888 }, { "epoch": 3.778, "grad_norm": 4.466943264007568, "learning_rate": 2e-05, "loss": 0.05692923, "step": 1889 }, { "epoch": 3.7800000000000002, "grad_norm": 4.942258358001709, "learning_rate": 2e-05, "loss": 0.0716398, "step": 1890 }, { "epoch": 3.782, "grad_norm": 4.490053653717041, "learning_rate": 2e-05, "loss": 0.05725864, "step": 1891 }, { "epoch": 3.784, "grad_norm": 4.151418209075928, "learning_rate": 2e-05, "loss": 0.05481914, "step": 1892 }, { "epoch": 3.786, "grad_norm": 4.206702709197998, "learning_rate": 2e-05, "loss": 0.07093345, "step": 1893 }, { "epoch": 3.7880000000000003, "grad_norm": 4.14017915725708, "learning_rate": 2e-05, "loss": 0.06263759, "step": 1894 }, { "epoch": 3.79, "grad_norm": 4.166224956512451, "learning_rate": 2e-05, "loss": 0.05321131, "step": 1895 }, { "epoch": 3.792, "grad_norm": 4.358842849731445, "learning_rate": 2e-05, "loss": 0.07294746, "step": 1896 }, { "epoch": 3.794, "grad_norm": 4.195767879486084, "learning_rate": 2e-05, "loss": 0.03814392, "step": 1897 }, { "epoch": 3.7960000000000003, "grad_norm": 5.502280235290527, "learning_rate": 2e-05, "loss": 0.06763096, "step": 1898 }, { "epoch": 3.798, "grad_norm": 4.5076212882995605, "learning_rate": 2e-05, "loss": 0.04179053, "step": 1899 }, { "epoch": 3.8, "grad_norm": 4.652360916137695, "learning_rate": 2e-05, "loss": 0.06958556, "step": 1900 }, { "epoch": 3.802, "grad_norm": 4.225364685058594, "learning_rate": 2e-05, "loss": 0.04927856, "step": 1901 }, { "epoch": 3.8040000000000003, "grad_norm": 5.026622772216797, "learning_rate": 2e-05, "loss": 0.06738954, "step": 1902 }, { "epoch": 3.806, "grad_norm": 5.028942584991455, "learning_rate": 2e-05, "loss": 0.04162842, "step": 1903 }, { "epoch": 3.808, "grad_norm": 4.981192588806152, "learning_rate": 2e-05, "loss": 0.05354348, "step": 1904 }, { "epoch": 3.81, "grad_norm": 4.549354553222656, "learning_rate": 2e-05, "loss": 0.05669479, "step": 1905 }, { "epoch": 3.8120000000000003, "grad_norm": 5.029003143310547, "learning_rate": 2e-05, "loss": 0.05986965, "step": 1906 }, { "epoch": 3.814, "grad_norm": 3.137026071548462, "learning_rate": 2e-05, "loss": 0.03462634, "step": 1907 }, { "epoch": 3.816, "grad_norm": 5.903766632080078, "learning_rate": 2e-05, "loss": 0.06771113, "step": 1908 }, { "epoch": 3.818, "grad_norm": 5.99373197555542, "learning_rate": 2e-05, "loss": 0.07192631, "step": 1909 }, { "epoch": 3.82, "grad_norm": 4.048804759979248, "learning_rate": 2e-05, "loss": 0.04340763, "step": 1910 }, { "epoch": 3.822, "grad_norm": 3.361393928527832, "learning_rate": 2e-05, "loss": 0.0407342, "step": 1911 }, { "epoch": 3.824, "grad_norm": 4.608774185180664, "learning_rate": 2e-05, "loss": 0.05921407, "step": 1912 }, { "epoch": 3.826, "grad_norm": 6.527437686920166, "learning_rate": 2e-05, "loss": 0.06753261, "step": 1913 }, { "epoch": 3.828, "grad_norm": 8.546462059020996, "learning_rate": 2e-05, "loss": 0.07130502, "step": 1914 }, { "epoch": 3.83, "grad_norm": 4.629225730895996, "learning_rate": 2e-05, "loss": 0.05908479, "step": 1915 }, { "epoch": 3.832, "grad_norm": 4.9607768058776855, "learning_rate": 2e-05, "loss": 0.07062662, "step": 1916 }, { "epoch": 3.834, "grad_norm": 4.6531662940979, "learning_rate": 2e-05, "loss": 0.04235405, "step": 1917 }, { "epoch": 3.836, "grad_norm": 5.701800346374512, "learning_rate": 2e-05, "loss": 0.07775312, "step": 1918 }, { "epoch": 3.838, "grad_norm": 4.975476264953613, "learning_rate": 2e-05, "loss": 0.06026341, "step": 1919 }, { "epoch": 3.84, "grad_norm": 4.5145263671875, "learning_rate": 2e-05, "loss": 0.06085244, "step": 1920 }, { "epoch": 3.842, "grad_norm": 4.061069965362549, "learning_rate": 2e-05, "loss": 0.04229246, "step": 1921 }, { "epoch": 3.844, "grad_norm": 5.241036891937256, "learning_rate": 2e-05, "loss": 0.06195673, "step": 1922 }, { "epoch": 3.846, "grad_norm": 5.4296979904174805, "learning_rate": 2e-05, "loss": 0.05791047, "step": 1923 }, { "epoch": 3.848, "grad_norm": 3.735692262649536, "learning_rate": 2e-05, "loss": 0.04909524, "step": 1924 }, { "epoch": 3.85, "grad_norm": 4.149910926818848, "learning_rate": 2e-05, "loss": 0.04866936, "step": 1925 }, { "epoch": 3.852, "grad_norm": 4.548980236053467, "learning_rate": 2e-05, "loss": 0.05477653, "step": 1926 }, { "epoch": 3.854, "grad_norm": 5.261826038360596, "learning_rate": 2e-05, "loss": 0.0822985, "step": 1927 }, { "epoch": 3.856, "grad_norm": 4.03402853012085, "learning_rate": 2e-05, "loss": 0.04280536, "step": 1928 }, { "epoch": 3.858, "grad_norm": 4.46622896194458, "learning_rate": 2e-05, "loss": 0.05075065, "step": 1929 }, { "epoch": 3.86, "grad_norm": 4.514848232269287, "learning_rate": 2e-05, "loss": 0.04314257, "step": 1930 }, { "epoch": 3.862, "grad_norm": 5.750722408294678, "learning_rate": 2e-05, "loss": 0.06035237, "step": 1931 }, { "epoch": 3.864, "grad_norm": 5.195761203765869, "learning_rate": 2e-05, "loss": 0.04967444, "step": 1932 }, { "epoch": 3.866, "grad_norm": 4.361180305480957, "learning_rate": 2e-05, "loss": 0.03634356, "step": 1933 }, { "epoch": 3.868, "grad_norm": 5.237107753753662, "learning_rate": 2e-05, "loss": 0.06162471, "step": 1934 }, { "epoch": 3.87, "grad_norm": 6.4367876052856445, "learning_rate": 2e-05, "loss": 0.05802264, "step": 1935 }, { "epoch": 3.872, "grad_norm": 5.603133201599121, "learning_rate": 2e-05, "loss": 0.05284619, "step": 1936 }, { "epoch": 3.874, "grad_norm": 6.062546730041504, "learning_rate": 2e-05, "loss": 0.05874944, "step": 1937 }, { "epoch": 3.876, "grad_norm": 5.439873218536377, "learning_rate": 2e-05, "loss": 0.04426819, "step": 1938 }, { "epoch": 3.878, "grad_norm": 5.635611057281494, "learning_rate": 2e-05, "loss": 0.07388175, "step": 1939 }, { "epoch": 3.88, "grad_norm": 6.322620868682861, "learning_rate": 2e-05, "loss": 0.05660553, "step": 1940 }, { "epoch": 3.882, "grad_norm": 5.917237758636475, "learning_rate": 2e-05, "loss": 0.06455839, "step": 1941 }, { "epoch": 3.884, "grad_norm": 5.430494785308838, "learning_rate": 2e-05, "loss": 0.06062989, "step": 1942 }, { "epoch": 3.886, "grad_norm": 6.11292839050293, "learning_rate": 2e-05, "loss": 0.07694809, "step": 1943 }, { "epoch": 3.888, "grad_norm": 4.382017135620117, "learning_rate": 2e-05, "loss": 0.03964204, "step": 1944 }, { "epoch": 3.89, "grad_norm": 4.592599391937256, "learning_rate": 2e-05, "loss": 0.07914122, "step": 1945 }, { "epoch": 3.892, "grad_norm": 4.502233505249023, "learning_rate": 2e-05, "loss": 0.04914789, "step": 1946 }, { "epoch": 3.894, "grad_norm": 5.339341163635254, "learning_rate": 2e-05, "loss": 0.03365178, "step": 1947 }, { "epoch": 3.896, "grad_norm": 4.545803546905518, "learning_rate": 2e-05, "loss": 0.05197633, "step": 1948 }, { "epoch": 3.898, "grad_norm": 4.67905330657959, "learning_rate": 2e-05, "loss": 0.05348172, "step": 1949 }, { "epoch": 3.9, "grad_norm": 4.033079147338867, "learning_rate": 2e-05, "loss": 0.04147413, "step": 1950 }, { "epoch": 3.902, "grad_norm": 4.924177169799805, "learning_rate": 2e-05, "loss": 0.06341306, "step": 1951 }, { "epoch": 3.904, "grad_norm": 4.910208702087402, "learning_rate": 2e-05, "loss": 0.06936258, "step": 1952 }, { "epoch": 3.906, "grad_norm": 4.295749187469482, "learning_rate": 2e-05, "loss": 0.04492363, "step": 1953 }, { "epoch": 3.908, "grad_norm": 4.704499244689941, "learning_rate": 2e-05, "loss": 0.07343933, "step": 1954 }, { "epoch": 3.91, "grad_norm": 5.122136116027832, "learning_rate": 2e-05, "loss": 0.06493646, "step": 1955 }, { "epoch": 3.912, "grad_norm": 3.4517858028411865, "learning_rate": 2e-05, "loss": 0.03445569, "step": 1956 }, { "epoch": 3.914, "grad_norm": 4.924212455749512, "learning_rate": 2e-05, "loss": 0.06080871, "step": 1957 }, { "epoch": 3.916, "grad_norm": 4.312604904174805, "learning_rate": 2e-05, "loss": 0.05151142, "step": 1958 }, { "epoch": 3.918, "grad_norm": 3.949355363845825, "learning_rate": 2e-05, "loss": 0.04904998, "step": 1959 }, { "epoch": 3.92, "grad_norm": 4.30966329574585, "learning_rate": 2e-05, "loss": 0.05563934, "step": 1960 }, { "epoch": 3.922, "grad_norm": 4.848589897155762, "learning_rate": 2e-05, "loss": 0.06157205, "step": 1961 }, { "epoch": 3.924, "grad_norm": 4.3880205154418945, "learning_rate": 2e-05, "loss": 0.06647885, "step": 1962 }, { "epoch": 3.926, "grad_norm": 4.194542407989502, "learning_rate": 2e-05, "loss": 0.05526104, "step": 1963 }, { "epoch": 3.928, "grad_norm": 5.380399703979492, "learning_rate": 2e-05, "loss": 0.07660615, "step": 1964 }, { "epoch": 3.93, "grad_norm": 5.968742847442627, "learning_rate": 2e-05, "loss": 0.07779554, "step": 1965 }, { "epoch": 3.932, "grad_norm": 5.174100399017334, "learning_rate": 2e-05, "loss": 0.04581838, "step": 1966 }, { "epoch": 3.934, "grad_norm": 6.4657745361328125, "learning_rate": 2e-05, "loss": 0.09916742, "step": 1967 }, { "epoch": 3.936, "grad_norm": 3.961251974105835, "learning_rate": 2e-05, "loss": 0.05596356, "step": 1968 }, { "epoch": 3.9379999999999997, "grad_norm": 5.078394889831543, "learning_rate": 2e-05, "loss": 0.06592689, "step": 1969 }, { "epoch": 3.94, "grad_norm": 4.611362457275391, "learning_rate": 2e-05, "loss": 0.06590241, "step": 1970 }, { "epoch": 3.942, "grad_norm": 4.586516380310059, "learning_rate": 2e-05, "loss": 0.07017249, "step": 1971 }, { "epoch": 3.944, "grad_norm": 3.952863931655884, "learning_rate": 2e-05, "loss": 0.05018761, "step": 1972 }, { "epoch": 3.9459999999999997, "grad_norm": 4.2779130935668945, "learning_rate": 2e-05, "loss": 0.06130858, "step": 1973 }, { "epoch": 3.948, "grad_norm": 5.052500247955322, "learning_rate": 2e-05, "loss": 0.06397467, "step": 1974 }, { "epoch": 3.95, "grad_norm": 5.041876316070557, "learning_rate": 2e-05, "loss": 0.05906052, "step": 1975 }, { "epoch": 3.952, "grad_norm": 5.040378093719482, "learning_rate": 2e-05, "loss": 0.05972778, "step": 1976 }, { "epoch": 3.9539999999999997, "grad_norm": 5.21009635925293, "learning_rate": 2e-05, "loss": 0.05547023, "step": 1977 }, { "epoch": 3.956, "grad_norm": 4.1216816902160645, "learning_rate": 2e-05, "loss": 0.05305094, "step": 1978 }, { "epoch": 3.958, "grad_norm": 4.532718181610107, "learning_rate": 2e-05, "loss": 0.06055233, "step": 1979 }, { "epoch": 3.96, "grad_norm": 4.118321895599365, "learning_rate": 2e-05, "loss": 0.03361683, "step": 1980 }, { "epoch": 3.9619999999999997, "grad_norm": 5.71142053604126, "learning_rate": 2e-05, "loss": 0.07376152, "step": 1981 }, { "epoch": 3.964, "grad_norm": 4.697778224945068, "learning_rate": 2e-05, "loss": 0.04954509, "step": 1982 }, { "epoch": 3.966, "grad_norm": 4.421938896179199, "learning_rate": 2e-05, "loss": 0.05157427, "step": 1983 }, { "epoch": 3.968, "grad_norm": 4.381234645843506, "learning_rate": 2e-05, "loss": 0.05179839, "step": 1984 }, { "epoch": 3.9699999999999998, "grad_norm": 4.958186626434326, "learning_rate": 2e-05, "loss": 0.06459647, "step": 1985 }, { "epoch": 3.972, "grad_norm": 5.1543803215026855, "learning_rate": 2e-05, "loss": 0.05996488, "step": 1986 }, { "epoch": 3.974, "grad_norm": 3.6959214210510254, "learning_rate": 2e-05, "loss": 0.03466905, "step": 1987 }, { "epoch": 3.976, "grad_norm": 4.086817264556885, "learning_rate": 2e-05, "loss": 0.05642924, "step": 1988 }, { "epoch": 3.9779999999999998, "grad_norm": 4.809398174285889, "learning_rate": 2e-05, "loss": 0.07207856, "step": 1989 }, { "epoch": 3.98, "grad_norm": 4.913872718811035, "learning_rate": 2e-05, "loss": 0.06634032, "step": 1990 }, { "epoch": 3.982, "grad_norm": 5.8003830909729, "learning_rate": 2e-05, "loss": 0.05972134, "step": 1991 }, { "epoch": 3.984, "grad_norm": 4.181975364685059, "learning_rate": 2e-05, "loss": 0.04982509, "step": 1992 }, { "epoch": 3.9859999999999998, "grad_norm": 3.7347800731658936, "learning_rate": 2e-05, "loss": 0.04768407, "step": 1993 }, { "epoch": 3.988, "grad_norm": 4.212109565734863, "learning_rate": 2e-05, "loss": 0.04468692, "step": 1994 }, { "epoch": 3.99, "grad_norm": 5.445507526397705, "learning_rate": 2e-05, "loss": 0.05686402, "step": 1995 }, { "epoch": 3.992, "grad_norm": 4.320347309112549, "learning_rate": 2e-05, "loss": 0.04718179, "step": 1996 }, { "epoch": 3.9939999999999998, "grad_norm": 4.698556423187256, "learning_rate": 2e-05, "loss": 0.07051986, "step": 1997 }, { "epoch": 3.996, "grad_norm": 5.363099098205566, "learning_rate": 2e-05, "loss": 0.05213591, "step": 1998 }, { "epoch": 3.998, "grad_norm": 4.749522686004639, "learning_rate": 2e-05, "loss": 0.04798736, "step": 1999 }, { "epoch": 4.0, "grad_norm": 3.9576809406280518, "learning_rate": 2e-05, "loss": 0.04979634, "step": 2000 }, { "epoch": 4.0, "eval_performance": { "AngleClassification_1": 0.99, "AngleClassification_2": 0.98, "AngleClassification_3": 0.7784431137724551, "Equal_1": 0.946, "Equal_2": 0.8323353293413174, "Equal_3": 0.7385229540918163, "LineComparison_1": 1.0, "LineComparison_2": 0.9680638722554891, "LineComparison_3": 0.8942115768463074, "Parallel_1": 0.9318637274549099, "Parallel_2": 0.9438877755511023, "Parallel_3": 0.684, "Perpendicular_1": 0.94, "Perpendicular_2": 0.496, "Perpendicular_3": 0.18537074148296592, "PointLiesOnCircle_1": 0.9926519706078825, "PointLiesOnCircle_2": 0.9892000000000001, "PointLiesOnCircle_3": 0.9627333333333333, "PointLiesOnLine_1": 0.9739478957915831, "PointLiesOnLine_2": 0.7054108216432866, "PointLiesOnLine_3": 0.3333333333333333 }, "eval_runtime": 223.6592, "eval_samples_per_second": 46.946, "eval_steps_per_second": 0.939, "step": 2000 }, { "epoch": 4.002, "grad_norm": 4.87682580947876, "learning_rate": 2e-05, "loss": 0.05885933, "step": 2001 }, { "epoch": 4.004, "grad_norm": 4.577075958251953, "learning_rate": 2e-05, "loss": 0.05700376, "step": 2002 }, { "epoch": 4.006, "grad_norm": 13.10959529876709, "learning_rate": 2e-05, "loss": 0.06024166, "step": 2003 }, { "epoch": 4.008, "grad_norm": 3.8382177352905273, "learning_rate": 2e-05, "loss": 0.05115245, "step": 2004 }, { "epoch": 4.01, "grad_norm": 4.437366962432861, "learning_rate": 2e-05, "loss": 0.05110224, "step": 2005 }, { "epoch": 4.012, "grad_norm": 5.228863716125488, "learning_rate": 2e-05, "loss": 0.07374594, "step": 2006 }, { "epoch": 4.014, "grad_norm": 4.500857353210449, "learning_rate": 2e-05, "loss": 0.05655966, "step": 2007 }, { "epoch": 4.016, "grad_norm": 57.15351104736328, "learning_rate": 2e-05, "loss": 0.0582548, "step": 2008 }, { "epoch": 4.018, "grad_norm": 8.000506401062012, "learning_rate": 2e-05, "loss": 0.06218388, "step": 2009 }, { "epoch": 4.02, "grad_norm": 33.0832633972168, "learning_rate": 2e-05, "loss": 0.06539743, "step": 2010 }, { "epoch": 4.022, "grad_norm": 4.713083744049072, "learning_rate": 2e-05, "loss": 0.07962072, "step": 2011 }, { "epoch": 4.024, "grad_norm": 3.576230049133301, "learning_rate": 2e-05, "loss": 0.05096898, "step": 2012 }, { "epoch": 4.026, "grad_norm": 3.1422133445739746, "learning_rate": 2e-05, "loss": 0.0723597, "step": 2013 }, { "epoch": 4.028, "grad_norm": 4.242262840270996, "learning_rate": 2e-05, "loss": 0.07169779, "step": 2014 }, { "epoch": 4.03, "grad_norm": 2.4443042278289795, "learning_rate": 2e-05, "loss": 0.08562068, "step": 2015 }, { "epoch": 4.032, "grad_norm": 2.5326335430145264, "learning_rate": 2e-05, "loss": 0.06857469, "step": 2016 }, { "epoch": 4.034, "grad_norm": 1.805072546005249, "learning_rate": 2e-05, "loss": 0.05037333, "step": 2017 }, { "epoch": 4.036, "grad_norm": 3.495650053024292, "learning_rate": 2e-05, "loss": 0.09807852, "step": 2018 }, { "epoch": 4.038, "grad_norm": 1.743381381034851, "learning_rate": 2e-05, "loss": 0.06241225, "step": 2019 }, { "epoch": 4.04, "grad_norm": 2.9195055961608887, "learning_rate": 2e-05, "loss": 0.06386232, "step": 2020 }, { "epoch": 4.042, "grad_norm": 2.4798526763916016, "learning_rate": 2e-05, "loss": 0.06865426, "step": 2021 }, { "epoch": 4.044, "grad_norm": 2.7420737743377686, "learning_rate": 2e-05, "loss": 0.06213267, "step": 2022 }, { "epoch": 4.046, "grad_norm": 1.8497133255004883, "learning_rate": 2e-05, "loss": 0.06332147, "step": 2023 }, { "epoch": 4.048, "grad_norm": 2.5937910079956055, "learning_rate": 2e-05, "loss": 0.08703558, "step": 2024 }, { "epoch": 4.05, "grad_norm": 3.5940914154052734, "learning_rate": 2e-05, "loss": 0.10185431, "step": 2025 }, { "epoch": 4.052, "grad_norm": 1.8496922254562378, "learning_rate": 2e-05, "loss": 0.05430729, "step": 2026 }, { "epoch": 4.054, "grad_norm": 1.6253150701522827, "learning_rate": 2e-05, "loss": 0.05013484, "step": 2027 }, { "epoch": 4.056, "grad_norm": 1.8880776166915894, "learning_rate": 2e-05, "loss": 0.06977751, "step": 2028 }, { "epoch": 4.058, "grad_norm": 3.538519859313965, "learning_rate": 2e-05, "loss": 0.09098226, "step": 2029 }, { "epoch": 4.06, "grad_norm": 1.6160287857055664, "learning_rate": 2e-05, "loss": 0.06357321, "step": 2030 }, { "epoch": 4.062, "grad_norm": 1.4843064546585083, "learning_rate": 2e-05, "loss": 0.06167606, "step": 2031 }, { "epoch": 4.064, "grad_norm": 2.1245222091674805, "learning_rate": 2e-05, "loss": 0.0667711, "step": 2032 }, { "epoch": 4.066, "grad_norm": 2.2944188117980957, "learning_rate": 2e-05, "loss": 0.07098155, "step": 2033 }, { "epoch": 4.068, "grad_norm": 2.167443037033081, "learning_rate": 2e-05, "loss": 0.06893489, "step": 2034 }, { "epoch": 4.07, "grad_norm": 1.9996265172958374, "learning_rate": 2e-05, "loss": 0.06718732, "step": 2035 }, { "epoch": 4.072, "grad_norm": 2.0243980884552, "learning_rate": 2e-05, "loss": 0.06886362, "step": 2036 }, { "epoch": 4.074, "grad_norm": 2.7469656467437744, "learning_rate": 2e-05, "loss": 0.06434768, "step": 2037 }, { "epoch": 4.076, "grad_norm": 2.481520175933838, "learning_rate": 2e-05, "loss": 0.08091324, "step": 2038 }, { "epoch": 4.078, "grad_norm": 2.7998697757720947, "learning_rate": 2e-05, "loss": 0.08926216, "step": 2039 }, { "epoch": 4.08, "grad_norm": 2.5998570919036865, "learning_rate": 2e-05, "loss": 0.10259391, "step": 2040 }, { "epoch": 4.082, "grad_norm": 2.2952234745025635, "learning_rate": 2e-05, "loss": 0.06699659, "step": 2041 }, { "epoch": 4.084, "grad_norm": 2.380432367324829, "learning_rate": 2e-05, "loss": 0.08693619, "step": 2042 }, { "epoch": 4.086, "grad_norm": 2.7440905570983887, "learning_rate": 2e-05, "loss": 0.08284201, "step": 2043 }, { "epoch": 4.088, "grad_norm": 2.5323734283447266, "learning_rate": 2e-05, "loss": 0.08309086, "step": 2044 }, { "epoch": 4.09, "grad_norm": 2.363617181777954, "learning_rate": 2e-05, "loss": 0.06716576, "step": 2045 }, { "epoch": 4.092, "grad_norm": 1.534680724143982, "learning_rate": 2e-05, "loss": 0.05576162, "step": 2046 }, { "epoch": 4.094, "grad_norm": 2.593501091003418, "learning_rate": 2e-05, "loss": 0.08169766, "step": 2047 }, { "epoch": 4.096, "grad_norm": 1.9508519172668457, "learning_rate": 2e-05, "loss": 0.09850444, "step": 2048 }, { "epoch": 4.098, "grad_norm": 2.4077203273773193, "learning_rate": 2e-05, "loss": 0.07383195, "step": 2049 }, { "epoch": 4.1, "grad_norm": 2.6462998390197754, "learning_rate": 2e-05, "loss": 0.05750649, "step": 2050 }, { "epoch": 4.102, "grad_norm": 3.3773715496063232, "learning_rate": 2e-05, "loss": 0.06155496, "step": 2051 }, { "epoch": 4.104, "grad_norm": 2.1335482597351074, "learning_rate": 2e-05, "loss": 0.05890286, "step": 2052 }, { "epoch": 4.106, "grad_norm": 1.8669164180755615, "learning_rate": 2e-05, "loss": 0.08220725, "step": 2053 }, { "epoch": 4.108, "grad_norm": 2.034297227859497, "learning_rate": 2e-05, "loss": 0.06804092, "step": 2054 }, { "epoch": 4.11, "grad_norm": 2.931027412414551, "learning_rate": 2e-05, "loss": 0.09182949, "step": 2055 }, { "epoch": 4.112, "grad_norm": 4.534605503082275, "learning_rate": 2e-05, "loss": 0.0764512, "step": 2056 }, { "epoch": 4.114, "grad_norm": 1.9897863864898682, "learning_rate": 2e-05, "loss": 0.06398592, "step": 2057 }, { "epoch": 4.116, "grad_norm": 2.5322673320770264, "learning_rate": 2e-05, "loss": 0.07038671, "step": 2058 }, { "epoch": 4.118, "grad_norm": 1.61471426486969, "learning_rate": 2e-05, "loss": 0.05462667, "step": 2059 }, { "epoch": 4.12, "grad_norm": 2.2182600498199463, "learning_rate": 2e-05, "loss": 0.07155888, "step": 2060 }, { "epoch": 4.122, "grad_norm": 3.5120480060577393, "learning_rate": 2e-05, "loss": 0.08027807, "step": 2061 }, { "epoch": 4.124, "grad_norm": 3.1703994274139404, "learning_rate": 2e-05, "loss": 0.06480147, "step": 2062 }, { "epoch": 4.126, "grad_norm": 3.2683236598968506, "learning_rate": 2e-05, "loss": 0.07983235, "step": 2063 }, { "epoch": 4.128, "grad_norm": 2.015371322631836, "learning_rate": 2e-05, "loss": 0.05325734, "step": 2064 }, { "epoch": 4.13, "grad_norm": 2.296186685562134, "learning_rate": 2e-05, "loss": 0.07470506, "step": 2065 }, { "epoch": 4.132, "grad_norm": 2.5576698780059814, "learning_rate": 2e-05, "loss": 0.04827991, "step": 2066 }, { "epoch": 4.134, "grad_norm": 2.7494218349456787, "learning_rate": 2e-05, "loss": 0.08124104, "step": 2067 }, { "epoch": 4.136, "grad_norm": 3.5427353382110596, "learning_rate": 2e-05, "loss": 0.05170012, "step": 2068 }, { "epoch": 4.138, "grad_norm": 2.4055335521698, "learning_rate": 2e-05, "loss": 0.08799072, "step": 2069 }, { "epoch": 4.14, "grad_norm": 2.170450210571289, "learning_rate": 2e-05, "loss": 0.05958853, "step": 2070 }, { "epoch": 4.142, "grad_norm": 2.015624523162842, "learning_rate": 2e-05, "loss": 0.0841803, "step": 2071 }, { "epoch": 4.144, "grad_norm": 1.3869465589523315, "learning_rate": 2e-05, "loss": 0.05039412, "step": 2072 }, { "epoch": 4.146, "grad_norm": 2.3391127586364746, "learning_rate": 2e-05, "loss": 0.06459662, "step": 2073 }, { "epoch": 4.148, "grad_norm": 1.7965855598449707, "learning_rate": 2e-05, "loss": 0.0795145, "step": 2074 }, { "epoch": 4.15, "grad_norm": 2.2106170654296875, "learning_rate": 2e-05, "loss": 0.06557088, "step": 2075 }, { "epoch": 4.152, "grad_norm": 1.9331780672073364, "learning_rate": 2e-05, "loss": 0.08635149, "step": 2076 }, { "epoch": 4.154, "grad_norm": 1.7590290307998657, "learning_rate": 2e-05, "loss": 0.0543587, "step": 2077 }, { "epoch": 4.156, "grad_norm": 2.947974681854248, "learning_rate": 2e-05, "loss": 0.05047835, "step": 2078 }, { "epoch": 4.158, "grad_norm": 2.626875162124634, "learning_rate": 2e-05, "loss": 0.0750833, "step": 2079 }, { "epoch": 4.16, "grad_norm": 3.0683748722076416, "learning_rate": 2e-05, "loss": 0.09185554, "step": 2080 }, { "epoch": 4.162, "grad_norm": 1.969033122062683, "learning_rate": 2e-05, "loss": 0.0946613, "step": 2081 }, { "epoch": 4.164, "grad_norm": 2.180485725402832, "learning_rate": 2e-05, "loss": 0.06498498, "step": 2082 }, { "epoch": 4.166, "grad_norm": 2.3249173164367676, "learning_rate": 2e-05, "loss": 0.05043422, "step": 2083 }, { "epoch": 4.168, "grad_norm": 1.2160170078277588, "learning_rate": 2e-05, "loss": 0.04425203, "step": 2084 }, { "epoch": 4.17, "grad_norm": 2.599290132522583, "learning_rate": 2e-05, "loss": 0.06444152, "step": 2085 }, { "epoch": 4.172, "grad_norm": 2.4178757667541504, "learning_rate": 2e-05, "loss": 0.09480008, "step": 2086 }, { "epoch": 4.174, "grad_norm": 2.076537609100342, "learning_rate": 2e-05, "loss": 0.08658104, "step": 2087 }, { "epoch": 4.176, "grad_norm": 2.091676950454712, "learning_rate": 2e-05, "loss": 0.07611567, "step": 2088 }, { "epoch": 4.178, "grad_norm": 2.880066156387329, "learning_rate": 2e-05, "loss": 0.08253571, "step": 2089 }, { "epoch": 4.18, "grad_norm": 1.829533338546753, "learning_rate": 2e-05, "loss": 0.06991093, "step": 2090 }, { "epoch": 4.182, "grad_norm": 2.014244318008423, "learning_rate": 2e-05, "loss": 0.07727417, "step": 2091 }, { "epoch": 4.184, "grad_norm": 1.5599557161331177, "learning_rate": 2e-05, "loss": 0.05468753, "step": 2092 }, { "epoch": 4.186, "grad_norm": 1.6581332683563232, "learning_rate": 2e-05, "loss": 0.05904954, "step": 2093 }, { "epoch": 4.188, "grad_norm": 1.7423419952392578, "learning_rate": 2e-05, "loss": 0.06548748, "step": 2094 }, { "epoch": 4.19, "grad_norm": 1.3116430044174194, "learning_rate": 2e-05, "loss": 0.0457895, "step": 2095 }, { "epoch": 4.192, "grad_norm": 2.50366473197937, "learning_rate": 2e-05, "loss": 0.07519452, "step": 2096 }, { "epoch": 4.194, "grad_norm": 2.794949769973755, "learning_rate": 2e-05, "loss": 0.0696205, "step": 2097 }, { "epoch": 4.196, "grad_norm": 1.944620132446289, "learning_rate": 2e-05, "loss": 0.05766869, "step": 2098 }, { "epoch": 4.198, "grad_norm": 2.2298715114593506, "learning_rate": 2e-05, "loss": 0.05986073, "step": 2099 }, { "epoch": 4.2, "grad_norm": 2.2252049446105957, "learning_rate": 2e-05, "loss": 0.06690154, "step": 2100 }, { "epoch": 4.202, "grad_norm": 2.0310893058776855, "learning_rate": 2e-05, "loss": 0.07037569, "step": 2101 }, { "epoch": 4.204, "grad_norm": 1.2759891748428345, "learning_rate": 2e-05, "loss": 0.04330191, "step": 2102 }, { "epoch": 4.206, "grad_norm": 3.190371036529541, "learning_rate": 2e-05, "loss": 0.06773426, "step": 2103 }, { "epoch": 4.208, "grad_norm": 1.6134597063064575, "learning_rate": 2e-05, "loss": 0.05151179, "step": 2104 }, { "epoch": 4.21, "grad_norm": 2.1029112339019775, "learning_rate": 2e-05, "loss": 0.06947412, "step": 2105 }, { "epoch": 4.212, "grad_norm": 1.3123047351837158, "learning_rate": 2e-05, "loss": 0.05057207, "step": 2106 }, { "epoch": 4.214, "grad_norm": 1.9851627349853516, "learning_rate": 2e-05, "loss": 0.06849574, "step": 2107 }, { "epoch": 4.216, "grad_norm": 1.4697065353393555, "learning_rate": 2e-05, "loss": 0.05435709, "step": 2108 }, { "epoch": 4.218, "grad_norm": 1.722690463066101, "learning_rate": 2e-05, "loss": 0.04967844, "step": 2109 }, { "epoch": 4.22, "grad_norm": 1.4455434083938599, "learning_rate": 2e-05, "loss": 0.05291413, "step": 2110 }, { "epoch": 4.222, "grad_norm": 1.4631705284118652, "learning_rate": 2e-05, "loss": 0.04949733, "step": 2111 }, { "epoch": 4.224, "grad_norm": 1.6640645265579224, "learning_rate": 2e-05, "loss": 0.06776321, "step": 2112 }, { "epoch": 4.226, "grad_norm": 2.497854471206665, "learning_rate": 2e-05, "loss": 0.095273, "step": 2113 }, { "epoch": 4.228, "grad_norm": 3.756927967071533, "learning_rate": 2e-05, "loss": 0.07572605, "step": 2114 }, { "epoch": 4.23, "grad_norm": 2.8405256271362305, "learning_rate": 2e-05, "loss": 0.04320454, "step": 2115 }, { "epoch": 4.232, "grad_norm": 3.5213704109191895, "learning_rate": 2e-05, "loss": 0.07051377, "step": 2116 }, { "epoch": 4.234, "grad_norm": 1.9120938777923584, "learning_rate": 2e-05, "loss": 0.04904056, "step": 2117 }, { "epoch": 4.236, "grad_norm": 2.536379098892212, "learning_rate": 2e-05, "loss": 0.06991646, "step": 2118 }, { "epoch": 4.2379999999999995, "grad_norm": 1.3452427387237549, "learning_rate": 2e-05, "loss": 0.04128489, "step": 2119 }, { "epoch": 4.24, "grad_norm": 1.7226845026016235, "learning_rate": 2e-05, "loss": 0.05443864, "step": 2120 }, { "epoch": 4.242, "grad_norm": 2.233675003051758, "learning_rate": 2e-05, "loss": 0.07435696, "step": 2121 }, { "epoch": 4.244, "grad_norm": 1.4224317073822021, "learning_rate": 2e-05, "loss": 0.0513529, "step": 2122 }, { "epoch": 4.246, "grad_norm": 2.3233835697174072, "learning_rate": 2e-05, "loss": 0.07225094, "step": 2123 }, { "epoch": 4.248, "grad_norm": 1.8742501735687256, "learning_rate": 2e-05, "loss": 0.06230294, "step": 2124 }, { "epoch": 4.25, "grad_norm": 1.8674055337905884, "learning_rate": 2e-05, "loss": 0.05324207, "step": 2125 }, { "epoch": 4.252, "grad_norm": 0.9802767634391785, "learning_rate": 2e-05, "loss": 0.02811433, "step": 2126 }, { "epoch": 4.254, "grad_norm": 2.4101953506469727, "learning_rate": 2e-05, "loss": 0.0640424, "step": 2127 }, { "epoch": 4.256, "grad_norm": 2.3560965061187744, "learning_rate": 2e-05, "loss": 0.07676764, "step": 2128 }, { "epoch": 4.258, "grad_norm": 1.9370006322860718, "learning_rate": 2e-05, "loss": 0.04682978, "step": 2129 }, { "epoch": 4.26, "grad_norm": 1.4009041786193848, "learning_rate": 2e-05, "loss": 0.04357517, "step": 2130 }, { "epoch": 4.2620000000000005, "grad_norm": 1.6257710456848145, "learning_rate": 2e-05, "loss": 0.05799401, "step": 2131 }, { "epoch": 4.264, "grad_norm": 1.8044545650482178, "learning_rate": 2e-05, "loss": 0.05761299, "step": 2132 }, { "epoch": 4.266, "grad_norm": 2.374757766723633, "learning_rate": 2e-05, "loss": 0.05877838, "step": 2133 }, { "epoch": 4.268, "grad_norm": 2.391726493835449, "learning_rate": 2e-05, "loss": 0.07936852, "step": 2134 }, { "epoch": 4.27, "grad_norm": 1.6041451692581177, "learning_rate": 2e-05, "loss": 0.0719478, "step": 2135 }, { "epoch": 4.272, "grad_norm": 2.387320041656494, "learning_rate": 2e-05, "loss": 0.06234674, "step": 2136 }, { "epoch": 4.274, "grad_norm": 1.5251657962799072, "learning_rate": 2e-05, "loss": 0.04178799, "step": 2137 }, { "epoch": 4.276, "grad_norm": 3.068707227706909, "learning_rate": 2e-05, "loss": 0.08280512, "step": 2138 }, { "epoch": 4.2780000000000005, "grad_norm": 2.4442038536071777, "learning_rate": 2e-05, "loss": 0.07348214, "step": 2139 }, { "epoch": 4.28, "grad_norm": 1.566960096359253, "learning_rate": 2e-05, "loss": 0.06027886, "step": 2140 }, { "epoch": 4.282, "grad_norm": 2.2830843925476074, "learning_rate": 2e-05, "loss": 0.06133959, "step": 2141 }, { "epoch": 4.284, "grad_norm": 2.984172821044922, "learning_rate": 2e-05, "loss": 0.06118892, "step": 2142 }, { "epoch": 4.286, "grad_norm": 1.568138599395752, "learning_rate": 2e-05, "loss": 0.06064623, "step": 2143 }, { "epoch": 4.288, "grad_norm": 1.4556996822357178, "learning_rate": 2e-05, "loss": 0.06521779, "step": 2144 }, { "epoch": 4.29, "grad_norm": 1.815783977508545, "learning_rate": 2e-05, "loss": 0.06455804, "step": 2145 }, { "epoch": 4.292, "grad_norm": 2.1943249702453613, "learning_rate": 2e-05, "loss": 0.0650651, "step": 2146 }, { "epoch": 4.294, "grad_norm": 2.6643364429473877, "learning_rate": 2e-05, "loss": 0.06274134, "step": 2147 }, { "epoch": 4.296, "grad_norm": 2.5362017154693604, "learning_rate": 2e-05, "loss": 0.06957991, "step": 2148 }, { "epoch": 4.298, "grad_norm": 2.280350923538208, "learning_rate": 2e-05, "loss": 0.07009935, "step": 2149 }, { "epoch": 4.3, "grad_norm": 2.6491894721984863, "learning_rate": 2e-05, "loss": 0.05718841, "step": 2150 }, { "epoch": 4.302, "grad_norm": 2.2215709686279297, "learning_rate": 2e-05, "loss": 0.06770701, "step": 2151 }, { "epoch": 4.304, "grad_norm": 1.2758034467697144, "learning_rate": 2e-05, "loss": 0.05514861, "step": 2152 }, { "epoch": 4.306, "grad_norm": 1.4381458759307861, "learning_rate": 2e-05, "loss": 0.04661397, "step": 2153 }, { "epoch": 4.308, "grad_norm": 1.3977556228637695, "learning_rate": 2e-05, "loss": 0.035157, "step": 2154 }, { "epoch": 4.31, "grad_norm": 3.6932520866394043, "learning_rate": 2e-05, "loss": 0.07413986, "step": 2155 }, { "epoch": 4.312, "grad_norm": 1.746433973312378, "learning_rate": 2e-05, "loss": 0.059032, "step": 2156 }, { "epoch": 4.314, "grad_norm": 1.339585304260254, "learning_rate": 2e-05, "loss": 0.05559451, "step": 2157 }, { "epoch": 4.316, "grad_norm": 2.1844289302825928, "learning_rate": 2e-05, "loss": 0.06665906, "step": 2158 }, { "epoch": 4.318, "grad_norm": 1.8123836517333984, "learning_rate": 2e-05, "loss": 0.05686476, "step": 2159 }, { "epoch": 4.32, "grad_norm": 1.2558856010437012, "learning_rate": 2e-05, "loss": 0.05555471, "step": 2160 }, { "epoch": 4.322, "grad_norm": 2.233225107192993, "learning_rate": 2e-05, "loss": 0.04429844, "step": 2161 }, { "epoch": 4.324, "grad_norm": 1.5910862684249878, "learning_rate": 2e-05, "loss": 0.05141015, "step": 2162 }, { "epoch": 4.326, "grad_norm": 2.0568971633911133, "learning_rate": 2e-05, "loss": 0.06358062, "step": 2163 }, { "epoch": 4.328, "grad_norm": 2.15842604637146, "learning_rate": 2e-05, "loss": 0.05435416, "step": 2164 }, { "epoch": 4.33, "grad_norm": 2.0609073638916016, "learning_rate": 2e-05, "loss": 0.09611841, "step": 2165 }, { "epoch": 4.332, "grad_norm": 2.6531975269317627, "learning_rate": 2e-05, "loss": 0.03329624, "step": 2166 }, { "epoch": 4.334, "grad_norm": 2.7568368911743164, "learning_rate": 2e-05, "loss": 0.08146547, "step": 2167 }, { "epoch": 4.336, "grad_norm": 2.043020248413086, "learning_rate": 2e-05, "loss": 0.04607582, "step": 2168 }, { "epoch": 4.338, "grad_norm": 2.695254325866699, "learning_rate": 2e-05, "loss": 0.06282659, "step": 2169 }, { "epoch": 4.34, "grad_norm": 2.703526496887207, "learning_rate": 2e-05, "loss": 0.06143557, "step": 2170 }, { "epoch": 4.342, "grad_norm": 1.7367357015609741, "learning_rate": 2e-05, "loss": 0.05527718, "step": 2171 }, { "epoch": 4.344, "grad_norm": 3.0884509086608887, "learning_rate": 2e-05, "loss": 0.06860717, "step": 2172 }, { "epoch": 4.346, "grad_norm": 3.0278186798095703, "learning_rate": 2e-05, "loss": 0.06128006, "step": 2173 }, { "epoch": 4.348, "grad_norm": 1.9896849393844604, "learning_rate": 2e-05, "loss": 0.07287191, "step": 2174 }, { "epoch": 4.35, "grad_norm": 3.444230556488037, "learning_rate": 2e-05, "loss": 0.08046679, "step": 2175 }, { "epoch": 4.352, "grad_norm": 3.0761396884918213, "learning_rate": 2e-05, "loss": 0.06595425, "step": 2176 }, { "epoch": 4.354, "grad_norm": 1.824507474899292, "learning_rate": 2e-05, "loss": 0.05125204, "step": 2177 }, { "epoch": 4.356, "grad_norm": 2.589108943939209, "learning_rate": 2e-05, "loss": 0.06869285, "step": 2178 }, { "epoch": 4.358, "grad_norm": 1.6224157810211182, "learning_rate": 2e-05, "loss": 0.04536261, "step": 2179 }, { "epoch": 4.36, "grad_norm": 1.8363970518112183, "learning_rate": 2e-05, "loss": 0.0538354, "step": 2180 }, { "epoch": 4.362, "grad_norm": 1.5119564533233643, "learning_rate": 2e-05, "loss": 0.05499202, "step": 2181 }, { "epoch": 4.364, "grad_norm": 2.467362880706787, "learning_rate": 2e-05, "loss": 0.07913838, "step": 2182 }, { "epoch": 4.366, "grad_norm": 2.4719889163970947, "learning_rate": 2e-05, "loss": 0.03990192, "step": 2183 }, { "epoch": 4.368, "grad_norm": 2.276611328125, "learning_rate": 2e-05, "loss": 0.06973057, "step": 2184 }, { "epoch": 4.37, "grad_norm": 1.7673412561416626, "learning_rate": 2e-05, "loss": 0.04764725, "step": 2185 }, { "epoch": 4.372, "grad_norm": 1.6289145946502686, "learning_rate": 2e-05, "loss": 0.05207074, "step": 2186 }, { "epoch": 4.374, "grad_norm": 2.181138515472412, "learning_rate": 2e-05, "loss": 0.07089337, "step": 2187 }, { "epoch": 4.376, "grad_norm": 1.2623659372329712, "learning_rate": 2e-05, "loss": 0.04139027, "step": 2188 }, { "epoch": 4.378, "grad_norm": 1.4012755155563354, "learning_rate": 2e-05, "loss": 0.0553776, "step": 2189 }, { "epoch": 4.38, "grad_norm": 1.7510896921157837, "learning_rate": 2e-05, "loss": 0.0533141, "step": 2190 }, { "epoch": 4.382, "grad_norm": 2.804046869277954, "learning_rate": 2e-05, "loss": 0.08163269, "step": 2191 }, { "epoch": 4.384, "grad_norm": 2.889782667160034, "learning_rate": 2e-05, "loss": 0.07161613, "step": 2192 }, { "epoch": 4.386, "grad_norm": 1.2898098230361938, "learning_rate": 2e-05, "loss": 0.05927003, "step": 2193 }, { "epoch": 4.388, "grad_norm": 2.9435925483703613, "learning_rate": 2e-05, "loss": 0.06617457, "step": 2194 }, { "epoch": 4.39, "grad_norm": 1.2243132591247559, "learning_rate": 2e-05, "loss": 0.05156112, "step": 2195 }, { "epoch": 4.392, "grad_norm": 1.2882531881332397, "learning_rate": 2e-05, "loss": 0.03877871, "step": 2196 }, { "epoch": 4.394, "grad_norm": 1.0483880043029785, "learning_rate": 2e-05, "loss": 0.04459372, "step": 2197 }, { "epoch": 4.396, "grad_norm": 1.307885766029358, "learning_rate": 2e-05, "loss": 0.05245619, "step": 2198 }, { "epoch": 4.398, "grad_norm": 1.6140661239624023, "learning_rate": 2e-05, "loss": 0.03918041, "step": 2199 }, { "epoch": 4.4, "grad_norm": 2.105602264404297, "learning_rate": 2e-05, "loss": 0.04122904, "step": 2200 }, { "epoch": 4.402, "grad_norm": 1.8410885334014893, "learning_rate": 2e-05, "loss": 0.06911102, "step": 2201 }, { "epoch": 4.404, "grad_norm": 1.811063528060913, "learning_rate": 2e-05, "loss": 0.04315716, "step": 2202 }, { "epoch": 4.406, "grad_norm": 1.459837794303894, "learning_rate": 2e-05, "loss": 0.04318622, "step": 2203 }, { "epoch": 4.408, "grad_norm": 1.4175668954849243, "learning_rate": 2e-05, "loss": 0.06464553, "step": 2204 }, { "epoch": 4.41, "grad_norm": 2.9506874084472656, "learning_rate": 2e-05, "loss": 0.0487679, "step": 2205 }, { "epoch": 4.412, "grad_norm": 1.5062443017959595, "learning_rate": 2e-05, "loss": 0.03637538, "step": 2206 }, { "epoch": 4.414, "grad_norm": 1.4554049968719482, "learning_rate": 2e-05, "loss": 0.04301089, "step": 2207 }, { "epoch": 4.416, "grad_norm": 1.9715466499328613, "learning_rate": 2e-05, "loss": 0.04633316, "step": 2208 }, { "epoch": 4.418, "grad_norm": 1.1670140027999878, "learning_rate": 2e-05, "loss": 0.03505767, "step": 2209 }, { "epoch": 4.42, "grad_norm": 1.5463470220565796, "learning_rate": 2e-05, "loss": 0.05089226, "step": 2210 }, { "epoch": 4.422, "grad_norm": 2.2836222648620605, "learning_rate": 2e-05, "loss": 0.04239431, "step": 2211 }, { "epoch": 4.424, "grad_norm": 1.6157147884368896, "learning_rate": 2e-05, "loss": 0.05486425, "step": 2212 }, { "epoch": 4.426, "grad_norm": 1.8057782649993896, "learning_rate": 2e-05, "loss": 0.07029882, "step": 2213 }, { "epoch": 4.428, "grad_norm": 1.3141433000564575, "learning_rate": 2e-05, "loss": 0.03010893, "step": 2214 }, { "epoch": 4.43, "grad_norm": 2.2373409271240234, "learning_rate": 2e-05, "loss": 0.05205792, "step": 2215 }, { "epoch": 4.432, "grad_norm": 3.2119343280792236, "learning_rate": 2e-05, "loss": 0.08154503, "step": 2216 }, { "epoch": 4.434, "grad_norm": 1.5069502592086792, "learning_rate": 2e-05, "loss": 0.05093011, "step": 2217 }, { "epoch": 4.436, "grad_norm": 1.2729697227478027, "learning_rate": 2e-05, "loss": 0.03538412, "step": 2218 }, { "epoch": 4.438, "grad_norm": 1.1460471153259277, "learning_rate": 2e-05, "loss": 0.0347873, "step": 2219 }, { "epoch": 4.44, "grad_norm": 1.7281122207641602, "learning_rate": 2e-05, "loss": 0.07623666, "step": 2220 }, { "epoch": 4.442, "grad_norm": 1.671638011932373, "learning_rate": 2e-05, "loss": 0.0428994, "step": 2221 }, { "epoch": 4.444, "grad_norm": 2.163966655731201, "learning_rate": 2e-05, "loss": 0.06253745, "step": 2222 }, { "epoch": 4.446, "grad_norm": 1.8242230415344238, "learning_rate": 2e-05, "loss": 0.05540806, "step": 2223 }, { "epoch": 4.448, "grad_norm": 2.4013001918792725, "learning_rate": 2e-05, "loss": 0.04804887, "step": 2224 }, { "epoch": 4.45, "grad_norm": 1.0654358863830566, "learning_rate": 2e-05, "loss": 0.03904779, "step": 2225 }, { "epoch": 4.452, "grad_norm": 1.4060930013656616, "learning_rate": 2e-05, "loss": 0.04274734, "step": 2226 }, { "epoch": 4.454, "grad_norm": 1.313794732093811, "learning_rate": 2e-05, "loss": 0.05259756, "step": 2227 }, { "epoch": 4.456, "grad_norm": 2.7005715370178223, "learning_rate": 2e-05, "loss": 0.07153238, "step": 2228 }, { "epoch": 4.458, "grad_norm": 1.6268199682235718, "learning_rate": 2e-05, "loss": 0.04180001, "step": 2229 }, { "epoch": 4.46, "grad_norm": 2.5034005641937256, "learning_rate": 2e-05, "loss": 0.06206177, "step": 2230 }, { "epoch": 4.462, "grad_norm": 1.5140177011489868, "learning_rate": 2e-05, "loss": 0.04124136, "step": 2231 }, { "epoch": 4.464, "grad_norm": 1.5407781600952148, "learning_rate": 2e-05, "loss": 0.06411501, "step": 2232 }, { "epoch": 4.466, "grad_norm": 1.6786034107208252, "learning_rate": 2e-05, "loss": 0.04331366, "step": 2233 }, { "epoch": 4.468, "grad_norm": 1.310099720954895, "learning_rate": 2e-05, "loss": 0.04765248, "step": 2234 }, { "epoch": 4.47, "grad_norm": 1.5286815166473389, "learning_rate": 2e-05, "loss": 0.06607872, "step": 2235 }, { "epoch": 4.4719999999999995, "grad_norm": 1.864086389541626, "learning_rate": 2e-05, "loss": 0.06159226, "step": 2236 }, { "epoch": 4.474, "grad_norm": 2.2428743839263916, "learning_rate": 2e-05, "loss": 0.07333539, "step": 2237 }, { "epoch": 4.476, "grad_norm": 1.565376877784729, "learning_rate": 2e-05, "loss": 0.07345209, "step": 2238 }, { "epoch": 4.478, "grad_norm": 1.6671366691589355, "learning_rate": 2e-05, "loss": 0.06513897, "step": 2239 }, { "epoch": 4.48, "grad_norm": 1.5176079273223877, "learning_rate": 2e-05, "loss": 0.06410583, "step": 2240 }, { "epoch": 4.482, "grad_norm": 1.4103423357009888, "learning_rate": 2e-05, "loss": 0.03861617, "step": 2241 }, { "epoch": 4.484, "grad_norm": 1.6178919076919556, "learning_rate": 2e-05, "loss": 0.04136807, "step": 2242 }, { "epoch": 4.486, "grad_norm": 1.8763679265975952, "learning_rate": 2e-05, "loss": 0.03785964, "step": 2243 }, { "epoch": 4.4879999999999995, "grad_norm": 1.423840880393982, "learning_rate": 2e-05, "loss": 0.05916028, "step": 2244 }, { "epoch": 4.49, "grad_norm": 1.3803223371505737, "learning_rate": 2e-05, "loss": 0.04570113, "step": 2245 }, { "epoch": 4.492, "grad_norm": 1.5447231531143188, "learning_rate": 2e-05, "loss": 0.04154298, "step": 2246 }, { "epoch": 4.494, "grad_norm": 1.5305242538452148, "learning_rate": 2e-05, "loss": 0.05297501, "step": 2247 }, { "epoch": 4.496, "grad_norm": 2.229564905166626, "learning_rate": 2e-05, "loss": 0.06438207, "step": 2248 }, { "epoch": 4.498, "grad_norm": 2.015815019607544, "learning_rate": 2e-05, "loss": 0.05926164, "step": 2249 }, { "epoch": 4.5, "grad_norm": 1.980627179145813, "learning_rate": 2e-05, "loss": 0.05198442, "step": 2250 }, { "epoch": 4.502, "grad_norm": 1.3818317651748657, "learning_rate": 2e-05, "loss": 0.03457716, "step": 2251 }, { "epoch": 4.504, "grad_norm": 1.6316359043121338, "learning_rate": 2e-05, "loss": 0.0562136, "step": 2252 }, { "epoch": 4.506, "grad_norm": 1.1971443891525269, "learning_rate": 2e-05, "loss": 0.03458899, "step": 2253 }, { "epoch": 4.508, "grad_norm": 1.39759361743927, "learning_rate": 2e-05, "loss": 0.03321704, "step": 2254 }, { "epoch": 4.51, "grad_norm": 2.4832005500793457, "learning_rate": 2e-05, "loss": 0.08104795, "step": 2255 }, { "epoch": 4.5120000000000005, "grad_norm": 2.107149362564087, "learning_rate": 2e-05, "loss": 0.06261372, "step": 2256 }, { "epoch": 4.514, "grad_norm": 1.4869227409362793, "learning_rate": 2e-05, "loss": 0.05662797, "step": 2257 }, { "epoch": 4.516, "grad_norm": 1.576983094215393, "learning_rate": 2e-05, "loss": 0.04937998, "step": 2258 }, { "epoch": 4.518, "grad_norm": 1.7932771444320679, "learning_rate": 2e-05, "loss": 0.07361036, "step": 2259 }, { "epoch": 4.52, "grad_norm": 1.2041795253753662, "learning_rate": 2e-05, "loss": 0.04456168, "step": 2260 }, { "epoch": 4.522, "grad_norm": 1.6391611099243164, "learning_rate": 2e-05, "loss": 0.04282523, "step": 2261 }, { "epoch": 4.524, "grad_norm": 1.7272435426712036, "learning_rate": 2e-05, "loss": 0.06249442, "step": 2262 }, { "epoch": 4.526, "grad_norm": 1.400423526763916, "learning_rate": 2e-05, "loss": 0.04406253, "step": 2263 }, { "epoch": 4.5280000000000005, "grad_norm": 1.65554678440094, "learning_rate": 2e-05, "loss": 0.03972442, "step": 2264 }, { "epoch": 4.53, "grad_norm": 1.5990946292877197, "learning_rate": 2e-05, "loss": 0.05160534, "step": 2265 }, { "epoch": 4.532, "grad_norm": 1.848978877067566, "learning_rate": 2e-05, "loss": 0.07042683, "step": 2266 }, { "epoch": 4.534, "grad_norm": 1.1840606927871704, "learning_rate": 2e-05, "loss": 0.04732277, "step": 2267 }, { "epoch": 4.536, "grad_norm": 1.82040536403656, "learning_rate": 2e-05, "loss": 0.05201408, "step": 2268 }, { "epoch": 4.538, "grad_norm": 1.4673718214035034, "learning_rate": 2e-05, "loss": 0.04437625, "step": 2269 }, { "epoch": 4.54, "grad_norm": 1.1988884210586548, "learning_rate": 2e-05, "loss": 0.03644849, "step": 2270 }, { "epoch": 4.542, "grad_norm": 1.7856346368789673, "learning_rate": 2e-05, "loss": 0.05196008, "step": 2271 }, { "epoch": 4.5440000000000005, "grad_norm": 1.848619818687439, "learning_rate": 2e-05, "loss": 0.06458001, "step": 2272 }, { "epoch": 4.546, "grad_norm": 0.7769522070884705, "learning_rate": 2e-05, "loss": 0.02994782, "step": 2273 }, { "epoch": 4.548, "grad_norm": 1.5994452238082886, "learning_rate": 2e-05, "loss": 0.04965055, "step": 2274 }, { "epoch": 4.55, "grad_norm": 2.9101972579956055, "learning_rate": 2e-05, "loss": 0.05279705, "step": 2275 }, { "epoch": 4.552, "grad_norm": 1.5274198055267334, "learning_rate": 2e-05, "loss": 0.05277833, "step": 2276 }, { "epoch": 4.554, "grad_norm": 1.6617462635040283, "learning_rate": 2e-05, "loss": 0.05835313, "step": 2277 }, { "epoch": 4.556, "grad_norm": 1.907563328742981, "learning_rate": 2e-05, "loss": 0.05223612, "step": 2278 }, { "epoch": 4.558, "grad_norm": 1.1127711534500122, "learning_rate": 2e-05, "loss": 0.03404124, "step": 2279 }, { "epoch": 4.5600000000000005, "grad_norm": 2.073880434036255, "learning_rate": 2e-05, "loss": 0.05908792, "step": 2280 }, { "epoch": 4.562, "grad_norm": 1.704450011253357, "learning_rate": 2e-05, "loss": 0.05699074, "step": 2281 }, { "epoch": 4.564, "grad_norm": 1.6895112991333008, "learning_rate": 2e-05, "loss": 0.04530311, "step": 2282 }, { "epoch": 4.566, "grad_norm": 1.96432363986969, "learning_rate": 2e-05, "loss": 0.04813468, "step": 2283 }, { "epoch": 4.568, "grad_norm": 1.4343494176864624, "learning_rate": 2e-05, "loss": 0.05354512, "step": 2284 }, { "epoch": 4.57, "grad_norm": 1.888863444328308, "learning_rate": 2e-05, "loss": 0.06389432, "step": 2285 }, { "epoch": 4.572, "grad_norm": 1.09622061252594, "learning_rate": 2e-05, "loss": 0.03841647, "step": 2286 }, { "epoch": 4.574, "grad_norm": 1.7652240991592407, "learning_rate": 2e-05, "loss": 0.04767475, "step": 2287 }, { "epoch": 4.576, "grad_norm": 1.4330079555511475, "learning_rate": 2e-05, "loss": 0.04378203, "step": 2288 }, { "epoch": 4.578, "grad_norm": 2.0666756629943848, "learning_rate": 2e-05, "loss": 0.04078595, "step": 2289 }, { "epoch": 4.58, "grad_norm": 2.1657555103302, "learning_rate": 2e-05, "loss": 0.05218465, "step": 2290 }, { "epoch": 4.582, "grad_norm": 1.7143996953964233, "learning_rate": 2e-05, "loss": 0.04939847, "step": 2291 }, { "epoch": 4.584, "grad_norm": 2.081948757171631, "learning_rate": 2e-05, "loss": 0.05028074, "step": 2292 }, { "epoch": 4.586, "grad_norm": 1.2868016958236694, "learning_rate": 2e-05, "loss": 0.04015199, "step": 2293 }, { "epoch": 4.588, "grad_norm": 1.3926664590835571, "learning_rate": 2e-05, "loss": 0.0334639, "step": 2294 }, { "epoch": 4.59, "grad_norm": 1.3289328813552856, "learning_rate": 2e-05, "loss": 0.04458878, "step": 2295 }, { "epoch": 4.592, "grad_norm": 2.737997055053711, "learning_rate": 2e-05, "loss": 0.04453328, "step": 2296 }, { "epoch": 4.594, "grad_norm": 1.2142332792282104, "learning_rate": 2e-05, "loss": 0.04507162, "step": 2297 }, { "epoch": 4.596, "grad_norm": 1.8497934341430664, "learning_rate": 2e-05, "loss": 0.0485746, "step": 2298 }, { "epoch": 4.598, "grad_norm": 2.921985387802124, "learning_rate": 2e-05, "loss": 0.07296225, "step": 2299 }, { "epoch": 4.6, "grad_norm": 2.357994318008423, "learning_rate": 2e-05, "loss": 0.07617038, "step": 2300 }, { "epoch": 4.602, "grad_norm": 1.4965320825576782, "learning_rate": 2e-05, "loss": 0.04823328, "step": 2301 }, { "epoch": 4.604, "grad_norm": 2.207876682281494, "learning_rate": 2e-05, "loss": 0.05064952, "step": 2302 }, { "epoch": 4.606, "grad_norm": 1.7099372148513794, "learning_rate": 2e-05, "loss": 0.07443255, "step": 2303 }, { "epoch": 4.608, "grad_norm": 1.7424834966659546, "learning_rate": 2e-05, "loss": 0.05599072, "step": 2304 }, { "epoch": 4.61, "grad_norm": 0.7347729802131653, "learning_rate": 2e-05, "loss": 0.02498679, "step": 2305 }, { "epoch": 4.612, "grad_norm": 1.2767810821533203, "learning_rate": 2e-05, "loss": 0.02526926, "step": 2306 }, { "epoch": 4.614, "grad_norm": 2.2865049839019775, "learning_rate": 2e-05, "loss": 0.05545616, "step": 2307 }, { "epoch": 4.616, "grad_norm": 1.1545779705047607, "learning_rate": 2e-05, "loss": 0.04497127, "step": 2308 }, { "epoch": 4.618, "grad_norm": 1.5742032527923584, "learning_rate": 2e-05, "loss": 0.03688892, "step": 2309 }, { "epoch": 4.62, "grad_norm": 2.337991237640381, "learning_rate": 2e-05, "loss": 0.08231428, "step": 2310 }, { "epoch": 4.622, "grad_norm": 1.6150590181350708, "learning_rate": 2e-05, "loss": 0.06120301, "step": 2311 }, { "epoch": 4.624, "grad_norm": 1.6267143487930298, "learning_rate": 2e-05, "loss": 0.04997005, "step": 2312 }, { "epoch": 4.626, "grad_norm": 1.6490769386291504, "learning_rate": 2e-05, "loss": 0.03661022, "step": 2313 }, { "epoch": 4.628, "grad_norm": 3.342787265777588, "learning_rate": 2e-05, "loss": 0.04789671, "step": 2314 }, { "epoch": 4.63, "grad_norm": 0.936518669128418, "learning_rate": 2e-05, "loss": 0.02918639, "step": 2315 }, { "epoch": 4.632, "grad_norm": 1.6577264070510864, "learning_rate": 2e-05, "loss": 0.06039897, "step": 2316 }, { "epoch": 4.634, "grad_norm": 2.343137741088867, "learning_rate": 2e-05, "loss": 0.05851085, "step": 2317 }, { "epoch": 4.636, "grad_norm": 1.8764770030975342, "learning_rate": 2e-05, "loss": 0.05424952, "step": 2318 }, { "epoch": 4.638, "grad_norm": 2.5574026107788086, "learning_rate": 2e-05, "loss": 0.06575814, "step": 2319 }, { "epoch": 4.64, "grad_norm": 1.8950480222702026, "learning_rate": 2e-05, "loss": 0.04771821, "step": 2320 }, { "epoch": 4.642, "grad_norm": 1.338828444480896, "learning_rate": 2e-05, "loss": 0.04430074, "step": 2321 }, { "epoch": 4.644, "grad_norm": 2.097791910171509, "learning_rate": 2e-05, "loss": 0.05703321, "step": 2322 }, { "epoch": 4.646, "grad_norm": 1.727583885192871, "learning_rate": 2e-05, "loss": 0.04203621, "step": 2323 }, { "epoch": 4.648, "grad_norm": 1.948989748954773, "learning_rate": 2e-05, "loss": 0.06082341, "step": 2324 }, { "epoch": 4.65, "grad_norm": 13.217486381530762, "learning_rate": 2e-05, "loss": 0.06096215, "step": 2325 }, { "epoch": 4.652, "grad_norm": 1.6092791557312012, "learning_rate": 2e-05, "loss": 0.06452105, "step": 2326 }, { "epoch": 4.654, "grad_norm": 1.8264143466949463, "learning_rate": 2e-05, "loss": 0.0483638, "step": 2327 }, { "epoch": 4.656, "grad_norm": 0.9609335660934448, "learning_rate": 2e-05, "loss": 0.03900202, "step": 2328 }, { "epoch": 4.658, "grad_norm": 2.192713737487793, "learning_rate": 2e-05, "loss": 0.05943859, "step": 2329 }, { "epoch": 4.66, "grad_norm": 3.4982011318206787, "learning_rate": 2e-05, "loss": 0.04005817, "step": 2330 }, { "epoch": 4.662, "grad_norm": 1.7967684268951416, "learning_rate": 2e-05, "loss": 0.04493996, "step": 2331 }, { "epoch": 4.664, "grad_norm": 2.371889352798462, "learning_rate": 2e-05, "loss": 0.06003476, "step": 2332 }, { "epoch": 4.666, "grad_norm": 1.3634529113769531, "learning_rate": 2e-05, "loss": 0.05062151, "step": 2333 }, { "epoch": 4.668, "grad_norm": 1.332409381866455, "learning_rate": 2e-05, "loss": 0.0533966, "step": 2334 }, { "epoch": 4.67, "grad_norm": 1.430114507675171, "learning_rate": 2e-05, "loss": 0.04422707, "step": 2335 }, { "epoch": 4.672, "grad_norm": 1.3726091384887695, "learning_rate": 2e-05, "loss": 0.03934011, "step": 2336 }, { "epoch": 4.674, "grad_norm": 2.0893781185150146, "learning_rate": 2e-05, "loss": 0.06843726, "step": 2337 }, { "epoch": 4.676, "grad_norm": 1.3907067775726318, "learning_rate": 2e-05, "loss": 0.03692862, "step": 2338 }, { "epoch": 4.678, "grad_norm": 2.273555040359497, "learning_rate": 2e-05, "loss": 0.04719747, "step": 2339 }, { "epoch": 4.68, "grad_norm": 1.213905692100525, "learning_rate": 2e-05, "loss": 0.03707946, "step": 2340 }, { "epoch": 4.682, "grad_norm": 1.5203443765640259, "learning_rate": 2e-05, "loss": 0.04167219, "step": 2341 }, { "epoch": 4.684, "grad_norm": 2.1066322326660156, "learning_rate": 2e-05, "loss": 0.06504886, "step": 2342 }, { "epoch": 4.686, "grad_norm": 1.336147665977478, "learning_rate": 2e-05, "loss": 0.04607446, "step": 2343 }, { "epoch": 4.688, "grad_norm": 1.2888507843017578, "learning_rate": 2e-05, "loss": 0.04543348, "step": 2344 }, { "epoch": 4.6899999999999995, "grad_norm": 1.7715270519256592, "learning_rate": 2e-05, "loss": 0.06024166, "step": 2345 }, { "epoch": 4.692, "grad_norm": 1.6171724796295166, "learning_rate": 2e-05, "loss": 0.04909429, "step": 2346 }, { "epoch": 4.694, "grad_norm": 1.1351345777511597, "learning_rate": 2e-05, "loss": 0.04079337, "step": 2347 }, { "epoch": 4.696, "grad_norm": 1.4784103631973267, "learning_rate": 2e-05, "loss": 0.04344939, "step": 2348 }, { "epoch": 4.698, "grad_norm": 1.6253507137298584, "learning_rate": 2e-05, "loss": 0.06867707, "step": 2349 }, { "epoch": 4.7, "grad_norm": 1.2361396551132202, "learning_rate": 2e-05, "loss": 0.03865194, "step": 2350 }, { "epoch": 4.702, "grad_norm": 1.5870065689086914, "learning_rate": 2e-05, "loss": 0.05209378, "step": 2351 }, { "epoch": 4.704, "grad_norm": 2.605103015899658, "learning_rate": 2e-05, "loss": 0.06624413, "step": 2352 }, { "epoch": 4.7059999999999995, "grad_norm": 2.3529393672943115, "learning_rate": 2e-05, "loss": 0.06027924, "step": 2353 }, { "epoch": 4.708, "grad_norm": 1.725293517112732, "learning_rate": 2e-05, "loss": 0.06173619, "step": 2354 }, { "epoch": 4.71, "grad_norm": 2.030322313308716, "learning_rate": 2e-05, "loss": 0.03163655, "step": 2355 }, { "epoch": 4.712, "grad_norm": 1.6363118886947632, "learning_rate": 2e-05, "loss": 0.05471949, "step": 2356 }, { "epoch": 4.714, "grad_norm": 1.0442521572113037, "learning_rate": 2e-05, "loss": 0.03488077, "step": 2357 }, { "epoch": 4.716, "grad_norm": 1.4695229530334473, "learning_rate": 2e-05, "loss": 0.0476706, "step": 2358 }, { "epoch": 4.718, "grad_norm": 1.5287270545959473, "learning_rate": 2e-05, "loss": 0.04342977, "step": 2359 }, { "epoch": 4.72, "grad_norm": 1.5241608619689941, "learning_rate": 2e-05, "loss": 0.05294701, "step": 2360 }, { "epoch": 4.7219999999999995, "grad_norm": 1.871067762374878, "learning_rate": 2e-05, "loss": 0.04774662, "step": 2361 }, { "epoch": 4.724, "grad_norm": 1.4351931810379028, "learning_rate": 2e-05, "loss": 0.04760946, "step": 2362 }, { "epoch": 4.726, "grad_norm": 1.4001541137695312, "learning_rate": 2e-05, "loss": 0.04679386, "step": 2363 }, { "epoch": 4.728, "grad_norm": 2.3986926078796387, "learning_rate": 2e-05, "loss": 0.05356419, "step": 2364 }, { "epoch": 4.73, "grad_norm": 1.9251970052719116, "learning_rate": 2e-05, "loss": 0.05533142, "step": 2365 }, { "epoch": 4.732, "grad_norm": 1.4066749811172485, "learning_rate": 2e-05, "loss": 0.04528087, "step": 2366 }, { "epoch": 4.734, "grad_norm": 2.020902633666992, "learning_rate": 2e-05, "loss": 0.04857261, "step": 2367 }, { "epoch": 4.736, "grad_norm": 3.061574697494507, "learning_rate": 2e-05, "loss": 0.06192244, "step": 2368 }, { "epoch": 4.7379999999999995, "grad_norm": 2.2626988887786865, "learning_rate": 2e-05, "loss": 0.04917062, "step": 2369 }, { "epoch": 4.74, "grad_norm": 1.5246493816375732, "learning_rate": 2e-05, "loss": 0.04136902, "step": 2370 }, { "epoch": 4.742, "grad_norm": 1.546585202217102, "learning_rate": 2e-05, "loss": 0.05909882, "step": 2371 }, { "epoch": 4.744, "grad_norm": 1.480355978012085, "learning_rate": 2e-05, "loss": 0.04314402, "step": 2372 }, { "epoch": 4.746, "grad_norm": 4.270092010498047, "learning_rate": 2e-05, "loss": 0.04397022, "step": 2373 }, { "epoch": 4.748, "grad_norm": 2.0741140842437744, "learning_rate": 2e-05, "loss": 0.0657267, "step": 2374 }, { "epoch": 4.75, "grad_norm": 1.6849968433380127, "learning_rate": 2e-05, "loss": 0.06969468, "step": 2375 }, { "epoch": 4.752, "grad_norm": 1.5144706964492798, "learning_rate": 2e-05, "loss": 0.05276833, "step": 2376 }, { "epoch": 4.754, "grad_norm": 1.8581568002700806, "learning_rate": 2e-05, "loss": 0.05863862, "step": 2377 }, { "epoch": 4.756, "grad_norm": 2.0459375381469727, "learning_rate": 2e-05, "loss": 0.0628389, "step": 2378 }, { "epoch": 4.758, "grad_norm": 1.4764527082443237, "learning_rate": 2e-05, "loss": 0.04522754, "step": 2379 }, { "epoch": 4.76, "grad_norm": 1.7862316370010376, "learning_rate": 2e-05, "loss": 0.05123893, "step": 2380 }, { "epoch": 4.7620000000000005, "grad_norm": 1.183058261871338, "learning_rate": 2e-05, "loss": 0.046175, "step": 2381 }, { "epoch": 4.764, "grad_norm": 0.9493065476417542, "learning_rate": 2e-05, "loss": 0.03909788, "step": 2382 }, { "epoch": 4.766, "grad_norm": 2.541240692138672, "learning_rate": 2e-05, "loss": 0.05713121, "step": 2383 }, { "epoch": 4.768, "grad_norm": 1.4289741516113281, "learning_rate": 2e-05, "loss": 0.04066955, "step": 2384 }, { "epoch": 4.77, "grad_norm": 1.8875654935836792, "learning_rate": 2e-05, "loss": 0.05503446, "step": 2385 }, { "epoch": 4.772, "grad_norm": 1.7568116188049316, "learning_rate": 2e-05, "loss": 0.04405501, "step": 2386 }, { "epoch": 4.774, "grad_norm": 1.398704171180725, "learning_rate": 2e-05, "loss": 0.04999311, "step": 2387 }, { "epoch": 4.776, "grad_norm": 1.6185884475708008, "learning_rate": 2e-05, "loss": 0.05588569, "step": 2388 }, { "epoch": 4.7780000000000005, "grad_norm": 1.4545282125473022, "learning_rate": 2e-05, "loss": 0.05736038, "step": 2389 }, { "epoch": 4.78, "grad_norm": 1.7647802829742432, "learning_rate": 2e-05, "loss": 0.048616, "step": 2390 }, { "epoch": 4.782, "grad_norm": 1.4688326120376587, "learning_rate": 2e-05, "loss": 0.05637769, "step": 2391 }, { "epoch": 4.784, "grad_norm": 1.0764333009719849, "learning_rate": 2e-05, "loss": 0.04401714, "step": 2392 }, { "epoch": 4.786, "grad_norm": 1.344368815422058, "learning_rate": 2e-05, "loss": 0.03968214, "step": 2393 }, { "epoch": 4.788, "grad_norm": 0.8727118968963623, "learning_rate": 2e-05, "loss": 0.03342715, "step": 2394 }, { "epoch": 4.79, "grad_norm": 1.1258989572525024, "learning_rate": 2e-05, "loss": 0.04955027, "step": 2395 }, { "epoch": 4.792, "grad_norm": 1.3691527843475342, "learning_rate": 2e-05, "loss": 0.03789597, "step": 2396 }, { "epoch": 4.7940000000000005, "grad_norm": 1.202214002609253, "learning_rate": 2e-05, "loss": 0.04922778, "step": 2397 }, { "epoch": 4.796, "grad_norm": 0.9750478863716125, "learning_rate": 2e-05, "loss": 0.0361733, "step": 2398 }, { "epoch": 4.798, "grad_norm": 1.3132562637329102, "learning_rate": 2e-05, "loss": 0.04116097, "step": 2399 }, { "epoch": 4.8, "grad_norm": 1.7972034215927124, "learning_rate": 2e-05, "loss": 0.05484334, "step": 2400 }, { "epoch": 4.802, "grad_norm": 0.8271289467811584, "learning_rate": 2e-05, "loss": 0.03021875, "step": 2401 }, { "epoch": 4.804, "grad_norm": 1.2940322160720825, "learning_rate": 2e-05, "loss": 0.03482784, "step": 2402 }, { "epoch": 4.806, "grad_norm": 0.9828287959098816, "learning_rate": 2e-05, "loss": 0.04179944, "step": 2403 }, { "epoch": 4.808, "grad_norm": 1.063576102256775, "learning_rate": 2e-05, "loss": 0.04457291, "step": 2404 }, { "epoch": 4.8100000000000005, "grad_norm": 1.6383423805236816, "learning_rate": 2e-05, "loss": 0.05698747, "step": 2405 }, { "epoch": 4.812, "grad_norm": 1.5289560556411743, "learning_rate": 2e-05, "loss": 0.05217171, "step": 2406 }, { "epoch": 4.814, "grad_norm": 1.3988386392593384, "learning_rate": 2e-05, "loss": 0.03368045, "step": 2407 }, { "epoch": 4.816, "grad_norm": 1.503707766532898, "learning_rate": 2e-05, "loss": 0.05029517, "step": 2408 }, { "epoch": 4.818, "grad_norm": 1.2330231666564941, "learning_rate": 2e-05, "loss": 0.04768985, "step": 2409 }, { "epoch": 4.82, "grad_norm": 1.9983141422271729, "learning_rate": 2e-05, "loss": 0.04722229, "step": 2410 }, { "epoch": 4.822, "grad_norm": 1.5647661685943604, "learning_rate": 2e-05, "loss": 0.04295033, "step": 2411 }, { "epoch": 4.824, "grad_norm": 1.2768006324768066, "learning_rate": 2e-05, "loss": 0.04663468, "step": 2412 }, { "epoch": 4.826, "grad_norm": 1.2094746828079224, "learning_rate": 2e-05, "loss": 0.04576386, "step": 2413 }, { "epoch": 4.828, "grad_norm": 2.035341262817383, "learning_rate": 2e-05, "loss": 0.06920949, "step": 2414 }, { "epoch": 4.83, "grad_norm": 1.2905845642089844, "learning_rate": 2e-05, "loss": 0.04321614, "step": 2415 }, { "epoch": 4.832, "grad_norm": 1.356903314590454, "learning_rate": 2e-05, "loss": 0.03513716, "step": 2416 }, { "epoch": 4.834, "grad_norm": 1.9090627431869507, "learning_rate": 2e-05, "loss": 0.04502533, "step": 2417 }, { "epoch": 4.836, "grad_norm": 1.7486882209777832, "learning_rate": 2e-05, "loss": 0.05252524, "step": 2418 }, { "epoch": 4.838, "grad_norm": 1.4587241411209106, "learning_rate": 2e-05, "loss": 0.05292211, "step": 2419 }, { "epoch": 4.84, "grad_norm": 1.4659751653671265, "learning_rate": 2e-05, "loss": 0.05041685, "step": 2420 }, { "epoch": 4.842, "grad_norm": 1.2260714769363403, "learning_rate": 2e-05, "loss": 0.03760161, "step": 2421 }, { "epoch": 4.844, "grad_norm": 1.8238565921783447, "learning_rate": 2e-05, "loss": 0.04717803, "step": 2422 }, { "epoch": 4.846, "grad_norm": 1.713142991065979, "learning_rate": 2e-05, "loss": 0.0471909, "step": 2423 }, { "epoch": 4.848, "grad_norm": 1.2333279848098755, "learning_rate": 2e-05, "loss": 0.04788278, "step": 2424 }, { "epoch": 4.85, "grad_norm": 1.4566587209701538, "learning_rate": 2e-05, "loss": 0.04854037, "step": 2425 }, { "epoch": 4.852, "grad_norm": 1.2099180221557617, "learning_rate": 2e-05, "loss": 0.05093442, "step": 2426 }, { "epoch": 4.854, "grad_norm": 1.439063549041748, "learning_rate": 2e-05, "loss": 0.04717848, "step": 2427 }, { "epoch": 4.856, "grad_norm": 1.0836939811706543, "learning_rate": 2e-05, "loss": 0.03817945, "step": 2428 }, { "epoch": 4.858, "grad_norm": 1.3742403984069824, "learning_rate": 2e-05, "loss": 0.05699654, "step": 2429 }, { "epoch": 4.86, "grad_norm": 1.637795329093933, "learning_rate": 2e-05, "loss": 0.05274617, "step": 2430 }, { "epoch": 4.862, "grad_norm": 1.3152008056640625, "learning_rate": 2e-05, "loss": 0.04993754, "step": 2431 }, { "epoch": 4.864, "grad_norm": 1.941238522529602, "learning_rate": 2e-05, "loss": 0.05275572, "step": 2432 }, { "epoch": 4.866, "grad_norm": 1.5748775005340576, "learning_rate": 2e-05, "loss": 0.04504102, "step": 2433 }, { "epoch": 4.868, "grad_norm": 1.11865234375, "learning_rate": 2e-05, "loss": 0.02234134, "step": 2434 }, { "epoch": 4.87, "grad_norm": 2.032224655151367, "learning_rate": 2e-05, "loss": 0.04748577, "step": 2435 }, { "epoch": 4.872, "grad_norm": 1.541551113128662, "learning_rate": 2e-05, "loss": 0.05044394, "step": 2436 }, { "epoch": 4.874, "grad_norm": 1.0707684755325317, "learning_rate": 2e-05, "loss": 0.03879319, "step": 2437 }, { "epoch": 4.876, "grad_norm": 1.0110793113708496, "learning_rate": 2e-05, "loss": 0.03082216, "step": 2438 }, { "epoch": 4.878, "grad_norm": 1.2891108989715576, "learning_rate": 2e-05, "loss": 0.04097469, "step": 2439 }, { "epoch": 4.88, "grad_norm": 1.5080897808074951, "learning_rate": 2e-05, "loss": 0.05637329, "step": 2440 }, { "epoch": 4.882, "grad_norm": 1.4517652988433838, "learning_rate": 2e-05, "loss": 0.04256181, "step": 2441 }, { "epoch": 4.884, "grad_norm": 1.6093480587005615, "learning_rate": 2e-05, "loss": 0.04163637, "step": 2442 }, { "epoch": 4.886, "grad_norm": 1.2815158367156982, "learning_rate": 2e-05, "loss": 0.0369774, "step": 2443 }, { "epoch": 4.888, "grad_norm": 1.383325457572937, "learning_rate": 2e-05, "loss": 0.03461573, "step": 2444 }, { "epoch": 4.89, "grad_norm": 1.1101199388504028, "learning_rate": 2e-05, "loss": 0.03874564, "step": 2445 }, { "epoch": 4.892, "grad_norm": 1.5300559997558594, "learning_rate": 2e-05, "loss": 0.05602445, "step": 2446 }, { "epoch": 4.894, "grad_norm": 2.2490646839141846, "learning_rate": 2e-05, "loss": 0.07521484, "step": 2447 }, { "epoch": 4.896, "grad_norm": 2.1896135807037354, "learning_rate": 2e-05, "loss": 0.06365304, "step": 2448 }, { "epoch": 4.898, "grad_norm": 1.0276954174041748, "learning_rate": 2e-05, "loss": 0.03177486, "step": 2449 }, { "epoch": 4.9, "grad_norm": 1.3365074396133423, "learning_rate": 2e-05, "loss": 0.03707567, "step": 2450 }, { "epoch": 4.902, "grad_norm": 1.5583385229110718, "learning_rate": 2e-05, "loss": 0.0411016, "step": 2451 }, { "epoch": 4.904, "grad_norm": 2.40421462059021, "learning_rate": 2e-05, "loss": 0.05268388, "step": 2452 }, { "epoch": 4.906, "grad_norm": 1.3850958347320557, "learning_rate": 2e-05, "loss": 0.04084206, "step": 2453 }, { "epoch": 4.908, "grad_norm": 0.9724360704421997, "learning_rate": 2e-05, "loss": 0.03289589, "step": 2454 }, { "epoch": 4.91, "grad_norm": 1.1916927099227905, "learning_rate": 2e-05, "loss": 0.05733837, "step": 2455 }, { "epoch": 4.912, "grad_norm": 1.3797491788864136, "learning_rate": 2e-05, "loss": 0.05401637, "step": 2456 }, { "epoch": 4.914, "grad_norm": 1.4776387214660645, "learning_rate": 2e-05, "loss": 0.06105582, "step": 2457 }, { "epoch": 4.916, "grad_norm": 1.95939040184021, "learning_rate": 2e-05, "loss": 0.04500619, "step": 2458 }, { "epoch": 4.918, "grad_norm": 1.286211609840393, "learning_rate": 2e-05, "loss": 0.03892947, "step": 2459 }, { "epoch": 4.92, "grad_norm": 1.159041166305542, "learning_rate": 2e-05, "loss": 0.03977415, "step": 2460 }, { "epoch": 4.922, "grad_norm": 1.2239066362380981, "learning_rate": 2e-05, "loss": 0.05262641, "step": 2461 }, { "epoch": 4.924, "grad_norm": 1.4577621221542358, "learning_rate": 2e-05, "loss": 0.04973259, "step": 2462 }, { "epoch": 4.926, "grad_norm": 1.6779803037643433, "learning_rate": 2e-05, "loss": 0.06090097, "step": 2463 }, { "epoch": 4.928, "grad_norm": 1.405738115310669, "learning_rate": 2e-05, "loss": 0.03640286, "step": 2464 }, { "epoch": 4.93, "grad_norm": 1.4361226558685303, "learning_rate": 2e-05, "loss": 0.05156115, "step": 2465 }, { "epoch": 4.932, "grad_norm": 1.7754102945327759, "learning_rate": 2e-05, "loss": 0.04374959, "step": 2466 }, { "epoch": 4.934, "grad_norm": 1.0777870416641235, "learning_rate": 2e-05, "loss": 0.04769883, "step": 2467 }, { "epoch": 4.936, "grad_norm": 1.4147472381591797, "learning_rate": 2e-05, "loss": 0.0415495, "step": 2468 }, { "epoch": 4.938, "grad_norm": 1.4324427843093872, "learning_rate": 2e-05, "loss": 0.0460553, "step": 2469 }, { "epoch": 4.9399999999999995, "grad_norm": 1.281327486038208, "learning_rate": 2e-05, "loss": 0.0426899, "step": 2470 }, { "epoch": 4.942, "grad_norm": 1.6230335235595703, "learning_rate": 2e-05, "loss": 0.06094572, "step": 2471 }, { "epoch": 4.944, "grad_norm": 2.4324069023132324, "learning_rate": 2e-05, "loss": 0.06636138, "step": 2472 }, { "epoch": 4.946, "grad_norm": 1.3603817224502563, "learning_rate": 2e-05, "loss": 0.05335851, "step": 2473 }, { "epoch": 4.948, "grad_norm": 1.7483546733856201, "learning_rate": 2e-05, "loss": 0.06515578, "step": 2474 }, { "epoch": 4.95, "grad_norm": 1.7496479749679565, "learning_rate": 2e-05, "loss": 0.053516, "step": 2475 }, { "epoch": 4.952, "grad_norm": 1.8903632164001465, "learning_rate": 2e-05, "loss": 0.05142172, "step": 2476 }, { "epoch": 4.954, "grad_norm": 1.436606526374817, "learning_rate": 2e-05, "loss": 0.06670608, "step": 2477 }, { "epoch": 4.9559999999999995, "grad_norm": 1.3558646440505981, "learning_rate": 2e-05, "loss": 0.03845021, "step": 2478 }, { "epoch": 4.958, "grad_norm": 2.306734561920166, "learning_rate": 2e-05, "loss": 0.07663551, "step": 2479 }, { "epoch": 4.96, "grad_norm": 1.0982824563980103, "learning_rate": 2e-05, "loss": 0.04220589, "step": 2480 }, { "epoch": 4.962, "grad_norm": 1.0680259466171265, "learning_rate": 2e-05, "loss": 0.04148862, "step": 2481 }, { "epoch": 4.964, "grad_norm": 1.6535357236862183, "learning_rate": 2e-05, "loss": 0.03783375, "step": 2482 }, { "epoch": 4.966, "grad_norm": 1.627331256866455, "learning_rate": 2e-05, "loss": 0.04947282, "step": 2483 }, { "epoch": 4.968, "grad_norm": 1.148332118988037, "learning_rate": 2e-05, "loss": 0.03641383, "step": 2484 }, { "epoch": 4.97, "grad_norm": 1.1122504472732544, "learning_rate": 2e-05, "loss": 0.03861043, "step": 2485 }, { "epoch": 4.9719999999999995, "grad_norm": 1.828122854232788, "learning_rate": 2e-05, "loss": 0.04262727, "step": 2486 }, { "epoch": 4.974, "grad_norm": 1.3069729804992676, "learning_rate": 2e-05, "loss": 0.0403937, "step": 2487 }, { "epoch": 4.976, "grad_norm": 1.3833580017089844, "learning_rate": 2e-05, "loss": 0.05110749, "step": 2488 }, { "epoch": 4.978, "grad_norm": 0.8173892498016357, "learning_rate": 2e-05, "loss": 0.02464099, "step": 2489 }, { "epoch": 4.98, "grad_norm": 1.0502883195877075, "learning_rate": 2e-05, "loss": 0.04005281, "step": 2490 }, { "epoch": 4.982, "grad_norm": 1.0571186542510986, "learning_rate": 2e-05, "loss": 0.04751731, "step": 2491 }, { "epoch": 4.984, "grad_norm": 0.8940661549568176, "learning_rate": 2e-05, "loss": 0.02703059, "step": 2492 }, { "epoch": 4.986, "grad_norm": 2.060039520263672, "learning_rate": 2e-05, "loss": 0.04748306, "step": 2493 }, { "epoch": 4.9879999999999995, "grad_norm": 2.088318347930908, "learning_rate": 2e-05, "loss": 0.06272329, "step": 2494 }, { "epoch": 4.99, "grad_norm": 1.2175875902175903, "learning_rate": 2e-05, "loss": 0.03415951, "step": 2495 }, { "epoch": 4.992, "grad_norm": 1.9275691509246826, "learning_rate": 2e-05, "loss": 0.05103919, "step": 2496 }, { "epoch": 4.994, "grad_norm": 1.262987494468689, "learning_rate": 2e-05, "loss": 0.04607838, "step": 2497 }, { "epoch": 4.996, "grad_norm": 1.489013433456421, "learning_rate": 2e-05, "loss": 0.05136772, "step": 2498 }, { "epoch": 4.998, "grad_norm": 1.268276572227478, "learning_rate": 2e-05, "loss": 0.0421587, "step": 2499 }, { "epoch": 5.0, "grad_norm": 1.560907244682312, "learning_rate": 2e-05, "loss": 0.05140449, "step": 2500 }, { "epoch": 5.0, "eval_performance": { "AngleClassification_1": 0.982, "AngleClassification_2": 0.978, "AngleClassification_3": 0.8303393213572854, "Equal_1": 0.956, "Equal_2": 0.844311377245509, "Equal_3": 0.7285429141716567, "LineComparison_1": 1.0, "LineComparison_2": 0.9880239520958084, "LineComparison_3": 0.9261477045908184, "Parallel_1": 0.9759519038076152, "Parallel_2": 0.9879759519038076, "Parallel_3": 0.586, "Perpendicular_1": 0.958, "Perpendicular_2": 0.594, "Perpendicular_3": 0.22545090180360722, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.9932000000000001, "PointLiesOnCircle_3": 0.9900666666666667, "PointLiesOnLine_1": 0.9919839679358717, "PointLiesOnLine_2": 0.9398797595190381, "PointLiesOnLine_3": 0.500998003992016 }, "eval_runtime": 225.3342, "eval_samples_per_second": 46.597, "eval_steps_per_second": 0.932, "step": 2500 }, { "epoch": 5.002, "grad_norm": 1.274254560470581, "learning_rate": 2e-05, "loss": 0.05348765, "step": 2501 }, { "epoch": 5.004, "grad_norm": 1.2278605699539185, "learning_rate": 2e-05, "loss": 0.03854948, "step": 2502 }, { "epoch": 5.006, "grad_norm": 1.063114047050476, "learning_rate": 2e-05, "loss": 0.05212869, "step": 2503 }, { "epoch": 5.008, "grad_norm": 1.460443139076233, "learning_rate": 2e-05, "loss": 0.04188091, "step": 2504 }, { "epoch": 5.01, "grad_norm": 1.275728464126587, "learning_rate": 2e-05, "loss": 0.04167286, "step": 2505 }, { "epoch": 5.012, "grad_norm": 0.9974750876426697, "learning_rate": 2e-05, "loss": 0.03575214, "step": 2506 }, { "epoch": 5.014, "grad_norm": 1.749761700630188, "learning_rate": 2e-05, "loss": 0.05473472, "step": 2507 }, { "epoch": 5.016, "grad_norm": 1.3475871086120605, "learning_rate": 2e-05, "loss": 0.03779987, "step": 2508 }, { "epoch": 5.018, "grad_norm": 2.055716037750244, "learning_rate": 2e-05, "loss": 0.06618349, "step": 2509 }, { "epoch": 5.02, "grad_norm": 1.2407305240631104, "learning_rate": 2e-05, "loss": 0.03305146, "step": 2510 }, { "epoch": 5.022, "grad_norm": 1.6509947776794434, "learning_rate": 2e-05, "loss": 0.06643588, "step": 2511 }, { "epoch": 5.024, "grad_norm": 2.7707438468933105, "learning_rate": 2e-05, "loss": 0.08066876, "step": 2512 }, { "epoch": 5.026, "grad_norm": 1.3326873779296875, "learning_rate": 2e-05, "loss": 0.05520583, "step": 2513 }, { "epoch": 5.028, "grad_norm": 1.1136361360549927, "learning_rate": 2e-05, "loss": 0.03297912, "step": 2514 }, { "epoch": 5.03, "grad_norm": 1.4125643968582153, "learning_rate": 2e-05, "loss": 0.04272803, "step": 2515 }, { "epoch": 5.032, "grad_norm": 1.6495161056518555, "learning_rate": 2e-05, "loss": 0.04692927, "step": 2516 }, { "epoch": 5.034, "grad_norm": 1.628611445426941, "learning_rate": 2e-05, "loss": 0.03439975, "step": 2517 }, { "epoch": 5.036, "grad_norm": 1.8219168186187744, "learning_rate": 2e-05, "loss": 0.05679814, "step": 2518 }, { "epoch": 5.038, "grad_norm": 1.415529727935791, "learning_rate": 2e-05, "loss": 0.06007532, "step": 2519 }, { "epoch": 5.04, "grad_norm": 1.0629870891571045, "learning_rate": 2e-05, "loss": 0.03387538, "step": 2520 }, { "epoch": 5.042, "grad_norm": 1.7092219591140747, "learning_rate": 2e-05, "loss": 0.06446123, "step": 2521 }, { "epoch": 5.044, "grad_norm": 1.7353438138961792, "learning_rate": 2e-05, "loss": 0.06203469, "step": 2522 }, { "epoch": 5.046, "grad_norm": 1.8766080141067505, "learning_rate": 2e-05, "loss": 0.0403279, "step": 2523 }, { "epoch": 5.048, "grad_norm": 0.8351106643676758, "learning_rate": 2e-05, "loss": 0.03081093, "step": 2524 }, { "epoch": 5.05, "grad_norm": 1.441839337348938, "learning_rate": 2e-05, "loss": 0.05279294, "step": 2525 }, { "epoch": 5.052, "grad_norm": 1.2187857627868652, "learning_rate": 2e-05, "loss": 0.04248401, "step": 2526 }, { "epoch": 5.054, "grad_norm": 1.6765186786651611, "learning_rate": 2e-05, "loss": 0.05531678, "step": 2527 }, { "epoch": 5.056, "grad_norm": 1.2416213750839233, "learning_rate": 2e-05, "loss": 0.04786776, "step": 2528 }, { "epoch": 5.058, "grad_norm": 1.9737921953201294, "learning_rate": 2e-05, "loss": 0.04501528, "step": 2529 }, { "epoch": 5.06, "grad_norm": 1.4312390089035034, "learning_rate": 2e-05, "loss": 0.03924786, "step": 2530 }, { "epoch": 5.062, "grad_norm": 1.7858320474624634, "learning_rate": 2e-05, "loss": 0.05126807, "step": 2531 }, { "epoch": 5.064, "grad_norm": 1.172790288925171, "learning_rate": 2e-05, "loss": 0.04075055, "step": 2532 }, { "epoch": 5.066, "grad_norm": 1.9834431409835815, "learning_rate": 2e-05, "loss": 0.0634538, "step": 2533 }, { "epoch": 5.068, "grad_norm": 1.3126105070114136, "learning_rate": 2e-05, "loss": 0.04581048, "step": 2534 }, { "epoch": 5.07, "grad_norm": 1.1176971197128296, "learning_rate": 2e-05, "loss": 0.03657329, "step": 2535 }, { "epoch": 5.072, "grad_norm": 1.1107871532440186, "learning_rate": 2e-05, "loss": 0.04105398, "step": 2536 }, { "epoch": 5.074, "grad_norm": 1.1895476579666138, "learning_rate": 2e-05, "loss": 0.05449467, "step": 2537 }, { "epoch": 5.076, "grad_norm": 0.8225005865097046, "learning_rate": 2e-05, "loss": 0.02876183, "step": 2538 }, { "epoch": 5.078, "grad_norm": 1.373420238494873, "learning_rate": 2e-05, "loss": 0.05874623, "step": 2539 }, { "epoch": 5.08, "grad_norm": 2.3037326335906982, "learning_rate": 2e-05, "loss": 0.05055622, "step": 2540 }, { "epoch": 5.082, "grad_norm": 1.1528946161270142, "learning_rate": 2e-05, "loss": 0.04371713, "step": 2541 }, { "epoch": 5.084, "grad_norm": 1.7955396175384521, "learning_rate": 2e-05, "loss": 0.03399004, "step": 2542 }, { "epoch": 5.086, "grad_norm": 0.9599894881248474, "learning_rate": 2e-05, "loss": 0.03310393, "step": 2543 }, { "epoch": 5.088, "grad_norm": 1.2058213949203491, "learning_rate": 2e-05, "loss": 0.05206143, "step": 2544 }, { "epoch": 5.09, "grad_norm": 1.1802030801773071, "learning_rate": 2e-05, "loss": 0.04761612, "step": 2545 }, { "epoch": 5.092, "grad_norm": 0.9872775077819824, "learning_rate": 2e-05, "loss": 0.03328814, "step": 2546 }, { "epoch": 5.094, "grad_norm": 1.2165980339050293, "learning_rate": 2e-05, "loss": 0.03223029, "step": 2547 }, { "epoch": 5.096, "grad_norm": 6.505279541015625, "learning_rate": 2e-05, "loss": 0.04725179, "step": 2548 }, { "epoch": 5.098, "grad_norm": 1.3280826807022095, "learning_rate": 2e-05, "loss": 0.03289382, "step": 2549 }, { "epoch": 5.1, "grad_norm": 2.20570969581604, "learning_rate": 2e-05, "loss": 0.06481442, "step": 2550 }, { "epoch": 5.102, "grad_norm": 1.694236397743225, "learning_rate": 2e-05, "loss": 0.07661946, "step": 2551 }, { "epoch": 5.104, "grad_norm": 1.35165274143219, "learning_rate": 2e-05, "loss": 0.05539154, "step": 2552 }, { "epoch": 5.106, "grad_norm": 1.87081778049469, "learning_rate": 2e-05, "loss": 0.05680997, "step": 2553 }, { "epoch": 5.108, "grad_norm": 2.4739086627960205, "learning_rate": 2e-05, "loss": 0.04873104, "step": 2554 }, { "epoch": 5.11, "grad_norm": 1.981345772743225, "learning_rate": 2e-05, "loss": 0.0595219, "step": 2555 }, { "epoch": 5.112, "grad_norm": 1.9032528400421143, "learning_rate": 2e-05, "loss": 0.04907563, "step": 2556 }, { "epoch": 5.114, "grad_norm": 1.0268688201904297, "learning_rate": 2e-05, "loss": 0.03086902, "step": 2557 }, { "epoch": 5.116, "grad_norm": 1.7726693153381348, "learning_rate": 2e-05, "loss": 0.06604618, "step": 2558 }, { "epoch": 5.118, "grad_norm": 2.432709217071533, "learning_rate": 2e-05, "loss": 0.05265028, "step": 2559 }, { "epoch": 5.12, "grad_norm": 1.6751363277435303, "learning_rate": 2e-05, "loss": 0.04887111, "step": 2560 }, { "epoch": 5.122, "grad_norm": 1.938701868057251, "learning_rate": 2e-05, "loss": 0.06450063, "step": 2561 }, { "epoch": 5.124, "grad_norm": 2.2096526622772217, "learning_rate": 2e-05, "loss": 0.05573916, "step": 2562 }, { "epoch": 5.126, "grad_norm": 1.7558597326278687, "learning_rate": 2e-05, "loss": 0.06811824, "step": 2563 }, { "epoch": 5.128, "grad_norm": 1.3852636814117432, "learning_rate": 2e-05, "loss": 0.05851717, "step": 2564 }, { "epoch": 5.13, "grad_norm": 1.2554768323898315, "learning_rate": 2e-05, "loss": 0.03548806, "step": 2565 }, { "epoch": 5.132, "grad_norm": 1.1914684772491455, "learning_rate": 2e-05, "loss": 0.0481647, "step": 2566 }, { "epoch": 5.134, "grad_norm": 1.1448657512664795, "learning_rate": 2e-05, "loss": 0.04216446, "step": 2567 }, { "epoch": 5.136, "grad_norm": 0.9630345106124878, "learning_rate": 2e-05, "loss": 0.04262756, "step": 2568 }, { "epoch": 5.138, "grad_norm": 1.9433590173721313, "learning_rate": 2e-05, "loss": 0.0554637, "step": 2569 }, { "epoch": 5.14, "grad_norm": 1.0274509191513062, "learning_rate": 2e-05, "loss": 0.04584524, "step": 2570 }, { "epoch": 5.142, "grad_norm": 1.4139691591262817, "learning_rate": 2e-05, "loss": 0.0502334, "step": 2571 }, { "epoch": 5.144, "grad_norm": 1.5718038082122803, "learning_rate": 2e-05, "loss": 0.04789044, "step": 2572 }, { "epoch": 5.146, "grad_norm": 3.1689887046813965, "learning_rate": 2e-05, "loss": 0.05137915, "step": 2573 }, { "epoch": 5.148, "grad_norm": 1.2174690961837769, "learning_rate": 2e-05, "loss": 0.05983402, "step": 2574 }, { "epoch": 5.15, "grad_norm": 1.1080644130706787, "learning_rate": 2e-05, "loss": 0.03960954, "step": 2575 }, { "epoch": 5.152, "grad_norm": 1.5908567905426025, "learning_rate": 2e-05, "loss": 0.04092301, "step": 2576 }, { "epoch": 5.154, "grad_norm": 1.1359602212905884, "learning_rate": 2e-05, "loss": 0.04483944, "step": 2577 }, { "epoch": 5.156, "grad_norm": 1.9286401271820068, "learning_rate": 2e-05, "loss": 0.05970896, "step": 2578 }, { "epoch": 5.158, "grad_norm": 1.0309653282165527, "learning_rate": 2e-05, "loss": 0.03147487, "step": 2579 }, { "epoch": 5.16, "grad_norm": 1.7649133205413818, "learning_rate": 2e-05, "loss": 0.05155741, "step": 2580 }, { "epoch": 5.162, "grad_norm": 1.8642913103103638, "learning_rate": 2e-05, "loss": 0.0608875, "step": 2581 }, { "epoch": 5.164, "grad_norm": 1.7733770608901978, "learning_rate": 2e-05, "loss": 0.04394894, "step": 2582 }, { "epoch": 5.166, "grad_norm": 1.2142730951309204, "learning_rate": 2e-05, "loss": 0.04885126, "step": 2583 }, { "epoch": 5.168, "grad_norm": 2.0643513202667236, "learning_rate": 2e-05, "loss": 0.06478775, "step": 2584 }, { "epoch": 5.17, "grad_norm": 1.0317633152008057, "learning_rate": 2e-05, "loss": 0.04297267, "step": 2585 }, { "epoch": 5.172, "grad_norm": 2.2762112617492676, "learning_rate": 2e-05, "loss": 0.04880513, "step": 2586 }, { "epoch": 5.174, "grad_norm": 1.569178581237793, "learning_rate": 2e-05, "loss": 0.04650364, "step": 2587 }, { "epoch": 5.176, "grad_norm": 1.854203462600708, "learning_rate": 2e-05, "loss": 0.04986256, "step": 2588 }, { "epoch": 5.178, "grad_norm": 2.389082908630371, "learning_rate": 2e-05, "loss": 0.06763738, "step": 2589 }, { "epoch": 5.18, "grad_norm": 1.2257379293441772, "learning_rate": 2e-05, "loss": 0.03880817, "step": 2590 }, { "epoch": 5.182, "grad_norm": 1.6407591104507446, "learning_rate": 2e-05, "loss": 0.03500217, "step": 2591 }, { "epoch": 5.184, "grad_norm": 2.7261722087860107, "learning_rate": 2e-05, "loss": 0.05209564, "step": 2592 }, { "epoch": 5.186, "grad_norm": 1.3117610216140747, "learning_rate": 2e-05, "loss": 0.04958644, "step": 2593 }, { "epoch": 5.188, "grad_norm": 1.3710271120071411, "learning_rate": 2e-05, "loss": 0.05644672, "step": 2594 }, { "epoch": 5.19, "grad_norm": 1.8276746273040771, "learning_rate": 2e-05, "loss": 0.05438906, "step": 2595 }, { "epoch": 5.192, "grad_norm": 1.0954389572143555, "learning_rate": 2e-05, "loss": 0.04617397, "step": 2596 }, { "epoch": 5.194, "grad_norm": 1.6073416471481323, "learning_rate": 2e-05, "loss": 0.05048424, "step": 2597 }, { "epoch": 5.196, "grad_norm": 1.0911192893981934, "learning_rate": 2e-05, "loss": 0.04819644, "step": 2598 }, { "epoch": 5.198, "grad_norm": 1.157811164855957, "learning_rate": 2e-05, "loss": 0.04681427, "step": 2599 }, { "epoch": 5.2, "grad_norm": 1.4944145679473877, "learning_rate": 2e-05, "loss": 0.05504366, "step": 2600 }, { "epoch": 5.202, "grad_norm": 1.1162713766098022, "learning_rate": 2e-05, "loss": 0.03791697, "step": 2601 }, { "epoch": 5.204, "grad_norm": 1.6690373420715332, "learning_rate": 2e-05, "loss": 0.04471238, "step": 2602 }, { "epoch": 5.206, "grad_norm": 1.460219383239746, "learning_rate": 2e-05, "loss": 0.05274561, "step": 2603 }, { "epoch": 5.208, "grad_norm": 1.3927085399627686, "learning_rate": 2e-05, "loss": 0.03696128, "step": 2604 }, { "epoch": 5.21, "grad_norm": 2.409457206726074, "learning_rate": 2e-05, "loss": 0.07169546, "step": 2605 }, { "epoch": 5.212, "grad_norm": 1.2363513708114624, "learning_rate": 2e-05, "loss": 0.04349241, "step": 2606 }, { "epoch": 5.214, "grad_norm": 1.5123097896575928, "learning_rate": 2e-05, "loss": 0.04987927, "step": 2607 }, { "epoch": 5.216, "grad_norm": 1.0378143787384033, "learning_rate": 2e-05, "loss": 0.03799111, "step": 2608 }, { "epoch": 5.218, "grad_norm": 1.5370274782180786, "learning_rate": 2e-05, "loss": 0.04318774, "step": 2609 }, { "epoch": 5.22, "grad_norm": 1.7253919839859009, "learning_rate": 2e-05, "loss": 0.04165493, "step": 2610 }, { "epoch": 5.222, "grad_norm": 1.4816701412200928, "learning_rate": 2e-05, "loss": 0.04548899, "step": 2611 }, { "epoch": 5.224, "grad_norm": 1.5851106643676758, "learning_rate": 2e-05, "loss": 0.05317931, "step": 2612 }, { "epoch": 5.226, "grad_norm": 1.437554121017456, "learning_rate": 2e-05, "loss": 0.03198932, "step": 2613 }, { "epoch": 5.228, "grad_norm": 1.595539927482605, "learning_rate": 2e-05, "loss": 0.05772294, "step": 2614 }, { "epoch": 5.23, "grad_norm": 1.7962896823883057, "learning_rate": 2e-05, "loss": 0.05459448, "step": 2615 }, { "epoch": 5.232, "grad_norm": 1.575662612915039, "learning_rate": 2e-05, "loss": 0.0527058, "step": 2616 }, { "epoch": 5.234, "grad_norm": 1.4220771789550781, "learning_rate": 2e-05, "loss": 0.06228274, "step": 2617 }, { "epoch": 5.236, "grad_norm": 1.1606825590133667, "learning_rate": 2e-05, "loss": 0.03758011, "step": 2618 }, { "epoch": 5.2379999999999995, "grad_norm": 1.074018955230713, "learning_rate": 2e-05, "loss": 0.03069984, "step": 2619 }, { "epoch": 5.24, "grad_norm": 2.3238940238952637, "learning_rate": 2e-05, "loss": 0.07914433, "step": 2620 }, { "epoch": 5.242, "grad_norm": 1.7188016176223755, "learning_rate": 2e-05, "loss": 0.04488772, "step": 2621 }, { "epoch": 5.244, "grad_norm": 1.1710253953933716, "learning_rate": 2e-05, "loss": 0.03768813, "step": 2622 }, { "epoch": 5.246, "grad_norm": 1.8896827697753906, "learning_rate": 2e-05, "loss": 0.06218662, "step": 2623 }, { "epoch": 5.248, "grad_norm": 0.9915817975997925, "learning_rate": 2e-05, "loss": 0.03700841, "step": 2624 }, { "epoch": 5.25, "grad_norm": 1.5043452978134155, "learning_rate": 2e-05, "loss": 0.07462659, "step": 2625 }, { "epoch": 5.252, "grad_norm": 1.0379505157470703, "learning_rate": 2e-05, "loss": 0.03102849, "step": 2626 }, { "epoch": 5.254, "grad_norm": 1.022592306137085, "learning_rate": 2e-05, "loss": 0.03631591, "step": 2627 }, { "epoch": 5.256, "grad_norm": 1.1392953395843506, "learning_rate": 2e-05, "loss": 0.04637909, "step": 2628 }, { "epoch": 5.258, "grad_norm": 1.3320226669311523, "learning_rate": 2e-05, "loss": 0.03964652, "step": 2629 }, { "epoch": 5.26, "grad_norm": 0.8611721396446228, "learning_rate": 2e-05, "loss": 0.0314832, "step": 2630 }, { "epoch": 5.2620000000000005, "grad_norm": 1.0983943939208984, "learning_rate": 2e-05, "loss": 0.03602706, "step": 2631 }, { "epoch": 5.264, "grad_norm": 1.7185841798782349, "learning_rate": 2e-05, "loss": 0.04793741, "step": 2632 }, { "epoch": 5.266, "grad_norm": 1.0905605554580688, "learning_rate": 2e-05, "loss": 0.02892584, "step": 2633 }, { "epoch": 5.268, "grad_norm": 1.2557018995285034, "learning_rate": 2e-05, "loss": 0.04766861, "step": 2634 }, { "epoch": 5.27, "grad_norm": 0.9541990160942078, "learning_rate": 2e-05, "loss": 0.02928709, "step": 2635 }, { "epoch": 5.272, "grad_norm": 1.33302903175354, "learning_rate": 2e-05, "loss": 0.0455764, "step": 2636 }, { "epoch": 5.274, "grad_norm": 1.6237035989761353, "learning_rate": 2e-05, "loss": 0.0488123, "step": 2637 }, { "epoch": 5.276, "grad_norm": 2.2175374031066895, "learning_rate": 2e-05, "loss": 0.06167346, "step": 2638 }, { "epoch": 5.2780000000000005, "grad_norm": 0.9200611710548401, "learning_rate": 2e-05, "loss": 0.0261219, "step": 2639 }, { "epoch": 5.28, "grad_norm": 1.5704294443130493, "learning_rate": 2e-05, "loss": 0.03495432, "step": 2640 }, { "epoch": 5.282, "grad_norm": 2.9279932975769043, "learning_rate": 2e-05, "loss": 0.07248227, "step": 2641 }, { "epoch": 5.284, "grad_norm": 1.2902008295059204, "learning_rate": 2e-05, "loss": 0.04559172, "step": 2642 }, { "epoch": 5.286, "grad_norm": 1.6073837280273438, "learning_rate": 2e-05, "loss": 0.06056985, "step": 2643 }, { "epoch": 5.288, "grad_norm": 2.2505626678466797, "learning_rate": 2e-05, "loss": 0.05137659, "step": 2644 }, { "epoch": 5.29, "grad_norm": 1.2069635391235352, "learning_rate": 2e-05, "loss": 0.045041, "step": 2645 }, { "epoch": 5.292, "grad_norm": 1.3199189901351929, "learning_rate": 2e-05, "loss": 0.05018923, "step": 2646 }, { "epoch": 5.294, "grad_norm": 1.5904293060302734, "learning_rate": 2e-05, "loss": 0.06746846, "step": 2647 }, { "epoch": 5.296, "grad_norm": 1.9439842700958252, "learning_rate": 2e-05, "loss": 0.06254475, "step": 2648 }, { "epoch": 5.298, "grad_norm": 0.9206429123878479, "learning_rate": 2e-05, "loss": 0.0360412, "step": 2649 }, { "epoch": 5.3, "grad_norm": 2.5866482257843018, "learning_rate": 2e-05, "loss": 0.06857111, "step": 2650 }, { "epoch": 5.302, "grad_norm": 2.741086483001709, "learning_rate": 2e-05, "loss": 0.04858804, "step": 2651 }, { "epoch": 5.304, "grad_norm": 1.2274497747421265, "learning_rate": 2e-05, "loss": 0.04378346, "step": 2652 }, { "epoch": 5.306, "grad_norm": 1.5868353843688965, "learning_rate": 2e-05, "loss": 0.04242693, "step": 2653 }, { "epoch": 5.308, "grad_norm": 1.4109787940979004, "learning_rate": 2e-05, "loss": 0.05944882, "step": 2654 }, { "epoch": 5.31, "grad_norm": 2.2271270751953125, "learning_rate": 2e-05, "loss": 0.05557436, "step": 2655 }, { "epoch": 5.312, "grad_norm": 2.004190683364868, "learning_rate": 2e-05, "loss": 0.06546484, "step": 2656 }, { "epoch": 5.314, "grad_norm": 1.1771128177642822, "learning_rate": 2e-05, "loss": 0.05241339, "step": 2657 }, { "epoch": 5.316, "grad_norm": 1.684875726699829, "learning_rate": 2e-05, "loss": 0.03864998, "step": 2658 }, { "epoch": 5.318, "grad_norm": 1.134729266166687, "learning_rate": 2e-05, "loss": 0.05230067, "step": 2659 }, { "epoch": 5.32, "grad_norm": 1.4269050359725952, "learning_rate": 2e-05, "loss": 0.06034885, "step": 2660 }, { "epoch": 5.322, "grad_norm": 2.9518721103668213, "learning_rate": 2e-05, "loss": 0.07433899, "step": 2661 }, { "epoch": 5.324, "grad_norm": 2.572568655014038, "learning_rate": 2e-05, "loss": 0.0539838, "step": 2662 }, { "epoch": 5.326, "grad_norm": 1.2748178243637085, "learning_rate": 2e-05, "loss": 0.05636097, "step": 2663 }, { "epoch": 5.328, "grad_norm": 1.1881786584854126, "learning_rate": 2e-05, "loss": 0.04060423, "step": 2664 }, { "epoch": 5.33, "grad_norm": 1.4298291206359863, "learning_rate": 2e-05, "loss": 0.05204656, "step": 2665 }, { "epoch": 5.332, "grad_norm": 1.4546726942062378, "learning_rate": 2e-05, "loss": 0.04487244, "step": 2666 }, { "epoch": 5.334, "grad_norm": 0.9727299213409424, "learning_rate": 2e-05, "loss": 0.03577352, "step": 2667 }, { "epoch": 5.336, "grad_norm": 0.9614014029502869, "learning_rate": 2e-05, "loss": 0.03809005, "step": 2668 }, { "epoch": 5.338, "grad_norm": 0.8633192181587219, "learning_rate": 2e-05, "loss": 0.03863839, "step": 2669 }, { "epoch": 5.34, "grad_norm": 1.721750020980835, "learning_rate": 2e-05, "loss": 0.04694315, "step": 2670 }, { "epoch": 5.342, "grad_norm": 0.9396949410438538, "learning_rate": 2e-05, "loss": 0.03346519, "step": 2671 }, { "epoch": 5.344, "grad_norm": 1.4146912097930908, "learning_rate": 2e-05, "loss": 0.05517077, "step": 2672 }, { "epoch": 5.346, "grad_norm": 1.7199981212615967, "learning_rate": 2e-05, "loss": 0.04120524, "step": 2673 }, { "epoch": 5.348, "grad_norm": 1.3100799322128296, "learning_rate": 2e-05, "loss": 0.04449036, "step": 2674 }, { "epoch": 5.35, "grad_norm": 1.9745393991470337, "learning_rate": 2e-05, "loss": 0.04217342, "step": 2675 }, { "epoch": 5.352, "grad_norm": 1.7498984336853027, "learning_rate": 2e-05, "loss": 0.07070212, "step": 2676 }, { "epoch": 5.354, "grad_norm": 1.5389410257339478, "learning_rate": 2e-05, "loss": 0.05914912, "step": 2677 }, { "epoch": 5.356, "grad_norm": 2.1040966510772705, "learning_rate": 2e-05, "loss": 0.04873541, "step": 2678 }, { "epoch": 5.358, "grad_norm": 1.7736648321151733, "learning_rate": 2e-05, "loss": 0.05355643, "step": 2679 }, { "epoch": 5.36, "grad_norm": 1.7821412086486816, "learning_rate": 2e-05, "loss": 0.04834352, "step": 2680 }, { "epoch": 5.362, "grad_norm": 1.1524840593338013, "learning_rate": 2e-05, "loss": 0.02683968, "step": 2681 }, { "epoch": 5.364, "grad_norm": 2.1152241230010986, "learning_rate": 2e-05, "loss": 0.05407428, "step": 2682 }, { "epoch": 5.366, "grad_norm": 1.0201750993728638, "learning_rate": 2e-05, "loss": 0.03167866, "step": 2683 }, { "epoch": 5.368, "grad_norm": 1.3844654560089111, "learning_rate": 2e-05, "loss": 0.05712597, "step": 2684 }, { "epoch": 5.37, "grad_norm": 1.2269951105117798, "learning_rate": 2e-05, "loss": 0.04210455, "step": 2685 }, { "epoch": 5.372, "grad_norm": 1.2101631164550781, "learning_rate": 2e-05, "loss": 0.03454702, "step": 2686 }, { "epoch": 5.374, "grad_norm": 1.7403888702392578, "learning_rate": 2e-05, "loss": 0.04555046, "step": 2687 }, { "epoch": 5.376, "grad_norm": 1.4490035772323608, "learning_rate": 2e-05, "loss": 0.04792362, "step": 2688 }, { "epoch": 5.378, "grad_norm": 1.395703673362732, "learning_rate": 2e-05, "loss": 0.04428685, "step": 2689 }, { "epoch": 5.38, "grad_norm": 1.5033087730407715, "learning_rate": 2e-05, "loss": 0.05658453, "step": 2690 }, { "epoch": 5.382, "grad_norm": 1.1471366882324219, "learning_rate": 2e-05, "loss": 0.04373558, "step": 2691 }, { "epoch": 5.384, "grad_norm": 2.165675401687622, "learning_rate": 2e-05, "loss": 0.05095565, "step": 2692 }, { "epoch": 5.386, "grad_norm": 1.4066956043243408, "learning_rate": 2e-05, "loss": 0.05214212, "step": 2693 }, { "epoch": 5.388, "grad_norm": 1.5050036907196045, "learning_rate": 2e-05, "loss": 0.05189269, "step": 2694 }, { "epoch": 5.39, "grad_norm": 2.011453151702881, "learning_rate": 2e-05, "loss": 0.05170311, "step": 2695 }, { "epoch": 5.392, "grad_norm": 1.4490711688995361, "learning_rate": 2e-05, "loss": 0.0495353, "step": 2696 }, { "epoch": 5.394, "grad_norm": 1.1680185794830322, "learning_rate": 2e-05, "loss": 0.03432415, "step": 2697 }, { "epoch": 5.396, "grad_norm": 1.3824403285980225, "learning_rate": 2e-05, "loss": 0.04868587, "step": 2698 }, { "epoch": 5.398, "grad_norm": 1.7004213333129883, "learning_rate": 2e-05, "loss": 0.04387897, "step": 2699 }, { "epoch": 5.4, "grad_norm": 1.2553097009658813, "learning_rate": 2e-05, "loss": 0.05076703, "step": 2700 }, { "epoch": 5.402, "grad_norm": 1.5994603633880615, "learning_rate": 2e-05, "loss": 0.0347247, "step": 2701 }, { "epoch": 5.404, "grad_norm": 1.6143654584884644, "learning_rate": 2e-05, "loss": 0.04832892, "step": 2702 }, { "epoch": 5.406, "grad_norm": 1.3363045454025269, "learning_rate": 2e-05, "loss": 0.04484051, "step": 2703 }, { "epoch": 5.408, "grad_norm": 1.2772718667984009, "learning_rate": 2e-05, "loss": 0.0367212, "step": 2704 }, { "epoch": 5.41, "grad_norm": 1.4310024976730347, "learning_rate": 2e-05, "loss": 0.05687527, "step": 2705 }, { "epoch": 5.412, "grad_norm": 1.4432463645935059, "learning_rate": 2e-05, "loss": 0.04345278, "step": 2706 }, { "epoch": 5.414, "grad_norm": 2.56174898147583, "learning_rate": 2e-05, "loss": 0.05794861, "step": 2707 }, { "epoch": 5.416, "grad_norm": 1.0707594156265259, "learning_rate": 2e-05, "loss": 0.04054152, "step": 2708 }, { "epoch": 5.418, "grad_norm": 1.5291610956192017, "learning_rate": 2e-05, "loss": 0.05981295, "step": 2709 }, { "epoch": 5.42, "grad_norm": 1.474603295326233, "learning_rate": 2e-05, "loss": 0.05704, "step": 2710 }, { "epoch": 5.422, "grad_norm": 1.4767605066299438, "learning_rate": 2e-05, "loss": 0.04334021, "step": 2711 }, { "epoch": 5.424, "grad_norm": 1.1517763137817383, "learning_rate": 2e-05, "loss": 0.04437998, "step": 2712 }, { "epoch": 5.426, "grad_norm": 1.0717130899429321, "learning_rate": 2e-05, "loss": 0.03792693, "step": 2713 }, { "epoch": 5.428, "grad_norm": 1.3737385272979736, "learning_rate": 2e-05, "loss": 0.04516871, "step": 2714 }, { "epoch": 5.43, "grad_norm": 1.7708261013031006, "learning_rate": 2e-05, "loss": 0.03771194, "step": 2715 }, { "epoch": 5.432, "grad_norm": 1.8402330875396729, "learning_rate": 2e-05, "loss": 0.04762217, "step": 2716 }, { "epoch": 5.434, "grad_norm": 1.4321401119232178, "learning_rate": 2e-05, "loss": 0.05097427, "step": 2717 }, { "epoch": 5.436, "grad_norm": 1.171239972114563, "learning_rate": 2e-05, "loss": 0.04301199, "step": 2718 }, { "epoch": 5.438, "grad_norm": 1.200124740600586, "learning_rate": 2e-05, "loss": 0.03662377, "step": 2719 }, { "epoch": 5.44, "grad_norm": 1.271047830581665, "learning_rate": 2e-05, "loss": 0.04506569, "step": 2720 }, { "epoch": 5.442, "grad_norm": 1.198940634727478, "learning_rate": 2e-05, "loss": 0.05181561, "step": 2721 }, { "epoch": 5.444, "grad_norm": 0.9857475161552429, "learning_rate": 2e-05, "loss": 0.03237488, "step": 2722 }, { "epoch": 5.446, "grad_norm": 0.8746547102928162, "learning_rate": 2e-05, "loss": 0.03637395, "step": 2723 }, { "epoch": 5.448, "grad_norm": 1.4782618284225464, "learning_rate": 2e-05, "loss": 0.04850566, "step": 2724 }, { "epoch": 5.45, "grad_norm": 1.8968923091888428, "learning_rate": 2e-05, "loss": 0.04695905, "step": 2725 }, { "epoch": 5.452, "grad_norm": 1.034572720527649, "learning_rate": 2e-05, "loss": 0.04463696, "step": 2726 }, { "epoch": 5.454, "grad_norm": 0.9072864055633545, "learning_rate": 2e-05, "loss": 0.03138497, "step": 2727 }, { "epoch": 5.456, "grad_norm": 1.240186333656311, "learning_rate": 2e-05, "loss": 0.04921221, "step": 2728 }, { "epoch": 5.458, "grad_norm": 1.9251102209091187, "learning_rate": 2e-05, "loss": 0.05067121, "step": 2729 }, { "epoch": 5.46, "grad_norm": 0.9456695914268494, "learning_rate": 2e-05, "loss": 0.03151168, "step": 2730 }, { "epoch": 5.462, "grad_norm": 0.8162277936935425, "learning_rate": 2e-05, "loss": 0.02599393, "step": 2731 }, { "epoch": 5.464, "grad_norm": 1.7150630950927734, "learning_rate": 2e-05, "loss": 0.04861501, "step": 2732 }, { "epoch": 5.466, "grad_norm": 1.8393536806106567, "learning_rate": 2e-05, "loss": 0.05166467, "step": 2733 }, { "epoch": 5.468, "grad_norm": 1.1005531549453735, "learning_rate": 2e-05, "loss": 0.03707793, "step": 2734 }, { "epoch": 5.47, "grad_norm": 1.4234669208526611, "learning_rate": 2e-05, "loss": 0.04701383, "step": 2735 }, { "epoch": 5.4719999999999995, "grad_norm": 1.3762401342391968, "learning_rate": 2e-05, "loss": 0.05516124, "step": 2736 }, { "epoch": 5.474, "grad_norm": 4.193373203277588, "learning_rate": 2e-05, "loss": 0.05090634, "step": 2737 }, { "epoch": 5.476, "grad_norm": 1.3340750932693481, "learning_rate": 2e-05, "loss": 0.03884787, "step": 2738 }, { "epoch": 5.478, "grad_norm": 0.8118112683296204, "learning_rate": 2e-05, "loss": 0.02710287, "step": 2739 }, { "epoch": 5.48, "grad_norm": 1.2886537313461304, "learning_rate": 2e-05, "loss": 0.06007296, "step": 2740 }, { "epoch": 5.482, "grad_norm": 1.3660320043563843, "learning_rate": 2e-05, "loss": 0.03907663, "step": 2741 }, { "epoch": 5.484, "grad_norm": 1.284400224685669, "learning_rate": 2e-05, "loss": 0.05616232, "step": 2742 }, { "epoch": 5.486, "grad_norm": 2.329368829727173, "learning_rate": 2e-05, "loss": 0.05981421, "step": 2743 }, { "epoch": 5.4879999999999995, "grad_norm": 2.0045716762542725, "learning_rate": 2e-05, "loss": 0.04675926, "step": 2744 }, { "epoch": 5.49, "grad_norm": 0.9499562382698059, "learning_rate": 2e-05, "loss": 0.03949323, "step": 2745 }, { "epoch": 5.492, "grad_norm": 1.7040280103683472, "learning_rate": 2e-05, "loss": 0.05110246, "step": 2746 }, { "epoch": 5.494, "grad_norm": 1.4409620761871338, "learning_rate": 2e-05, "loss": 0.03914365, "step": 2747 }, { "epoch": 5.496, "grad_norm": 1.9618107080459595, "learning_rate": 2e-05, "loss": 0.03172352, "step": 2748 }, { "epoch": 5.498, "grad_norm": 1.3093886375427246, "learning_rate": 2e-05, "loss": 0.0528778, "step": 2749 }, { "epoch": 5.5, "grad_norm": 1.7453217506408691, "learning_rate": 2e-05, "loss": 0.05672598, "step": 2750 }, { "epoch": 5.502, "grad_norm": 1.1863491535186768, "learning_rate": 2e-05, "loss": 0.04517543, "step": 2751 }, { "epoch": 5.504, "grad_norm": 0.7430426478385925, "learning_rate": 2e-05, "loss": 0.02445307, "step": 2752 }, { "epoch": 5.506, "grad_norm": 0.8772698044776917, "learning_rate": 2e-05, "loss": 0.03176689, "step": 2753 }, { "epoch": 5.508, "grad_norm": 1.3333395719528198, "learning_rate": 2e-05, "loss": 0.04402076, "step": 2754 }, { "epoch": 5.51, "grad_norm": 0.9693535566329956, "learning_rate": 2e-05, "loss": 0.02512111, "step": 2755 }, { "epoch": 5.5120000000000005, "grad_norm": 1.2075809240341187, "learning_rate": 2e-05, "loss": 0.05389047, "step": 2756 }, { "epoch": 5.514, "grad_norm": 1.412438988685608, "learning_rate": 2e-05, "loss": 0.04657469, "step": 2757 }, { "epoch": 5.516, "grad_norm": 1.1335420608520508, "learning_rate": 2e-05, "loss": 0.04387209, "step": 2758 }, { "epoch": 5.518, "grad_norm": 1.5355936288833618, "learning_rate": 2e-05, "loss": 0.06136326, "step": 2759 }, { "epoch": 5.52, "grad_norm": 1.7836377620697021, "learning_rate": 2e-05, "loss": 0.05746901, "step": 2760 }, { "epoch": 5.522, "grad_norm": 1.4249262809753418, "learning_rate": 2e-05, "loss": 0.04984912, "step": 2761 }, { "epoch": 5.524, "grad_norm": 1.0746277570724487, "learning_rate": 2e-05, "loss": 0.04349656, "step": 2762 }, { "epoch": 5.526, "grad_norm": 1.392073631286621, "learning_rate": 2e-05, "loss": 0.04211304, "step": 2763 }, { "epoch": 5.5280000000000005, "grad_norm": 1.526862382888794, "learning_rate": 2e-05, "loss": 0.03757313, "step": 2764 }, { "epoch": 5.53, "grad_norm": 1.168284296989441, "learning_rate": 2e-05, "loss": 0.04408391, "step": 2765 }, { "epoch": 5.532, "grad_norm": 0.7556564807891846, "learning_rate": 2e-05, "loss": 0.02818235, "step": 2766 }, { "epoch": 5.534, "grad_norm": 1.3440006971359253, "learning_rate": 2e-05, "loss": 0.0430432, "step": 2767 }, { "epoch": 5.536, "grad_norm": 1.4979382753372192, "learning_rate": 2e-05, "loss": 0.0697758, "step": 2768 }, { "epoch": 5.538, "grad_norm": 1.3355696201324463, "learning_rate": 2e-05, "loss": 0.05753803, "step": 2769 }, { "epoch": 5.54, "grad_norm": 2.2455711364746094, "learning_rate": 2e-05, "loss": 0.06441894, "step": 2770 }, { "epoch": 5.542, "grad_norm": 1.3767648935317993, "learning_rate": 2e-05, "loss": 0.04122285, "step": 2771 }, { "epoch": 5.5440000000000005, "grad_norm": 1.721429467201233, "learning_rate": 2e-05, "loss": 0.03885366, "step": 2772 }, { "epoch": 5.546, "grad_norm": 1.969995379447937, "learning_rate": 2e-05, "loss": 0.0441504, "step": 2773 }, { "epoch": 5.548, "grad_norm": 1.0759145021438599, "learning_rate": 2e-05, "loss": 0.03599454, "step": 2774 }, { "epoch": 5.55, "grad_norm": 1.5190961360931396, "learning_rate": 2e-05, "loss": 0.04657687, "step": 2775 }, { "epoch": 5.552, "grad_norm": 1.285163164138794, "learning_rate": 2e-05, "loss": 0.03839288, "step": 2776 }, { "epoch": 5.554, "grad_norm": 1.226860523223877, "learning_rate": 2e-05, "loss": 0.03010844, "step": 2777 }, { "epoch": 5.556, "grad_norm": 1.7060494422912598, "learning_rate": 2e-05, "loss": 0.04985185, "step": 2778 }, { "epoch": 5.558, "grad_norm": 1.2999157905578613, "learning_rate": 2e-05, "loss": 0.04224576, "step": 2779 }, { "epoch": 5.5600000000000005, "grad_norm": 1.3456339836120605, "learning_rate": 2e-05, "loss": 0.06814632, "step": 2780 }, { "epoch": 5.562, "grad_norm": 0.9363784193992615, "learning_rate": 2e-05, "loss": 0.02755808, "step": 2781 }, { "epoch": 5.564, "grad_norm": 1.001887321472168, "learning_rate": 2e-05, "loss": 0.04303287, "step": 2782 }, { "epoch": 5.566, "grad_norm": 1.3569835424423218, "learning_rate": 2e-05, "loss": 0.04556364, "step": 2783 }, { "epoch": 5.568, "grad_norm": 0.973330557346344, "learning_rate": 2e-05, "loss": 0.04103645, "step": 2784 }, { "epoch": 5.57, "grad_norm": 1.038578748703003, "learning_rate": 2e-05, "loss": 0.03170725, "step": 2785 }, { "epoch": 5.572, "grad_norm": 1.9504655599594116, "learning_rate": 2e-05, "loss": 0.04959633, "step": 2786 }, { "epoch": 5.574, "grad_norm": 1.01435124874115, "learning_rate": 2e-05, "loss": 0.03992799, "step": 2787 }, { "epoch": 5.576, "grad_norm": 2.174560785293579, "learning_rate": 2e-05, "loss": 0.05079005, "step": 2788 }, { "epoch": 5.578, "grad_norm": 1.8908747434616089, "learning_rate": 2e-05, "loss": 0.04022849, "step": 2789 }, { "epoch": 5.58, "grad_norm": 1.0057222843170166, "learning_rate": 2e-05, "loss": 0.03048504, "step": 2790 }, { "epoch": 5.582, "grad_norm": 0.9592764377593994, "learning_rate": 2e-05, "loss": 0.0302098, "step": 2791 }, { "epoch": 5.584, "grad_norm": 1.1021705865859985, "learning_rate": 2e-05, "loss": 0.04133482, "step": 2792 }, { "epoch": 5.586, "grad_norm": 0.8478265404701233, "learning_rate": 2e-05, "loss": 0.02666524, "step": 2793 }, { "epoch": 5.588, "grad_norm": 1.0878331661224365, "learning_rate": 2e-05, "loss": 0.04419788, "step": 2794 }, { "epoch": 5.59, "grad_norm": 2.2261617183685303, "learning_rate": 2e-05, "loss": 0.03761724, "step": 2795 }, { "epoch": 5.592, "grad_norm": 1.6982733011245728, "learning_rate": 2e-05, "loss": 0.04068662, "step": 2796 }, { "epoch": 5.594, "grad_norm": 1.0842010974884033, "learning_rate": 2e-05, "loss": 0.03345931, "step": 2797 }, { "epoch": 5.596, "grad_norm": 1.1795265674591064, "learning_rate": 2e-05, "loss": 0.0340689, "step": 2798 }, { "epoch": 5.598, "grad_norm": 1.4015120267868042, "learning_rate": 2e-05, "loss": 0.04135866, "step": 2799 }, { "epoch": 5.6, "grad_norm": 1.041655421257019, "learning_rate": 2e-05, "loss": 0.03796409, "step": 2800 }, { "epoch": 5.602, "grad_norm": 1.8997700214385986, "learning_rate": 2e-05, "loss": 0.04897504, "step": 2801 }, { "epoch": 5.604, "grad_norm": 1.6254674196243286, "learning_rate": 2e-05, "loss": 0.04715632, "step": 2802 }, { "epoch": 5.606, "grad_norm": 1.4282335042953491, "learning_rate": 2e-05, "loss": 0.03881311, "step": 2803 }, { "epoch": 5.608, "grad_norm": 1.262339472770691, "learning_rate": 2e-05, "loss": 0.05249061, "step": 2804 }, { "epoch": 5.61, "grad_norm": 1.3242909908294678, "learning_rate": 2e-05, "loss": 0.03999505, "step": 2805 }, { "epoch": 5.612, "grad_norm": 1.4575308561325073, "learning_rate": 2e-05, "loss": 0.05663625, "step": 2806 }, { "epoch": 5.614, "grad_norm": 2.511418581008911, "learning_rate": 2e-05, "loss": 0.06045591, "step": 2807 }, { "epoch": 5.616, "grad_norm": 2.0469627380371094, "learning_rate": 2e-05, "loss": 0.05448069, "step": 2808 }, { "epoch": 5.618, "grad_norm": 1.9743788242340088, "learning_rate": 2e-05, "loss": 0.06813675, "step": 2809 }, { "epoch": 5.62, "grad_norm": 1.295620322227478, "learning_rate": 2e-05, "loss": 0.05767171, "step": 2810 }, { "epoch": 5.622, "grad_norm": 1.1891217231750488, "learning_rate": 2e-05, "loss": 0.0359478, "step": 2811 }, { "epoch": 5.624, "grad_norm": 1.174956202507019, "learning_rate": 2e-05, "loss": 0.04307503, "step": 2812 }, { "epoch": 5.626, "grad_norm": 2.1059420108795166, "learning_rate": 2e-05, "loss": 0.04967503, "step": 2813 }, { "epoch": 5.628, "grad_norm": 1.7537864446640015, "learning_rate": 2e-05, "loss": 0.05831044, "step": 2814 }, { "epoch": 5.63, "grad_norm": 1.6061570644378662, "learning_rate": 2e-05, "loss": 0.0488851, "step": 2815 }, { "epoch": 5.632, "grad_norm": 1.195953607559204, "learning_rate": 2e-05, "loss": 0.05475558, "step": 2816 }, { "epoch": 5.634, "grad_norm": 1.2140015363693237, "learning_rate": 2e-05, "loss": 0.04117091, "step": 2817 }, { "epoch": 5.636, "grad_norm": 1.0586501359939575, "learning_rate": 2e-05, "loss": 0.05319555, "step": 2818 }, { "epoch": 5.638, "grad_norm": 1.3716130256652832, "learning_rate": 2e-05, "loss": 0.05939301, "step": 2819 }, { "epoch": 5.64, "grad_norm": 1.4354772567749023, "learning_rate": 2e-05, "loss": 0.04823098, "step": 2820 }, { "epoch": 5.642, "grad_norm": 1.3899197578430176, "learning_rate": 2e-05, "loss": 0.0539398, "step": 2821 }, { "epoch": 5.644, "grad_norm": 1.1216223239898682, "learning_rate": 2e-05, "loss": 0.04169286, "step": 2822 }, { "epoch": 5.646, "grad_norm": 1.0056108236312866, "learning_rate": 2e-05, "loss": 0.03774118, "step": 2823 }, { "epoch": 5.648, "grad_norm": 0.9322355389595032, "learning_rate": 2e-05, "loss": 0.03859473, "step": 2824 }, { "epoch": 5.65, "grad_norm": 1.4102550745010376, "learning_rate": 2e-05, "loss": 0.04660697, "step": 2825 }, { "epoch": 5.652, "grad_norm": 1.0983365774154663, "learning_rate": 2e-05, "loss": 0.04172538, "step": 2826 }, { "epoch": 5.654, "grad_norm": 1.2165638208389282, "learning_rate": 2e-05, "loss": 0.04812425, "step": 2827 }, { "epoch": 5.656, "grad_norm": 1.3303769826889038, "learning_rate": 2e-05, "loss": 0.04481729, "step": 2828 }, { "epoch": 5.658, "grad_norm": 0.8931049108505249, "learning_rate": 2e-05, "loss": 0.02980588, "step": 2829 }, { "epoch": 5.66, "grad_norm": 1.7124024629592896, "learning_rate": 2e-05, "loss": 0.04651967, "step": 2830 }, { "epoch": 5.662, "grad_norm": 1.6406296491622925, "learning_rate": 2e-05, "loss": 0.05489205, "step": 2831 }, { "epoch": 5.664, "grad_norm": 1.389189600944519, "learning_rate": 2e-05, "loss": 0.05295068, "step": 2832 }, { "epoch": 5.666, "grad_norm": 2.7110085487365723, "learning_rate": 2e-05, "loss": 0.05014222, "step": 2833 }, { "epoch": 5.668, "grad_norm": 0.9814625382423401, "learning_rate": 2e-05, "loss": 0.03854743, "step": 2834 }, { "epoch": 5.67, "grad_norm": 1.4477282762527466, "learning_rate": 2e-05, "loss": 0.04454483, "step": 2835 }, { "epoch": 5.672, "grad_norm": 1.3077199459075928, "learning_rate": 2e-05, "loss": 0.05645507, "step": 2836 }, { "epoch": 5.674, "grad_norm": 1.0442079305648804, "learning_rate": 2e-05, "loss": 0.0339361, "step": 2837 }, { "epoch": 5.676, "grad_norm": 1.385206937789917, "learning_rate": 2e-05, "loss": 0.03288971, "step": 2838 }, { "epoch": 5.678, "grad_norm": 1.150848150253296, "learning_rate": 2e-05, "loss": 0.05100866, "step": 2839 }, { "epoch": 5.68, "grad_norm": 1.014420509338379, "learning_rate": 2e-05, "loss": 0.03455331, "step": 2840 }, { "epoch": 5.682, "grad_norm": 1.110944390296936, "learning_rate": 2e-05, "loss": 0.04735414, "step": 2841 }, { "epoch": 5.684, "grad_norm": 1.8632248640060425, "learning_rate": 2e-05, "loss": 0.0478204, "step": 2842 }, { "epoch": 5.686, "grad_norm": 1.2941515445709229, "learning_rate": 2e-05, "loss": 0.03799584, "step": 2843 }, { "epoch": 5.688, "grad_norm": 2.1600589752197266, "learning_rate": 2e-05, "loss": 0.06423374, "step": 2844 }, { "epoch": 5.6899999999999995, "grad_norm": 1.1550672054290771, "learning_rate": 2e-05, "loss": 0.05224154, "step": 2845 }, { "epoch": 5.692, "grad_norm": 1.4903887510299683, "learning_rate": 2e-05, "loss": 0.0445109, "step": 2846 }, { "epoch": 5.694, "grad_norm": 1.3106459379196167, "learning_rate": 2e-05, "loss": 0.05626269, "step": 2847 }, { "epoch": 5.696, "grad_norm": 1.0039697885513306, "learning_rate": 2e-05, "loss": 0.04690451, "step": 2848 }, { "epoch": 5.698, "grad_norm": 1.073158860206604, "learning_rate": 2e-05, "loss": 0.03464031, "step": 2849 }, { "epoch": 5.7, "grad_norm": 1.1310467720031738, "learning_rate": 2e-05, "loss": 0.04242202, "step": 2850 }, { "epoch": 5.702, "grad_norm": 0.8533942103385925, "learning_rate": 2e-05, "loss": 0.03796684, "step": 2851 }, { "epoch": 5.704, "grad_norm": 1.092539668083191, "learning_rate": 2e-05, "loss": 0.0295783, "step": 2852 }, { "epoch": 5.7059999999999995, "grad_norm": 1.14443039894104, "learning_rate": 2e-05, "loss": 0.03674935, "step": 2853 }, { "epoch": 5.708, "grad_norm": 0.9979750514030457, "learning_rate": 2e-05, "loss": 0.03995182, "step": 2854 }, { "epoch": 5.71, "grad_norm": 1.1231728792190552, "learning_rate": 2e-05, "loss": 0.03494559, "step": 2855 }, { "epoch": 5.712, "grad_norm": 1.1001152992248535, "learning_rate": 2e-05, "loss": 0.03883237, "step": 2856 }, { "epoch": 5.714, "grad_norm": 1.7853176593780518, "learning_rate": 2e-05, "loss": 0.05064235, "step": 2857 }, { "epoch": 5.716, "grad_norm": 2.209711790084839, "learning_rate": 2e-05, "loss": 0.04457976, "step": 2858 }, { "epoch": 5.718, "grad_norm": 1.1608480215072632, "learning_rate": 2e-05, "loss": 0.03451565, "step": 2859 }, { "epoch": 5.72, "grad_norm": 1.5767822265625, "learning_rate": 2e-05, "loss": 0.06043113, "step": 2860 }, { "epoch": 5.7219999999999995, "grad_norm": 1.4318007230758667, "learning_rate": 2e-05, "loss": 0.06273182, "step": 2861 }, { "epoch": 5.724, "grad_norm": 1.313744306564331, "learning_rate": 2e-05, "loss": 0.03421508, "step": 2862 }, { "epoch": 5.726, "grad_norm": 1.675789475440979, "learning_rate": 2e-05, "loss": 0.03667891, "step": 2863 }, { "epoch": 5.728, "grad_norm": 1.2720035314559937, "learning_rate": 2e-05, "loss": 0.05019858, "step": 2864 }, { "epoch": 5.73, "grad_norm": 1.30983567237854, "learning_rate": 2e-05, "loss": 0.04676288, "step": 2865 }, { "epoch": 5.732, "grad_norm": 0.9884480237960815, "learning_rate": 2e-05, "loss": 0.03488427, "step": 2866 }, { "epoch": 5.734, "grad_norm": 1.7114611864089966, "learning_rate": 2e-05, "loss": 0.05122133, "step": 2867 }, { "epoch": 5.736, "grad_norm": 1.7135913372039795, "learning_rate": 2e-05, "loss": 0.04813797, "step": 2868 }, { "epoch": 5.7379999999999995, "grad_norm": 3.1470212936401367, "learning_rate": 2e-05, "loss": 0.06894997, "step": 2869 }, { "epoch": 5.74, "grad_norm": 1.096247673034668, "learning_rate": 2e-05, "loss": 0.03306488, "step": 2870 }, { "epoch": 5.742, "grad_norm": 0.9625670313835144, "learning_rate": 2e-05, "loss": 0.03933379, "step": 2871 }, { "epoch": 5.744, "grad_norm": 1.6643515825271606, "learning_rate": 2e-05, "loss": 0.04675451, "step": 2872 }, { "epoch": 5.746, "grad_norm": 2.6767466068267822, "learning_rate": 2e-05, "loss": 0.06190599, "step": 2873 }, { "epoch": 5.748, "grad_norm": 0.9536160826683044, "learning_rate": 2e-05, "loss": 0.02489068, "step": 2874 }, { "epoch": 5.75, "grad_norm": 1.170341968536377, "learning_rate": 2e-05, "loss": 0.0338923, "step": 2875 }, { "epoch": 5.752, "grad_norm": 1.8890984058380127, "learning_rate": 2e-05, "loss": 0.0494932, "step": 2876 }, { "epoch": 5.754, "grad_norm": 1.265891671180725, "learning_rate": 2e-05, "loss": 0.04788334, "step": 2877 }, { "epoch": 5.756, "grad_norm": 1.2991775274276733, "learning_rate": 2e-05, "loss": 0.03866075, "step": 2878 }, { "epoch": 5.758, "grad_norm": 1.6693869829177856, "learning_rate": 2e-05, "loss": 0.03779318, "step": 2879 }, { "epoch": 5.76, "grad_norm": 1.1965926885604858, "learning_rate": 2e-05, "loss": 0.04971344, "step": 2880 }, { "epoch": 5.7620000000000005, "grad_norm": 1.5279757976531982, "learning_rate": 2e-05, "loss": 0.04355852, "step": 2881 }, { "epoch": 5.764, "grad_norm": 2.3361642360687256, "learning_rate": 2e-05, "loss": 0.06249002, "step": 2882 }, { "epoch": 5.766, "grad_norm": 1.160375952720642, "learning_rate": 2e-05, "loss": 0.0406298, "step": 2883 }, { "epoch": 5.768, "grad_norm": 1.023131012916565, "learning_rate": 2e-05, "loss": 0.03389404, "step": 2884 }, { "epoch": 5.77, "grad_norm": 0.9041054248809814, "learning_rate": 2e-05, "loss": 0.03502499, "step": 2885 }, { "epoch": 5.772, "grad_norm": 0.9861660599708557, "learning_rate": 2e-05, "loss": 0.04226292, "step": 2886 }, { "epoch": 5.774, "grad_norm": 1.1348789930343628, "learning_rate": 2e-05, "loss": 0.06125481, "step": 2887 }, { "epoch": 5.776, "grad_norm": 1.3225739002227783, "learning_rate": 2e-05, "loss": 0.04853552, "step": 2888 }, { "epoch": 5.7780000000000005, "grad_norm": 1.5088573694229126, "learning_rate": 2e-05, "loss": 0.04202688, "step": 2889 }, { "epoch": 5.78, "grad_norm": 1.4508006572723389, "learning_rate": 2e-05, "loss": 0.04638818, "step": 2890 }, { "epoch": 5.782, "grad_norm": 1.5032254457473755, "learning_rate": 2e-05, "loss": 0.06157321, "step": 2891 }, { "epoch": 5.784, "grad_norm": 0.944823145866394, "learning_rate": 2e-05, "loss": 0.03227479, "step": 2892 }, { "epoch": 5.786, "grad_norm": 2.0540854930877686, "learning_rate": 2e-05, "loss": 0.05460972, "step": 2893 }, { "epoch": 5.788, "grad_norm": 1.4161620140075684, "learning_rate": 2e-05, "loss": 0.03801805, "step": 2894 }, { "epoch": 5.79, "grad_norm": 1.3737353086471558, "learning_rate": 2e-05, "loss": 0.04621457, "step": 2895 }, { "epoch": 5.792, "grad_norm": 1.3665794134140015, "learning_rate": 2e-05, "loss": 0.03408165, "step": 2896 }, { "epoch": 5.7940000000000005, "grad_norm": 1.673358678817749, "learning_rate": 2e-05, "loss": 0.04911122, "step": 2897 }, { "epoch": 5.796, "grad_norm": 2.0860588550567627, "learning_rate": 2e-05, "loss": 0.06043109, "step": 2898 }, { "epoch": 5.798, "grad_norm": 1.0691807270050049, "learning_rate": 2e-05, "loss": 0.02785878, "step": 2899 }, { "epoch": 5.8, "grad_norm": 1.7366712093353271, "learning_rate": 2e-05, "loss": 0.03472973, "step": 2900 }, { "epoch": 5.802, "grad_norm": 1.7986063957214355, "learning_rate": 2e-05, "loss": 0.05627033, "step": 2901 }, { "epoch": 5.804, "grad_norm": 1.4205238819122314, "learning_rate": 2e-05, "loss": 0.03846651, "step": 2902 }, { "epoch": 5.806, "grad_norm": 1.3321818113327026, "learning_rate": 2e-05, "loss": 0.04455447, "step": 2903 }, { "epoch": 5.808, "grad_norm": 1.220061182975769, "learning_rate": 2e-05, "loss": 0.04368271, "step": 2904 }, { "epoch": 5.8100000000000005, "grad_norm": 1.1751261949539185, "learning_rate": 2e-05, "loss": 0.05241146, "step": 2905 }, { "epoch": 5.812, "grad_norm": 1.4119185209274292, "learning_rate": 2e-05, "loss": 0.04886335, "step": 2906 }, { "epoch": 5.814, "grad_norm": 1.2438604831695557, "learning_rate": 2e-05, "loss": 0.05454946, "step": 2907 }, { "epoch": 5.816, "grad_norm": 1.1836755275726318, "learning_rate": 2e-05, "loss": 0.04367521, "step": 2908 }, { "epoch": 5.818, "grad_norm": 1.185683250427246, "learning_rate": 2e-05, "loss": 0.05116605, "step": 2909 }, { "epoch": 5.82, "grad_norm": 1.1339044570922852, "learning_rate": 2e-05, "loss": 0.0302528, "step": 2910 }, { "epoch": 5.822, "grad_norm": 1.0865379571914673, "learning_rate": 2e-05, "loss": 0.035244, "step": 2911 }, { "epoch": 5.824, "grad_norm": 1.097998857498169, "learning_rate": 2e-05, "loss": 0.05083325, "step": 2912 }, { "epoch": 5.826, "grad_norm": 1.0054258108139038, "learning_rate": 2e-05, "loss": 0.03768182, "step": 2913 }, { "epoch": 5.828, "grad_norm": 1.1770591735839844, "learning_rate": 2e-05, "loss": 0.03336333, "step": 2914 }, { "epoch": 5.83, "grad_norm": 1.1220648288726807, "learning_rate": 2e-05, "loss": 0.04429811, "step": 2915 }, { "epoch": 5.832, "grad_norm": 1.0926729440689087, "learning_rate": 2e-05, "loss": 0.04578809, "step": 2916 }, { "epoch": 5.834, "grad_norm": 1.5484569072723389, "learning_rate": 2e-05, "loss": 0.03992088, "step": 2917 }, { "epoch": 5.836, "grad_norm": 1.4758068323135376, "learning_rate": 2e-05, "loss": 0.04368002, "step": 2918 }, { "epoch": 5.838, "grad_norm": 1.1719555854797363, "learning_rate": 2e-05, "loss": 0.04148347, "step": 2919 }, { "epoch": 5.84, "grad_norm": 1.7539640665054321, "learning_rate": 2e-05, "loss": 0.04905649, "step": 2920 }, { "epoch": 5.842, "grad_norm": 1.8233855962753296, "learning_rate": 2e-05, "loss": 0.04216454, "step": 2921 }, { "epoch": 5.844, "grad_norm": 1.4949369430541992, "learning_rate": 2e-05, "loss": 0.04457965, "step": 2922 }, { "epoch": 5.846, "grad_norm": 2.1406314373016357, "learning_rate": 2e-05, "loss": 0.05990191, "step": 2923 }, { "epoch": 5.848, "grad_norm": 1.448477029800415, "learning_rate": 2e-05, "loss": 0.0556497, "step": 2924 }, { "epoch": 5.85, "grad_norm": 1.9012106657028198, "learning_rate": 2e-05, "loss": 0.06153479, "step": 2925 }, { "epoch": 5.852, "grad_norm": 0.9102736115455627, "learning_rate": 2e-05, "loss": 0.03957338, "step": 2926 }, { "epoch": 5.854, "grad_norm": 1.3240022659301758, "learning_rate": 2e-05, "loss": 0.06046633, "step": 2927 }, { "epoch": 5.856, "grad_norm": 1.0060014724731445, "learning_rate": 2e-05, "loss": 0.03977562, "step": 2928 }, { "epoch": 5.858, "grad_norm": 1.3019710779190063, "learning_rate": 2e-05, "loss": 0.02535181, "step": 2929 }, { "epoch": 5.86, "grad_norm": 1.0640290975570679, "learning_rate": 2e-05, "loss": 0.04199129, "step": 2930 }, { "epoch": 5.862, "grad_norm": 1.9135032892227173, "learning_rate": 2e-05, "loss": 0.04243597, "step": 2931 }, { "epoch": 5.864, "grad_norm": 1.420491337776184, "learning_rate": 2e-05, "loss": 0.05045445, "step": 2932 }, { "epoch": 5.866, "grad_norm": 1.1001486778259277, "learning_rate": 2e-05, "loss": 0.04370749, "step": 2933 }, { "epoch": 5.868, "grad_norm": 1.5934385061264038, "learning_rate": 2e-05, "loss": 0.03703097, "step": 2934 }, { "epoch": 5.87, "grad_norm": 1.1814768314361572, "learning_rate": 2e-05, "loss": 0.04241617, "step": 2935 }, { "epoch": 5.872, "grad_norm": 0.6100843548774719, "learning_rate": 2e-05, "loss": 0.01979692, "step": 2936 }, { "epoch": 5.874, "grad_norm": 1.0642284154891968, "learning_rate": 2e-05, "loss": 0.05003105, "step": 2937 }, { "epoch": 5.876, "grad_norm": 0.8967114090919495, "learning_rate": 2e-05, "loss": 0.04204514, "step": 2938 }, { "epoch": 5.878, "grad_norm": 1.170918345451355, "learning_rate": 2e-05, "loss": 0.05277304, "step": 2939 }, { "epoch": 5.88, "grad_norm": 1.0849500894546509, "learning_rate": 2e-05, "loss": 0.03190074, "step": 2940 }, { "epoch": 5.882, "grad_norm": 1.2641491889953613, "learning_rate": 2e-05, "loss": 0.04309519, "step": 2941 }, { "epoch": 5.884, "grad_norm": 1.0766750574111938, "learning_rate": 2e-05, "loss": 0.04164543, "step": 2942 }, { "epoch": 5.886, "grad_norm": 0.8320326209068298, "learning_rate": 2e-05, "loss": 0.03146955, "step": 2943 }, { "epoch": 5.888, "grad_norm": 1.118658423423767, "learning_rate": 2e-05, "loss": 0.03715554, "step": 2944 }, { "epoch": 5.89, "grad_norm": 0.9736190438270569, "learning_rate": 2e-05, "loss": 0.03481939, "step": 2945 }, { "epoch": 5.892, "grad_norm": 0.9873157143592834, "learning_rate": 2e-05, "loss": 0.03754682, "step": 2946 }, { "epoch": 5.894, "grad_norm": 1.4703787565231323, "learning_rate": 2e-05, "loss": 0.05178484, "step": 2947 }, { "epoch": 5.896, "grad_norm": 1.4210960865020752, "learning_rate": 2e-05, "loss": 0.0428045, "step": 2948 }, { "epoch": 5.898, "grad_norm": 1.6248009204864502, "learning_rate": 2e-05, "loss": 0.02344857, "step": 2949 }, { "epoch": 5.9, "grad_norm": 0.8838896751403809, "learning_rate": 2e-05, "loss": 0.03207851, "step": 2950 }, { "epoch": 5.902, "grad_norm": 2.1775131225585938, "learning_rate": 2e-05, "loss": 0.04790495, "step": 2951 }, { "epoch": 5.904, "grad_norm": 1.3693369626998901, "learning_rate": 2e-05, "loss": 0.03549768, "step": 2952 }, { "epoch": 5.906, "grad_norm": 1.6582708358764648, "learning_rate": 2e-05, "loss": 0.03871614, "step": 2953 }, { "epoch": 5.908, "grad_norm": 1.4673857688903809, "learning_rate": 2e-05, "loss": 0.03315175, "step": 2954 }, { "epoch": 5.91, "grad_norm": 1.037751317024231, "learning_rate": 2e-05, "loss": 0.03130211, "step": 2955 }, { "epoch": 5.912, "grad_norm": 2.0079879760742188, "learning_rate": 2e-05, "loss": 0.05596152, "step": 2956 }, { "epoch": 5.914, "grad_norm": 1.0041816234588623, "learning_rate": 2e-05, "loss": 0.03196197, "step": 2957 }, { "epoch": 5.916, "grad_norm": 0.8064287304878235, "learning_rate": 2e-05, "loss": 0.02279401, "step": 2958 }, { "epoch": 5.918, "grad_norm": 2.16861629486084, "learning_rate": 2e-05, "loss": 0.067145, "step": 2959 }, { "epoch": 5.92, "grad_norm": 2.058781385421753, "learning_rate": 2e-05, "loss": 0.04492671, "step": 2960 }, { "epoch": 5.922, "grad_norm": 0.7367409467697144, "learning_rate": 2e-05, "loss": 0.02037537, "step": 2961 }, { "epoch": 5.924, "grad_norm": 1.9977240562438965, "learning_rate": 2e-05, "loss": 0.06307687, "step": 2962 }, { "epoch": 5.926, "grad_norm": 1.6586973667144775, "learning_rate": 2e-05, "loss": 0.05629431, "step": 2963 }, { "epoch": 5.928, "grad_norm": 1.842704176902771, "learning_rate": 2e-05, "loss": 0.05144266, "step": 2964 }, { "epoch": 5.93, "grad_norm": 0.9082912802696228, "learning_rate": 2e-05, "loss": 0.03780605, "step": 2965 }, { "epoch": 5.932, "grad_norm": 0.9635425209999084, "learning_rate": 2e-05, "loss": 0.02591936, "step": 2966 }, { "epoch": 5.934, "grad_norm": 1.1012859344482422, "learning_rate": 2e-05, "loss": 0.04033398, "step": 2967 }, { "epoch": 5.936, "grad_norm": 0.8882836699485779, "learning_rate": 2e-05, "loss": 0.03119293, "step": 2968 }, { "epoch": 5.938, "grad_norm": 1.564009189605713, "learning_rate": 2e-05, "loss": 0.04491838, "step": 2969 }, { "epoch": 5.9399999999999995, "grad_norm": 2.2292184829711914, "learning_rate": 2e-05, "loss": 0.05703265, "step": 2970 }, { "epoch": 5.942, "grad_norm": 1.0735360383987427, "learning_rate": 2e-05, "loss": 0.03565111, "step": 2971 }, { "epoch": 5.944, "grad_norm": 1.2419227361679077, "learning_rate": 2e-05, "loss": 0.03339553, "step": 2972 }, { "epoch": 5.946, "grad_norm": 1.0682003498077393, "learning_rate": 2e-05, "loss": 0.0435975, "step": 2973 }, { "epoch": 5.948, "grad_norm": 1.4168179035186768, "learning_rate": 2e-05, "loss": 0.03408816, "step": 2974 }, { "epoch": 5.95, "grad_norm": 1.324600338935852, "learning_rate": 2e-05, "loss": 0.0291413, "step": 2975 }, { "epoch": 5.952, "grad_norm": 1.6872317790985107, "learning_rate": 2e-05, "loss": 0.04608905, "step": 2976 }, { "epoch": 5.954, "grad_norm": 1.3927865028381348, "learning_rate": 2e-05, "loss": 0.0372963, "step": 2977 }, { "epoch": 5.9559999999999995, "grad_norm": 1.4962786436080933, "learning_rate": 2e-05, "loss": 0.03412071, "step": 2978 }, { "epoch": 5.958, "grad_norm": 0.9910528063774109, "learning_rate": 2e-05, "loss": 0.03533617, "step": 2979 }, { "epoch": 5.96, "grad_norm": 1.7080367803573608, "learning_rate": 2e-05, "loss": 0.05476376, "step": 2980 }, { "epoch": 5.962, "grad_norm": 1.7132834196090698, "learning_rate": 2e-05, "loss": 0.04628608, "step": 2981 }, { "epoch": 5.964, "grad_norm": 1.1088155508041382, "learning_rate": 2e-05, "loss": 0.03366048, "step": 2982 }, { "epoch": 5.966, "grad_norm": 1.736371397972107, "learning_rate": 2e-05, "loss": 0.06801055, "step": 2983 }, { "epoch": 5.968, "grad_norm": 1.1367436647415161, "learning_rate": 2e-05, "loss": 0.03756493, "step": 2984 }, { "epoch": 5.97, "grad_norm": 3.5082836151123047, "learning_rate": 2e-05, "loss": 0.04000496, "step": 2985 }, { "epoch": 5.9719999999999995, "grad_norm": 1.375747799873352, "learning_rate": 2e-05, "loss": 0.04657515, "step": 2986 }, { "epoch": 5.974, "grad_norm": 1.4611024856567383, "learning_rate": 2e-05, "loss": 0.03940439, "step": 2987 }, { "epoch": 5.976, "grad_norm": 1.1854066848754883, "learning_rate": 2e-05, "loss": 0.03805132, "step": 2988 }, { "epoch": 5.978, "grad_norm": 2.021512746810913, "learning_rate": 2e-05, "loss": 0.04191308, "step": 2989 }, { "epoch": 5.98, "grad_norm": 2.109743356704712, "learning_rate": 2e-05, "loss": 0.05224823, "step": 2990 }, { "epoch": 5.982, "grad_norm": 1.127806544303894, "learning_rate": 2e-05, "loss": 0.03519157, "step": 2991 }, { "epoch": 5.984, "grad_norm": 1.0197484493255615, "learning_rate": 2e-05, "loss": 0.03518543, "step": 2992 }, { "epoch": 5.986, "grad_norm": 2.16267728805542, "learning_rate": 2e-05, "loss": 0.07302199, "step": 2993 }, { "epoch": 5.9879999999999995, "grad_norm": 1.092279314994812, "learning_rate": 2e-05, "loss": 0.03761041, "step": 2994 }, { "epoch": 5.99, "grad_norm": 1.807556390762329, "learning_rate": 2e-05, "loss": 0.04661435, "step": 2995 }, { "epoch": 5.992, "grad_norm": 1.6824204921722412, "learning_rate": 2e-05, "loss": 0.04034545, "step": 2996 }, { "epoch": 5.994, "grad_norm": 1.7178198099136353, "learning_rate": 2e-05, "loss": 0.05870678, "step": 2997 }, { "epoch": 5.996, "grad_norm": 1.335422396659851, "learning_rate": 2e-05, "loss": 0.03846036, "step": 2998 }, { "epoch": 5.998, "grad_norm": 1.3994457721710205, "learning_rate": 2e-05, "loss": 0.05020154, "step": 2999 }, { "epoch": 6.0, "grad_norm": 1.4923213720321655, "learning_rate": 2e-05, "loss": 0.05444247, "step": 3000 }, { "epoch": 6.0, "eval_performance": { "AngleClassification_1": 0.992, "AngleClassification_2": 0.998, "AngleClassification_3": 0.8702594810379242, "Equal_1": 0.968, "Equal_2": 0.846307385229541, "Equal_3": 0.7784431137724551, "LineComparison_1": 1.0, "LineComparison_2": 0.9860279441117764, "LineComparison_3": 0.9660678642714571, "Parallel_1": 0.9779559118236473, "Parallel_2": 0.9779559118236473, "Parallel_3": 0.812, "Perpendicular_1": 0.972, "Perpendicular_2": 0.714, "Perpendicular_3": 0.24949899799599198, "PointLiesOnCircle_1": 0.9959919839679359, "PointLiesOnCircle_2": 0.994, "PointLiesOnCircle_3": 0.9716, "PointLiesOnLine_1": 0.9859719438877755, "PointLiesOnLine_2": 0.9839679358717435, "PointLiesOnLine_3": 0.7425149700598802 }, "eval_runtime": 225.1043, "eval_samples_per_second": 46.645, "eval_steps_per_second": 0.933, "step": 3000 }, { "epoch": 6.002, "grad_norm": 1.3161890506744385, "learning_rate": 2e-05, "loss": 0.04378459, "step": 3001 }, { "epoch": 6.004, "grad_norm": 1.2961128950119019, "learning_rate": 2e-05, "loss": 0.05367633, "step": 3002 }, { "epoch": 6.006, "grad_norm": 1.0465567111968994, "learning_rate": 2e-05, "loss": 0.04422379, "step": 3003 }, { "epoch": 6.008, "grad_norm": 1.5291719436645508, "learning_rate": 2e-05, "loss": 0.04696681, "step": 3004 }, { "epoch": 6.01, "grad_norm": 1.3451310396194458, "learning_rate": 2e-05, "loss": 0.05145978, "step": 3005 }, { "epoch": 6.012, "grad_norm": 1.1946146488189697, "learning_rate": 2e-05, "loss": 0.04929256, "step": 3006 }, { "epoch": 6.014, "grad_norm": 1.94503653049469, "learning_rate": 2e-05, "loss": 0.04044854, "step": 3007 }, { "epoch": 6.016, "grad_norm": 1.1857857704162598, "learning_rate": 2e-05, "loss": 0.0375718, "step": 3008 }, { "epoch": 6.018, "grad_norm": 1.631342887878418, "learning_rate": 2e-05, "loss": 0.05159768, "step": 3009 }, { "epoch": 6.02, "grad_norm": 1.0188089609146118, "learning_rate": 2e-05, "loss": 0.03937045, "step": 3010 }, { "epoch": 6.022, "grad_norm": 1.3660142421722412, "learning_rate": 2e-05, "loss": 0.05680571, "step": 3011 }, { "epoch": 6.024, "grad_norm": 1.3825116157531738, "learning_rate": 2e-05, "loss": 0.0641457, "step": 3012 }, { "epoch": 6.026, "grad_norm": 1.8643577098846436, "learning_rate": 2e-05, "loss": 0.06286463, "step": 3013 }, { "epoch": 6.028, "grad_norm": 1.1185054779052734, "learning_rate": 2e-05, "loss": 0.03633307, "step": 3014 }, { "epoch": 6.03, "grad_norm": 1.4574296474456787, "learning_rate": 2e-05, "loss": 0.05417381, "step": 3015 }, { "epoch": 6.032, "grad_norm": 1.3509869575500488, "learning_rate": 2e-05, "loss": 0.04900474, "step": 3016 }, { "epoch": 6.034, "grad_norm": 0.9867383241653442, "learning_rate": 2e-05, "loss": 0.03862732, "step": 3017 }, { "epoch": 6.036, "grad_norm": 1.2692068815231323, "learning_rate": 2e-05, "loss": 0.04914217, "step": 3018 }, { "epoch": 6.038, "grad_norm": 1.7467186450958252, "learning_rate": 2e-05, "loss": 0.04507627, "step": 3019 }, { "epoch": 6.04, "grad_norm": 1.4998663663864136, "learning_rate": 2e-05, "loss": 0.05345377, "step": 3020 }, { "epoch": 6.042, "grad_norm": 1.582988977432251, "learning_rate": 2e-05, "loss": 0.04472338, "step": 3021 }, { "epoch": 6.044, "grad_norm": 1.3374141454696655, "learning_rate": 2e-05, "loss": 0.04722141, "step": 3022 }, { "epoch": 6.046, "grad_norm": 1.4675813913345337, "learning_rate": 2e-05, "loss": 0.06187618, "step": 3023 }, { "epoch": 6.048, "grad_norm": 1.677485704421997, "learning_rate": 2e-05, "loss": 0.05583868, "step": 3024 }, { "epoch": 6.05, "grad_norm": 1.0209105014801025, "learning_rate": 2e-05, "loss": 0.03524206, "step": 3025 }, { "epoch": 6.052, "grad_norm": 1.5814648866653442, "learning_rate": 2e-05, "loss": 0.04622476, "step": 3026 }, { "epoch": 6.054, "grad_norm": 1.5755735635757446, "learning_rate": 2e-05, "loss": 0.0479579, "step": 3027 }, { "epoch": 6.056, "grad_norm": 1.281966209411621, "learning_rate": 2e-05, "loss": 0.03380118, "step": 3028 }, { "epoch": 6.058, "grad_norm": 1.0457772016525269, "learning_rate": 2e-05, "loss": 0.04000027, "step": 3029 }, { "epoch": 6.06, "grad_norm": 1.3684334754943848, "learning_rate": 2e-05, "loss": 0.04558077, "step": 3030 }, { "epoch": 6.062, "grad_norm": 1.2864797115325928, "learning_rate": 2e-05, "loss": 0.04938862, "step": 3031 }, { "epoch": 6.064, "grad_norm": 1.3729755878448486, "learning_rate": 2e-05, "loss": 0.03566775, "step": 3032 }, { "epoch": 6.066, "grad_norm": 1.832397222518921, "learning_rate": 2e-05, "loss": 0.05182783, "step": 3033 }, { "epoch": 6.068, "grad_norm": 1.470686674118042, "learning_rate": 2e-05, "loss": 0.04592628, "step": 3034 }, { "epoch": 6.07, "grad_norm": 0.8238653540611267, "learning_rate": 2e-05, "loss": 0.03109474, "step": 3035 }, { "epoch": 6.072, "grad_norm": 1.6109676361083984, "learning_rate": 2e-05, "loss": 0.0458492, "step": 3036 }, { "epoch": 6.074, "grad_norm": 1.1719423532485962, "learning_rate": 2e-05, "loss": 0.04321983, "step": 3037 }, { "epoch": 6.076, "grad_norm": 1.3623055219650269, "learning_rate": 2e-05, "loss": 0.04440439, "step": 3038 }, { "epoch": 6.078, "grad_norm": 0.9973179697990417, "learning_rate": 2e-05, "loss": 0.04166106, "step": 3039 }, { "epoch": 6.08, "grad_norm": 0.8681755065917969, "learning_rate": 2e-05, "loss": 0.03179272, "step": 3040 }, { "epoch": 6.082, "grad_norm": 1.4890780448913574, "learning_rate": 2e-05, "loss": 0.05809349, "step": 3041 }, { "epoch": 6.084, "grad_norm": 1.4804933071136475, "learning_rate": 2e-05, "loss": 0.04718833, "step": 3042 }, { "epoch": 6.086, "grad_norm": 0.845701277256012, "learning_rate": 2e-05, "loss": 0.0248522, "step": 3043 }, { "epoch": 6.088, "grad_norm": 1.4337422847747803, "learning_rate": 2e-05, "loss": 0.04450101, "step": 3044 }, { "epoch": 6.09, "grad_norm": 1.2754102945327759, "learning_rate": 2e-05, "loss": 0.04626191, "step": 3045 }, { "epoch": 6.092, "grad_norm": 1.0614118576049805, "learning_rate": 2e-05, "loss": 0.05025931, "step": 3046 }, { "epoch": 6.094, "grad_norm": 1.2671074867248535, "learning_rate": 2e-05, "loss": 0.04070025, "step": 3047 }, { "epoch": 6.096, "grad_norm": 1.440001130104065, "learning_rate": 2e-05, "loss": 0.05203547, "step": 3048 }, { "epoch": 6.098, "grad_norm": 0.9908827543258667, "learning_rate": 2e-05, "loss": 0.04474022, "step": 3049 }, { "epoch": 6.1, "grad_norm": 1.2202837467193604, "learning_rate": 2e-05, "loss": 0.04143565, "step": 3050 }, { "epoch": 6.102, "grad_norm": 1.6719521284103394, "learning_rate": 2e-05, "loss": 0.04686677, "step": 3051 }, { "epoch": 6.104, "grad_norm": 1.2258193492889404, "learning_rate": 2e-05, "loss": 0.05522996, "step": 3052 }, { "epoch": 6.106, "grad_norm": 1.479974389076233, "learning_rate": 2e-05, "loss": 0.04719516, "step": 3053 }, { "epoch": 6.108, "grad_norm": 1.53831148147583, "learning_rate": 2e-05, "loss": 0.04133063, "step": 3054 }, { "epoch": 6.11, "grad_norm": 1.8587276935577393, "learning_rate": 2e-05, "loss": 0.03950656, "step": 3055 }, { "epoch": 6.112, "grad_norm": 1.272857904434204, "learning_rate": 2e-05, "loss": 0.03740232, "step": 3056 }, { "epoch": 6.114, "grad_norm": 2.306462526321411, "learning_rate": 2e-05, "loss": 0.05091295, "step": 3057 }, { "epoch": 6.116, "grad_norm": 1.4783159494400024, "learning_rate": 2e-05, "loss": 0.05756344, "step": 3058 }, { "epoch": 6.118, "grad_norm": 0.8899611830711365, "learning_rate": 2e-05, "loss": 0.02739152, "step": 3059 }, { "epoch": 6.12, "grad_norm": 1.0665615797042847, "learning_rate": 2e-05, "loss": 0.04276315, "step": 3060 }, { "epoch": 6.122, "grad_norm": 1.842308521270752, "learning_rate": 2e-05, "loss": 0.05516462, "step": 3061 }, { "epoch": 6.124, "grad_norm": 1.257582187652588, "learning_rate": 2e-05, "loss": 0.03166568, "step": 3062 }, { "epoch": 6.126, "grad_norm": 0.9712730646133423, "learning_rate": 2e-05, "loss": 0.03660333, "step": 3063 }, { "epoch": 6.128, "grad_norm": 1.440290093421936, "learning_rate": 2e-05, "loss": 0.04885936, "step": 3064 }, { "epoch": 6.13, "grad_norm": 1.5558891296386719, "learning_rate": 2e-05, "loss": 0.05717931, "step": 3065 }, { "epoch": 6.132, "grad_norm": 1.5532346963882446, "learning_rate": 2e-05, "loss": 0.04880936, "step": 3066 }, { "epoch": 6.134, "grad_norm": 2.02591609954834, "learning_rate": 2e-05, "loss": 0.0564698, "step": 3067 }, { "epoch": 6.136, "grad_norm": 1.334431767463684, "learning_rate": 2e-05, "loss": 0.03501514, "step": 3068 }, { "epoch": 6.138, "grad_norm": 1.788008213043213, "learning_rate": 2e-05, "loss": 0.034972, "step": 3069 }, { "epoch": 6.14, "grad_norm": 1.317406415939331, "learning_rate": 2e-05, "loss": 0.04335321, "step": 3070 }, { "epoch": 6.142, "grad_norm": 1.169618844985962, "learning_rate": 2e-05, "loss": 0.04345078, "step": 3071 }, { "epoch": 6.144, "grad_norm": 1.5153645277023315, "learning_rate": 2e-05, "loss": 0.03565796, "step": 3072 }, { "epoch": 6.146, "grad_norm": 0.9811383485794067, "learning_rate": 2e-05, "loss": 0.04565081, "step": 3073 }, { "epoch": 6.148, "grad_norm": 2.310459613800049, "learning_rate": 2e-05, "loss": 0.058357, "step": 3074 }, { "epoch": 6.15, "grad_norm": 1.2713453769683838, "learning_rate": 2e-05, "loss": 0.03925248, "step": 3075 }, { "epoch": 6.152, "grad_norm": 1.417584776878357, "learning_rate": 2e-05, "loss": 0.04796022, "step": 3076 }, { "epoch": 6.154, "grad_norm": 1.4617929458618164, "learning_rate": 2e-05, "loss": 0.05526109, "step": 3077 }, { "epoch": 6.156, "grad_norm": 2.2081918716430664, "learning_rate": 2e-05, "loss": 0.05006688, "step": 3078 }, { "epoch": 6.158, "grad_norm": 1.0381126403808594, "learning_rate": 2e-05, "loss": 0.04281139, "step": 3079 }, { "epoch": 6.16, "grad_norm": 1.2815098762512207, "learning_rate": 2e-05, "loss": 0.04445343, "step": 3080 }, { "epoch": 6.162, "grad_norm": 1.4426833391189575, "learning_rate": 2e-05, "loss": 0.04177586, "step": 3081 }, { "epoch": 6.164, "grad_norm": 0.9423404335975647, "learning_rate": 2e-05, "loss": 0.04299556, "step": 3082 }, { "epoch": 6.166, "grad_norm": 0.9810669422149658, "learning_rate": 2e-05, "loss": 0.03550537, "step": 3083 }, { "epoch": 6.168, "grad_norm": 1.547959566116333, "learning_rate": 2e-05, "loss": 0.05556474, "step": 3084 }, { "epoch": 6.17, "grad_norm": 1.1658647060394287, "learning_rate": 2e-05, "loss": 0.03783731, "step": 3085 }, { "epoch": 6.172, "grad_norm": 0.9328312873840332, "learning_rate": 2e-05, "loss": 0.02851342, "step": 3086 }, { "epoch": 6.174, "grad_norm": 1.4125739336013794, "learning_rate": 2e-05, "loss": 0.04720511, "step": 3087 }, { "epoch": 6.176, "grad_norm": 1.7162665128707886, "learning_rate": 2e-05, "loss": 0.05028572, "step": 3088 }, { "epoch": 6.178, "grad_norm": 1.5587931871414185, "learning_rate": 2e-05, "loss": 0.04898805, "step": 3089 }, { "epoch": 6.18, "grad_norm": 1.1429084539413452, "learning_rate": 2e-05, "loss": 0.04541856, "step": 3090 }, { "epoch": 6.182, "grad_norm": 1.3425962924957275, "learning_rate": 2e-05, "loss": 0.04894719, "step": 3091 }, { "epoch": 6.184, "grad_norm": 1.0190035104751587, "learning_rate": 2e-05, "loss": 0.03490899, "step": 3092 }, { "epoch": 6.186, "grad_norm": 1.7610735893249512, "learning_rate": 2e-05, "loss": 0.04846447, "step": 3093 }, { "epoch": 6.188, "grad_norm": 1.1729168891906738, "learning_rate": 2e-05, "loss": 0.03721018, "step": 3094 }, { "epoch": 6.19, "grad_norm": 1.3536039590835571, "learning_rate": 2e-05, "loss": 0.05594255, "step": 3095 }, { "epoch": 6.192, "grad_norm": 1.0234556198120117, "learning_rate": 2e-05, "loss": 0.04579875, "step": 3096 }, { "epoch": 6.194, "grad_norm": 0.9043900370597839, "learning_rate": 2e-05, "loss": 0.03340976, "step": 3097 }, { "epoch": 6.196, "grad_norm": 0.8678377866744995, "learning_rate": 2e-05, "loss": 0.02561942, "step": 3098 }, { "epoch": 6.198, "grad_norm": 1.6659561395645142, "learning_rate": 2e-05, "loss": 0.04909433, "step": 3099 }, { "epoch": 6.2, "grad_norm": 1.3002934455871582, "learning_rate": 2e-05, "loss": 0.03873566, "step": 3100 }, { "epoch": 6.202, "grad_norm": 2.3957648277282715, "learning_rate": 2e-05, "loss": 0.06940307, "step": 3101 }, { "epoch": 6.204, "grad_norm": 1.047159194946289, "learning_rate": 2e-05, "loss": 0.03468559, "step": 3102 }, { "epoch": 6.206, "grad_norm": 1.5703145265579224, "learning_rate": 2e-05, "loss": 0.04476855, "step": 3103 }, { "epoch": 6.208, "grad_norm": 1.143730878829956, "learning_rate": 2e-05, "loss": 0.0371956, "step": 3104 }, { "epoch": 6.21, "grad_norm": 0.904877781867981, "learning_rate": 2e-05, "loss": 0.03321954, "step": 3105 }, { "epoch": 6.212, "grad_norm": 1.137384295463562, "learning_rate": 2e-05, "loss": 0.03107522, "step": 3106 }, { "epoch": 6.214, "grad_norm": 0.7291117310523987, "learning_rate": 2e-05, "loss": 0.01996266, "step": 3107 }, { "epoch": 6.216, "grad_norm": 1.6794525384902954, "learning_rate": 2e-05, "loss": 0.06432796, "step": 3108 }, { "epoch": 6.218, "grad_norm": 1.8913358449935913, "learning_rate": 2e-05, "loss": 0.04582284, "step": 3109 }, { "epoch": 6.22, "grad_norm": 1.0575504302978516, "learning_rate": 2e-05, "loss": 0.03536428, "step": 3110 }, { "epoch": 6.222, "grad_norm": 1.417241096496582, "learning_rate": 2e-05, "loss": 0.03927748, "step": 3111 }, { "epoch": 6.224, "grad_norm": 1.2141749858856201, "learning_rate": 2e-05, "loss": 0.04373081, "step": 3112 }, { "epoch": 6.226, "grad_norm": 1.2671862840652466, "learning_rate": 2e-05, "loss": 0.03179625, "step": 3113 }, { "epoch": 6.228, "grad_norm": 1.364601731300354, "learning_rate": 2e-05, "loss": 0.03070304, "step": 3114 }, { "epoch": 6.23, "grad_norm": 0.9407054781913757, "learning_rate": 2e-05, "loss": 0.02954445, "step": 3115 }, { "epoch": 6.232, "grad_norm": 1.8013519048690796, "learning_rate": 2e-05, "loss": 0.04315212, "step": 3116 }, { "epoch": 6.234, "grad_norm": 2.4341020584106445, "learning_rate": 2e-05, "loss": 0.06439336, "step": 3117 }, { "epoch": 6.236, "grad_norm": 1.2608367204666138, "learning_rate": 2e-05, "loss": 0.04622247, "step": 3118 }, { "epoch": 6.2379999999999995, "grad_norm": 1.26650071144104, "learning_rate": 2e-05, "loss": 0.0406836, "step": 3119 }, { "epoch": 6.24, "grad_norm": 1.2908011674880981, "learning_rate": 2e-05, "loss": 0.05191736, "step": 3120 }, { "epoch": 6.242, "grad_norm": 1.1509265899658203, "learning_rate": 2e-05, "loss": 0.04590309, "step": 3121 }, { "epoch": 6.244, "grad_norm": 1.4848324060440063, "learning_rate": 2e-05, "loss": 0.05465535, "step": 3122 }, { "epoch": 6.246, "grad_norm": 1.3963325023651123, "learning_rate": 2e-05, "loss": 0.04458882, "step": 3123 }, { "epoch": 6.248, "grad_norm": 1.5089930295944214, "learning_rate": 2e-05, "loss": 0.05655834, "step": 3124 }, { "epoch": 6.25, "grad_norm": 0.9687227010726929, "learning_rate": 2e-05, "loss": 0.03655609, "step": 3125 }, { "epoch": 6.252, "grad_norm": 0.9722353219985962, "learning_rate": 2e-05, "loss": 0.03613928, "step": 3126 }, { "epoch": 6.254, "grad_norm": 1.3469654321670532, "learning_rate": 2e-05, "loss": 0.04081573, "step": 3127 }, { "epoch": 6.256, "grad_norm": 1.3990508317947388, "learning_rate": 2e-05, "loss": 0.04836226, "step": 3128 }, { "epoch": 6.258, "grad_norm": 1.209796667098999, "learning_rate": 2e-05, "loss": 0.0362431, "step": 3129 }, { "epoch": 6.26, "grad_norm": 1.1029555797576904, "learning_rate": 2e-05, "loss": 0.03270067, "step": 3130 }, { "epoch": 6.2620000000000005, "grad_norm": 1.240614891052246, "learning_rate": 2e-05, "loss": 0.04148632, "step": 3131 }, { "epoch": 6.264, "grad_norm": 1.2987496852874756, "learning_rate": 2e-05, "loss": 0.04694106, "step": 3132 }, { "epoch": 6.266, "grad_norm": 1.1305968761444092, "learning_rate": 2e-05, "loss": 0.03982592, "step": 3133 }, { "epoch": 6.268, "grad_norm": 1.9024533033370972, "learning_rate": 2e-05, "loss": 0.06878211, "step": 3134 }, { "epoch": 6.27, "grad_norm": 1.1951837539672852, "learning_rate": 2e-05, "loss": 0.04061879, "step": 3135 }, { "epoch": 6.272, "grad_norm": 1.137134075164795, "learning_rate": 2e-05, "loss": 0.0351263, "step": 3136 }, { "epoch": 6.274, "grad_norm": 1.5089586973190308, "learning_rate": 2e-05, "loss": 0.05485439, "step": 3137 }, { "epoch": 6.276, "grad_norm": 1.51993727684021, "learning_rate": 2e-05, "loss": 0.03410707, "step": 3138 }, { "epoch": 6.2780000000000005, "grad_norm": 1.3345667123794556, "learning_rate": 2e-05, "loss": 0.05253769, "step": 3139 }, { "epoch": 6.28, "grad_norm": 1.496768832206726, "learning_rate": 2e-05, "loss": 0.05014579, "step": 3140 }, { "epoch": 6.282, "grad_norm": 1.8037397861480713, "learning_rate": 2e-05, "loss": 0.05037682, "step": 3141 }, { "epoch": 6.284, "grad_norm": 1.6372870206832886, "learning_rate": 2e-05, "loss": 0.04796243, "step": 3142 }, { "epoch": 6.286, "grad_norm": 1.4426953792572021, "learning_rate": 2e-05, "loss": 0.04980314, "step": 3143 }, { "epoch": 6.288, "grad_norm": 1.0515247583389282, "learning_rate": 2e-05, "loss": 0.0424369, "step": 3144 }, { "epoch": 6.29, "grad_norm": 1.1916460990905762, "learning_rate": 2e-05, "loss": 0.03924889, "step": 3145 }, { "epoch": 6.292, "grad_norm": 1.3459762334823608, "learning_rate": 2e-05, "loss": 0.0479975, "step": 3146 }, { "epoch": 6.294, "grad_norm": 1.4435149431228638, "learning_rate": 2e-05, "loss": 0.03353125, "step": 3147 }, { "epoch": 6.296, "grad_norm": 0.9942042827606201, "learning_rate": 2e-05, "loss": 0.04053356, "step": 3148 }, { "epoch": 6.298, "grad_norm": 1.0446157455444336, "learning_rate": 2e-05, "loss": 0.03388612, "step": 3149 }, { "epoch": 6.3, "grad_norm": 1.5248451232910156, "learning_rate": 2e-05, "loss": 0.04546296, "step": 3150 }, { "epoch": 6.302, "grad_norm": 0.9659856557846069, "learning_rate": 2e-05, "loss": 0.03362028, "step": 3151 }, { "epoch": 6.304, "grad_norm": 1.2279140949249268, "learning_rate": 2e-05, "loss": 0.03981371, "step": 3152 }, { "epoch": 6.306, "grad_norm": 0.8475239276885986, "learning_rate": 2e-05, "loss": 0.03402438, "step": 3153 }, { "epoch": 6.308, "grad_norm": 1.4898488521575928, "learning_rate": 2e-05, "loss": 0.06024408, "step": 3154 }, { "epoch": 6.31, "grad_norm": 0.7652701735496521, "learning_rate": 2e-05, "loss": 0.02798587, "step": 3155 }, { "epoch": 6.312, "grad_norm": 1.4900647401809692, "learning_rate": 2e-05, "loss": 0.04824045, "step": 3156 }, { "epoch": 6.314, "grad_norm": 1.5422735214233398, "learning_rate": 2e-05, "loss": 0.05436178, "step": 3157 }, { "epoch": 6.316, "grad_norm": 1.217408299446106, "learning_rate": 2e-05, "loss": 0.03753351, "step": 3158 }, { "epoch": 6.318, "grad_norm": 1.004137635231018, "learning_rate": 2e-05, "loss": 0.03109561, "step": 3159 }, { "epoch": 6.32, "grad_norm": 1.479217529296875, "learning_rate": 2e-05, "loss": 0.0453066, "step": 3160 }, { "epoch": 6.322, "grad_norm": 1.092153549194336, "learning_rate": 2e-05, "loss": 0.03759048, "step": 3161 }, { "epoch": 6.324, "grad_norm": 1.261121153831482, "learning_rate": 2e-05, "loss": 0.05244471, "step": 3162 }, { "epoch": 6.326, "grad_norm": 1.0578536987304688, "learning_rate": 2e-05, "loss": 0.04246055, "step": 3163 }, { "epoch": 6.328, "grad_norm": 1.104163408279419, "learning_rate": 2e-05, "loss": 0.04848431, "step": 3164 }, { "epoch": 6.33, "grad_norm": 1.1136482954025269, "learning_rate": 2e-05, "loss": 0.03464284, "step": 3165 }, { "epoch": 6.332, "grad_norm": 0.8674597144126892, "learning_rate": 2e-05, "loss": 0.03293711, "step": 3166 }, { "epoch": 6.334, "grad_norm": 1.517189621925354, "learning_rate": 2e-05, "loss": 0.05445864, "step": 3167 }, { "epoch": 6.336, "grad_norm": 1.1480637788772583, "learning_rate": 2e-05, "loss": 0.03713479, "step": 3168 }, { "epoch": 6.338, "grad_norm": 1.6498615741729736, "learning_rate": 2e-05, "loss": 0.03739785, "step": 3169 }, { "epoch": 6.34, "grad_norm": 1.8390711545944214, "learning_rate": 2e-05, "loss": 0.05097878, "step": 3170 }, { "epoch": 6.342, "grad_norm": 1.4589474201202393, "learning_rate": 2e-05, "loss": 0.05498251, "step": 3171 }, { "epoch": 6.344, "grad_norm": 1.8099794387817383, "learning_rate": 2e-05, "loss": 0.04620732, "step": 3172 }, { "epoch": 6.346, "grad_norm": 0.8821712732315063, "learning_rate": 2e-05, "loss": 0.02499045, "step": 3173 }, { "epoch": 6.348, "grad_norm": 0.9748029708862305, "learning_rate": 2e-05, "loss": 0.02997813, "step": 3174 }, { "epoch": 6.35, "grad_norm": 2.5794260501861572, "learning_rate": 2e-05, "loss": 0.04815774, "step": 3175 }, { "epoch": 6.352, "grad_norm": 1.745931625366211, "learning_rate": 2e-05, "loss": 0.04260145, "step": 3176 }, { "epoch": 6.354, "grad_norm": 2.080000877380371, "learning_rate": 2e-05, "loss": 0.03944608, "step": 3177 }, { "epoch": 6.356, "grad_norm": 1.117545247077942, "learning_rate": 2e-05, "loss": 0.04236875, "step": 3178 }, { "epoch": 6.358, "grad_norm": 1.2376056909561157, "learning_rate": 2e-05, "loss": 0.03447882, "step": 3179 }, { "epoch": 6.36, "grad_norm": 1.7711291313171387, "learning_rate": 2e-05, "loss": 0.04368237, "step": 3180 }, { "epoch": 6.362, "grad_norm": 1.5781739950180054, "learning_rate": 2e-05, "loss": 0.0376658, "step": 3181 }, { "epoch": 6.364, "grad_norm": 1.5504977703094482, "learning_rate": 2e-05, "loss": 0.04475008, "step": 3182 }, { "epoch": 6.366, "grad_norm": 1.3855267763137817, "learning_rate": 2e-05, "loss": 0.0245607, "step": 3183 }, { "epoch": 6.368, "grad_norm": 1.345805287361145, "learning_rate": 2e-05, "loss": 0.05902901, "step": 3184 }, { "epoch": 6.37, "grad_norm": 1.2207070589065552, "learning_rate": 2e-05, "loss": 0.04775108, "step": 3185 }, { "epoch": 6.372, "grad_norm": 1.073164463043213, "learning_rate": 2e-05, "loss": 0.0471521, "step": 3186 }, { "epoch": 6.374, "grad_norm": 1.2355923652648926, "learning_rate": 2e-05, "loss": 0.04240716, "step": 3187 }, { "epoch": 6.376, "grad_norm": 1.0203465223312378, "learning_rate": 2e-05, "loss": 0.0405271, "step": 3188 }, { "epoch": 6.378, "grad_norm": 1.0130765438079834, "learning_rate": 2e-05, "loss": 0.03724421, "step": 3189 }, { "epoch": 6.38, "grad_norm": 0.9806060791015625, "learning_rate": 2e-05, "loss": 0.03374423, "step": 3190 }, { "epoch": 6.382, "grad_norm": 1.4668108224868774, "learning_rate": 2e-05, "loss": 0.05625939, "step": 3191 }, { "epoch": 6.384, "grad_norm": 1.9418399333953857, "learning_rate": 2e-05, "loss": 0.04299239, "step": 3192 }, { "epoch": 6.386, "grad_norm": 1.4132583141326904, "learning_rate": 2e-05, "loss": 0.07042644, "step": 3193 }, { "epoch": 6.388, "grad_norm": 1.2331854104995728, "learning_rate": 2e-05, "loss": 0.03946827, "step": 3194 }, { "epoch": 6.39, "grad_norm": 0.882416307926178, "learning_rate": 2e-05, "loss": 0.03792533, "step": 3195 }, { "epoch": 6.392, "grad_norm": 1.3268921375274658, "learning_rate": 2e-05, "loss": 0.04443733, "step": 3196 }, { "epoch": 6.394, "grad_norm": 1.2642195224761963, "learning_rate": 2e-05, "loss": 0.04089775, "step": 3197 }, { "epoch": 6.396, "grad_norm": 0.9844847321510315, "learning_rate": 2e-05, "loss": 0.03189672, "step": 3198 }, { "epoch": 6.398, "grad_norm": 0.9967857599258423, "learning_rate": 2e-05, "loss": 0.04067467, "step": 3199 }, { "epoch": 6.4, "grad_norm": 1.7642426490783691, "learning_rate": 2e-05, "loss": 0.05553395, "step": 3200 }, { "epoch": 6.402, "grad_norm": 1.2402091026306152, "learning_rate": 2e-05, "loss": 0.03991061, "step": 3201 }, { "epoch": 6.404, "grad_norm": 2.2273826599121094, "learning_rate": 2e-05, "loss": 0.07795635, "step": 3202 }, { "epoch": 6.406, "grad_norm": 0.9199107885360718, "learning_rate": 2e-05, "loss": 0.02727722, "step": 3203 }, { "epoch": 6.408, "grad_norm": 1.4984984397888184, "learning_rate": 2e-05, "loss": 0.04829284, "step": 3204 }, { "epoch": 6.41, "grad_norm": 1.4614923000335693, "learning_rate": 2e-05, "loss": 0.05470449, "step": 3205 }, { "epoch": 6.412, "grad_norm": 1.0658401250839233, "learning_rate": 2e-05, "loss": 0.04166016, "step": 3206 }, { "epoch": 6.414, "grad_norm": 0.7831665873527527, "learning_rate": 2e-05, "loss": 0.0230201, "step": 3207 }, { "epoch": 6.416, "grad_norm": 1.2403923273086548, "learning_rate": 2e-05, "loss": 0.04816927, "step": 3208 }, { "epoch": 6.418, "grad_norm": 1.3461334705352783, "learning_rate": 2e-05, "loss": 0.03600474, "step": 3209 }, { "epoch": 6.42, "grad_norm": 1.3055542707443237, "learning_rate": 2e-05, "loss": 0.04211121, "step": 3210 }, { "epoch": 6.422, "grad_norm": 0.9492444396018982, "learning_rate": 2e-05, "loss": 0.03234901, "step": 3211 }, { "epoch": 6.424, "grad_norm": 1.0829654932022095, "learning_rate": 2e-05, "loss": 0.04390299, "step": 3212 }, { "epoch": 6.426, "grad_norm": 1.5620437860488892, "learning_rate": 2e-05, "loss": 0.04758184, "step": 3213 }, { "epoch": 6.428, "grad_norm": 0.9126538634300232, "learning_rate": 2e-05, "loss": 0.03003806, "step": 3214 }, { "epoch": 6.43, "grad_norm": 0.9068739414215088, "learning_rate": 2e-05, "loss": 0.02818158, "step": 3215 }, { "epoch": 6.432, "grad_norm": 0.8464283347129822, "learning_rate": 2e-05, "loss": 0.02605343, "step": 3216 }, { "epoch": 6.434, "grad_norm": 1.0385702848434448, "learning_rate": 2e-05, "loss": 0.03342744, "step": 3217 }, { "epoch": 6.436, "grad_norm": 1.0558537244796753, "learning_rate": 2e-05, "loss": 0.04086405, "step": 3218 }, { "epoch": 6.438, "grad_norm": 2.29348087310791, "learning_rate": 2e-05, "loss": 0.04503333, "step": 3219 }, { "epoch": 6.44, "grad_norm": 1.9502143859863281, "learning_rate": 2e-05, "loss": 0.04272745, "step": 3220 }, { "epoch": 6.442, "grad_norm": 2.232313394546509, "learning_rate": 2e-05, "loss": 0.04222386, "step": 3221 }, { "epoch": 6.444, "grad_norm": 1.836313247680664, "learning_rate": 2e-05, "loss": 0.04705041, "step": 3222 }, { "epoch": 6.446, "grad_norm": 0.7322311997413635, "learning_rate": 2e-05, "loss": 0.02296886, "step": 3223 }, { "epoch": 6.448, "grad_norm": 1.494072437286377, "learning_rate": 2e-05, "loss": 0.03639394, "step": 3224 }, { "epoch": 6.45, "grad_norm": 2.495302677154541, "learning_rate": 2e-05, "loss": 0.05203876, "step": 3225 }, { "epoch": 6.452, "grad_norm": 1.1443636417388916, "learning_rate": 2e-05, "loss": 0.03674702, "step": 3226 }, { "epoch": 6.454, "grad_norm": 2.419667959213257, "learning_rate": 2e-05, "loss": 0.04219575, "step": 3227 }, { "epoch": 6.456, "grad_norm": 1.262884259223938, "learning_rate": 2e-05, "loss": 0.04617929, "step": 3228 }, { "epoch": 6.458, "grad_norm": 2.2660434246063232, "learning_rate": 2e-05, "loss": 0.05761437, "step": 3229 }, { "epoch": 6.46, "grad_norm": 0.8994302153587341, "learning_rate": 2e-05, "loss": 0.026951, "step": 3230 }, { "epoch": 6.462, "grad_norm": 1.6061365604400635, "learning_rate": 2e-05, "loss": 0.05332478, "step": 3231 }, { "epoch": 6.464, "grad_norm": 0.9589073061943054, "learning_rate": 2e-05, "loss": 0.03688629, "step": 3232 }, { "epoch": 6.466, "grad_norm": 1.4944084882736206, "learning_rate": 2e-05, "loss": 0.05502374, "step": 3233 }, { "epoch": 6.468, "grad_norm": 1.7852489948272705, "learning_rate": 2e-05, "loss": 0.04708889, "step": 3234 }, { "epoch": 6.47, "grad_norm": 1.4473493099212646, "learning_rate": 2e-05, "loss": 0.04440481, "step": 3235 }, { "epoch": 6.4719999999999995, "grad_norm": 1.0919731855392456, "learning_rate": 2e-05, "loss": 0.03112622, "step": 3236 }, { "epoch": 6.474, "grad_norm": 2.611055612564087, "learning_rate": 2e-05, "loss": 0.0420262, "step": 3237 }, { "epoch": 6.476, "grad_norm": 1.4983372688293457, "learning_rate": 2e-05, "loss": 0.04474982, "step": 3238 }, { "epoch": 6.478, "grad_norm": 1.2199146747589111, "learning_rate": 2e-05, "loss": 0.03457824, "step": 3239 }, { "epoch": 6.48, "grad_norm": 2.1361682415008545, "learning_rate": 2e-05, "loss": 0.04375783, "step": 3240 }, { "epoch": 6.482, "grad_norm": 2.967723846435547, "learning_rate": 2e-05, "loss": 0.0432057, "step": 3241 }, { "epoch": 6.484, "grad_norm": 1.1857640743255615, "learning_rate": 2e-05, "loss": 0.04291175, "step": 3242 }, { "epoch": 6.486, "grad_norm": 0.9245066046714783, "learning_rate": 2e-05, "loss": 0.02915109, "step": 3243 }, { "epoch": 6.4879999999999995, "grad_norm": 1.0996273756027222, "learning_rate": 2e-05, "loss": 0.03724172, "step": 3244 }, { "epoch": 6.49, "grad_norm": 1.2449524402618408, "learning_rate": 2e-05, "loss": 0.04266273, "step": 3245 }, { "epoch": 6.492, "grad_norm": 0.8912386894226074, "learning_rate": 2e-05, "loss": 0.02502548, "step": 3246 }, { "epoch": 6.494, "grad_norm": 2.111814022064209, "learning_rate": 2e-05, "loss": 0.05880164, "step": 3247 }, { "epoch": 6.496, "grad_norm": 1.4234532117843628, "learning_rate": 2e-05, "loss": 0.03681805, "step": 3248 }, { "epoch": 6.498, "grad_norm": 1.3853726387023926, "learning_rate": 2e-05, "loss": 0.04147249, "step": 3249 }, { "epoch": 6.5, "grad_norm": 1.7019610404968262, "learning_rate": 2e-05, "loss": 0.04452681, "step": 3250 }, { "epoch": 6.502, "grad_norm": 1.9521831274032593, "learning_rate": 2e-05, "loss": 0.05269092, "step": 3251 }, { "epoch": 6.504, "grad_norm": 1.6485697031021118, "learning_rate": 2e-05, "loss": 0.0483473, "step": 3252 }, { "epoch": 6.506, "grad_norm": 2.20625901222229, "learning_rate": 2e-05, "loss": 0.04744877, "step": 3253 }, { "epoch": 6.508, "grad_norm": 1.37063729763031, "learning_rate": 2e-05, "loss": 0.03588368, "step": 3254 }, { "epoch": 6.51, "grad_norm": 0.9575537443161011, "learning_rate": 2e-05, "loss": 0.03912383, "step": 3255 }, { "epoch": 6.5120000000000005, "grad_norm": 0.8667104244232178, "learning_rate": 2e-05, "loss": 0.02957069, "step": 3256 }, { "epoch": 6.514, "grad_norm": 1.2409992218017578, "learning_rate": 2e-05, "loss": 0.03851247, "step": 3257 }, { "epoch": 6.516, "grad_norm": 2.2713325023651123, "learning_rate": 2e-05, "loss": 0.06293901, "step": 3258 }, { "epoch": 6.518, "grad_norm": 1.8614284992218018, "learning_rate": 2e-05, "loss": 0.03862715, "step": 3259 }, { "epoch": 6.52, "grad_norm": 1.1193434000015259, "learning_rate": 2e-05, "loss": 0.06010866, "step": 3260 }, { "epoch": 6.522, "grad_norm": 1.0424953699111938, "learning_rate": 2e-05, "loss": 0.03431137, "step": 3261 }, { "epoch": 6.524, "grad_norm": 1.7676591873168945, "learning_rate": 2e-05, "loss": 0.04659017, "step": 3262 }, { "epoch": 6.526, "grad_norm": 0.9104840755462646, "learning_rate": 2e-05, "loss": 0.03388787, "step": 3263 }, { "epoch": 6.5280000000000005, "grad_norm": 1.1736050844192505, "learning_rate": 2e-05, "loss": 0.05460058, "step": 3264 }, { "epoch": 6.53, "grad_norm": 1.2880010604858398, "learning_rate": 2e-05, "loss": 0.02913444, "step": 3265 }, { "epoch": 6.532, "grad_norm": 1.1868360042572021, "learning_rate": 2e-05, "loss": 0.03757363, "step": 3266 }, { "epoch": 6.534, "grad_norm": 1.4566246271133423, "learning_rate": 2e-05, "loss": 0.05385797, "step": 3267 }, { "epoch": 6.536, "grad_norm": 1.4764174222946167, "learning_rate": 2e-05, "loss": 0.05808482, "step": 3268 }, { "epoch": 6.538, "grad_norm": 1.0427627563476562, "learning_rate": 2e-05, "loss": 0.04558924, "step": 3269 }, { "epoch": 6.54, "grad_norm": 1.1908636093139648, "learning_rate": 2e-05, "loss": 0.04079745, "step": 3270 }, { "epoch": 6.542, "grad_norm": 1.8669737577438354, "learning_rate": 2e-05, "loss": 0.04906952, "step": 3271 }, { "epoch": 6.5440000000000005, "grad_norm": 0.8782942891120911, "learning_rate": 2e-05, "loss": 0.03116231, "step": 3272 }, { "epoch": 6.546, "grad_norm": 1.2570682764053345, "learning_rate": 2e-05, "loss": 0.03947549, "step": 3273 }, { "epoch": 6.548, "grad_norm": 1.314305067062378, "learning_rate": 2e-05, "loss": 0.05259968, "step": 3274 }, { "epoch": 6.55, "grad_norm": 1.183228611946106, "learning_rate": 2e-05, "loss": 0.04200091, "step": 3275 }, { "epoch": 6.552, "grad_norm": 1.6059496402740479, "learning_rate": 2e-05, "loss": 0.03941065, "step": 3276 }, { "epoch": 6.554, "grad_norm": 0.9690706133842468, "learning_rate": 2e-05, "loss": 0.02802291, "step": 3277 }, { "epoch": 6.556, "grad_norm": 1.4801586866378784, "learning_rate": 2e-05, "loss": 0.03633005, "step": 3278 }, { "epoch": 6.558, "grad_norm": 1.19659423828125, "learning_rate": 2e-05, "loss": 0.03681434, "step": 3279 }, { "epoch": 6.5600000000000005, "grad_norm": 1.755506992340088, "learning_rate": 2e-05, "loss": 0.05039933, "step": 3280 }, { "epoch": 6.562, "grad_norm": 0.9715291857719421, "learning_rate": 2e-05, "loss": 0.03310108, "step": 3281 }, { "epoch": 6.564, "grad_norm": 0.8169735074043274, "learning_rate": 2e-05, "loss": 0.02150278, "step": 3282 }, { "epoch": 6.566, "grad_norm": 1.8143644332885742, "learning_rate": 2e-05, "loss": 0.03411907, "step": 3283 }, { "epoch": 6.568, "grad_norm": 0.9892904758453369, "learning_rate": 2e-05, "loss": 0.02915862, "step": 3284 }, { "epoch": 6.57, "grad_norm": 1.7217111587524414, "learning_rate": 2e-05, "loss": 0.04770703, "step": 3285 }, { "epoch": 6.572, "grad_norm": 1.0632236003875732, "learning_rate": 2e-05, "loss": 0.03468326, "step": 3286 }, { "epoch": 6.574, "grad_norm": 1.6081547737121582, "learning_rate": 2e-05, "loss": 0.03781705, "step": 3287 }, { "epoch": 6.576, "grad_norm": 0.9494056701660156, "learning_rate": 2e-05, "loss": 0.03896241, "step": 3288 }, { "epoch": 6.578, "grad_norm": 1.8109192848205566, "learning_rate": 2e-05, "loss": 0.05101265, "step": 3289 }, { "epoch": 6.58, "grad_norm": 1.596126675605774, "learning_rate": 2e-05, "loss": 0.06593664, "step": 3290 }, { "epoch": 6.582, "grad_norm": 1.3216789960861206, "learning_rate": 2e-05, "loss": 0.04046667, "step": 3291 }, { "epoch": 6.584, "grad_norm": 1.527478575706482, "learning_rate": 2e-05, "loss": 0.04944303, "step": 3292 }, { "epoch": 6.586, "grad_norm": 1.7856775522232056, "learning_rate": 2e-05, "loss": 0.04871891, "step": 3293 }, { "epoch": 6.588, "grad_norm": 1.3294587135314941, "learning_rate": 2e-05, "loss": 0.0352642, "step": 3294 }, { "epoch": 6.59, "grad_norm": 1.276484727859497, "learning_rate": 2e-05, "loss": 0.0488582, "step": 3295 }, { "epoch": 6.592, "grad_norm": 1.1529980897903442, "learning_rate": 2e-05, "loss": 0.03182267, "step": 3296 }, { "epoch": 6.594, "grad_norm": 1.2852643728256226, "learning_rate": 2e-05, "loss": 0.03118693, "step": 3297 }, { "epoch": 6.596, "grad_norm": 1.3444160223007202, "learning_rate": 2e-05, "loss": 0.03754349, "step": 3298 }, { "epoch": 6.598, "grad_norm": 1.004757046699524, "learning_rate": 2e-05, "loss": 0.03347757, "step": 3299 }, { "epoch": 6.6, "grad_norm": 1.8686344623565674, "learning_rate": 2e-05, "loss": 0.05513815, "step": 3300 }, { "epoch": 6.602, "grad_norm": 1.9110959768295288, "learning_rate": 2e-05, "loss": 0.04857612, "step": 3301 }, { "epoch": 6.604, "grad_norm": 1.0715402364730835, "learning_rate": 2e-05, "loss": 0.03564036, "step": 3302 }, { "epoch": 6.606, "grad_norm": 1.2949354648590088, "learning_rate": 2e-05, "loss": 0.04693384, "step": 3303 }, { "epoch": 6.608, "grad_norm": 1.8850829601287842, "learning_rate": 2e-05, "loss": 0.03005291, "step": 3304 }, { "epoch": 6.61, "grad_norm": 2.198153495788574, "learning_rate": 2e-05, "loss": 0.04676298, "step": 3305 }, { "epoch": 6.612, "grad_norm": 1.7620174884796143, "learning_rate": 2e-05, "loss": 0.04518133, "step": 3306 }, { "epoch": 6.614, "grad_norm": 0.9728515148162842, "learning_rate": 2e-05, "loss": 0.04012098, "step": 3307 }, { "epoch": 6.616, "grad_norm": 1.2676047086715698, "learning_rate": 2e-05, "loss": 0.04109008, "step": 3308 }, { "epoch": 6.618, "grad_norm": 1.0146527290344238, "learning_rate": 2e-05, "loss": 0.03996342, "step": 3309 }, { "epoch": 6.62, "grad_norm": 1.4322385787963867, "learning_rate": 2e-05, "loss": 0.05803744, "step": 3310 }, { "epoch": 6.622, "grad_norm": 1.6033459901809692, "learning_rate": 2e-05, "loss": 0.04865368, "step": 3311 }, { "epoch": 6.624, "grad_norm": 1.335490107536316, "learning_rate": 2e-05, "loss": 0.04578111, "step": 3312 }, { "epoch": 6.626, "grad_norm": 0.9062521457672119, "learning_rate": 2e-05, "loss": 0.03169809, "step": 3313 }, { "epoch": 6.628, "grad_norm": 1.5599009990692139, "learning_rate": 2e-05, "loss": 0.06193957, "step": 3314 }, { "epoch": 6.63, "grad_norm": 1.3662382364273071, "learning_rate": 2e-05, "loss": 0.05522493, "step": 3315 }, { "epoch": 6.632, "grad_norm": 1.005143404006958, "learning_rate": 2e-05, "loss": 0.04119807, "step": 3316 }, { "epoch": 6.634, "grad_norm": 1.0832799673080444, "learning_rate": 2e-05, "loss": 0.03823555, "step": 3317 }, { "epoch": 6.636, "grad_norm": 1.071890950202942, "learning_rate": 2e-05, "loss": 0.04504975, "step": 3318 }, { "epoch": 6.638, "grad_norm": 0.9647811651229858, "learning_rate": 2e-05, "loss": 0.03949877, "step": 3319 }, { "epoch": 6.64, "grad_norm": 1.691012978553772, "learning_rate": 2e-05, "loss": 0.03227589, "step": 3320 }, { "epoch": 6.642, "grad_norm": 1.4568291902542114, "learning_rate": 2e-05, "loss": 0.06334361, "step": 3321 }, { "epoch": 6.644, "grad_norm": 1.3656632900238037, "learning_rate": 2e-05, "loss": 0.04173194, "step": 3322 }, { "epoch": 6.646, "grad_norm": 1.0714610815048218, "learning_rate": 2e-05, "loss": 0.03870899, "step": 3323 }, { "epoch": 6.648, "grad_norm": 1.4556567668914795, "learning_rate": 2e-05, "loss": 0.05417705, "step": 3324 }, { "epoch": 6.65, "grad_norm": 2.428889036178589, "learning_rate": 2e-05, "loss": 0.03874531, "step": 3325 }, { "epoch": 6.652, "grad_norm": 1.9186711311340332, "learning_rate": 2e-05, "loss": 0.04238386, "step": 3326 }, { "epoch": 6.654, "grad_norm": 1.2089457511901855, "learning_rate": 2e-05, "loss": 0.04375707, "step": 3327 }, { "epoch": 6.656, "grad_norm": 1.551080584526062, "learning_rate": 2e-05, "loss": 0.04818584, "step": 3328 }, { "epoch": 6.658, "grad_norm": 1.6396806240081787, "learning_rate": 2e-05, "loss": 0.05009399, "step": 3329 }, { "epoch": 6.66, "grad_norm": 2.1258862018585205, "learning_rate": 2e-05, "loss": 0.04509543, "step": 3330 }, { "epoch": 6.662, "grad_norm": 1.1822887659072876, "learning_rate": 2e-05, "loss": 0.03724986, "step": 3331 }, { "epoch": 6.664, "grad_norm": 1.2143762111663818, "learning_rate": 2e-05, "loss": 0.04569676, "step": 3332 }, { "epoch": 6.666, "grad_norm": 0.8996707797050476, "learning_rate": 2e-05, "loss": 0.02685695, "step": 3333 }, { "epoch": 6.668, "grad_norm": 1.9152095317840576, "learning_rate": 2e-05, "loss": 0.05275287, "step": 3334 }, { "epoch": 6.67, "grad_norm": 1.267945647239685, "learning_rate": 2e-05, "loss": 0.05568327, "step": 3335 }, { "epoch": 6.672, "grad_norm": 1.7751346826553345, "learning_rate": 2e-05, "loss": 0.03771976, "step": 3336 }, { "epoch": 6.674, "grad_norm": 1.023088812828064, "learning_rate": 2e-05, "loss": 0.04702979, "step": 3337 }, { "epoch": 6.676, "grad_norm": 1.142436146736145, "learning_rate": 2e-05, "loss": 0.04169992, "step": 3338 }, { "epoch": 6.678, "grad_norm": 1.0713934898376465, "learning_rate": 2e-05, "loss": 0.03997438, "step": 3339 }, { "epoch": 6.68, "grad_norm": 1.2672288417816162, "learning_rate": 2e-05, "loss": 0.03754934, "step": 3340 }, { "epoch": 6.682, "grad_norm": 1.1337416172027588, "learning_rate": 2e-05, "loss": 0.04338862, "step": 3341 }, { "epoch": 6.684, "grad_norm": 1.052075982093811, "learning_rate": 2e-05, "loss": 0.03329548, "step": 3342 }, { "epoch": 6.686, "grad_norm": 2.1627840995788574, "learning_rate": 2e-05, "loss": 0.03961411, "step": 3343 }, { "epoch": 6.688, "grad_norm": 1.3108724355697632, "learning_rate": 2e-05, "loss": 0.04051596, "step": 3344 }, { "epoch": 6.6899999999999995, "grad_norm": 1.2980796098709106, "learning_rate": 2e-05, "loss": 0.03444849, "step": 3345 }, { "epoch": 6.692, "grad_norm": 1.6458882093429565, "learning_rate": 2e-05, "loss": 0.04823191, "step": 3346 }, { "epoch": 6.694, "grad_norm": 1.2547824382781982, "learning_rate": 2e-05, "loss": 0.04692468, "step": 3347 }, { "epoch": 6.696, "grad_norm": 1.3602755069732666, "learning_rate": 2e-05, "loss": 0.04524711, "step": 3348 }, { "epoch": 6.698, "grad_norm": 1.6463963985443115, "learning_rate": 2e-05, "loss": 0.04222873, "step": 3349 }, { "epoch": 6.7, "grad_norm": 1.2637560367584229, "learning_rate": 2e-05, "loss": 0.04475297, "step": 3350 }, { "epoch": 6.702, "grad_norm": 1.290636420249939, "learning_rate": 2e-05, "loss": 0.04047076, "step": 3351 }, { "epoch": 6.704, "grad_norm": 1.321968674659729, "learning_rate": 2e-05, "loss": 0.04055145, "step": 3352 }, { "epoch": 6.7059999999999995, "grad_norm": 1.4020812511444092, "learning_rate": 2e-05, "loss": 0.04540452, "step": 3353 }, { "epoch": 6.708, "grad_norm": 1.0298370122909546, "learning_rate": 2e-05, "loss": 0.03884371, "step": 3354 }, { "epoch": 6.71, "grad_norm": 1.6785837411880493, "learning_rate": 2e-05, "loss": 0.02381696, "step": 3355 }, { "epoch": 6.712, "grad_norm": 1.3670672178268433, "learning_rate": 2e-05, "loss": 0.04163823, "step": 3356 }, { "epoch": 6.714, "grad_norm": 2.576000928878784, "learning_rate": 2e-05, "loss": 0.05501535, "step": 3357 }, { "epoch": 6.716, "grad_norm": 1.557173252105713, "learning_rate": 2e-05, "loss": 0.04547663, "step": 3358 }, { "epoch": 6.718, "grad_norm": 1.4260321855545044, "learning_rate": 2e-05, "loss": 0.03467124, "step": 3359 }, { "epoch": 6.72, "grad_norm": 1.151957631111145, "learning_rate": 2e-05, "loss": 0.03484927, "step": 3360 }, { "epoch": 6.7219999999999995, "grad_norm": 1.547073483467102, "learning_rate": 2e-05, "loss": 0.04147929, "step": 3361 }, { "epoch": 6.724, "grad_norm": 1.636284351348877, "learning_rate": 2e-05, "loss": 0.05319513, "step": 3362 }, { "epoch": 6.726, "grad_norm": 1.2850128412246704, "learning_rate": 2e-05, "loss": 0.05941699, "step": 3363 }, { "epoch": 6.728, "grad_norm": 1.3793518543243408, "learning_rate": 2e-05, "loss": 0.04686755, "step": 3364 }, { "epoch": 6.73, "grad_norm": 1.1899698972702026, "learning_rate": 2e-05, "loss": 0.0411698, "step": 3365 }, { "epoch": 6.732, "grad_norm": 1.4995540380477905, "learning_rate": 2e-05, "loss": 0.0547979, "step": 3366 }, { "epoch": 6.734, "grad_norm": 1.761295199394226, "learning_rate": 2e-05, "loss": 0.05972136, "step": 3367 }, { "epoch": 6.736, "grad_norm": 3.986799955368042, "learning_rate": 2e-05, "loss": 0.04645439, "step": 3368 }, { "epoch": 6.7379999999999995, "grad_norm": 0.939888060092926, "learning_rate": 2e-05, "loss": 0.03369498, "step": 3369 }, { "epoch": 6.74, "grad_norm": 1.3017959594726562, "learning_rate": 2e-05, "loss": 0.03941808, "step": 3370 }, { "epoch": 6.742, "grad_norm": 1.3570096492767334, "learning_rate": 2e-05, "loss": 0.05957269, "step": 3371 }, { "epoch": 6.744, "grad_norm": 1.9899928569793701, "learning_rate": 2e-05, "loss": 0.03165451, "step": 3372 }, { "epoch": 6.746, "grad_norm": 1.3551559448242188, "learning_rate": 2e-05, "loss": 0.04913147, "step": 3373 }, { "epoch": 6.748, "grad_norm": 1.112451434135437, "learning_rate": 2e-05, "loss": 0.04206763, "step": 3374 }, { "epoch": 6.75, "grad_norm": 1.0929338932037354, "learning_rate": 2e-05, "loss": 0.04744282, "step": 3375 }, { "epoch": 6.752, "grad_norm": 1.1742464303970337, "learning_rate": 2e-05, "loss": 0.04074844, "step": 3376 }, { "epoch": 6.754, "grad_norm": 1.8257708549499512, "learning_rate": 2e-05, "loss": 0.03645295, "step": 3377 }, { "epoch": 6.756, "grad_norm": 0.9792839884757996, "learning_rate": 2e-05, "loss": 0.03295071, "step": 3378 }, { "epoch": 6.758, "grad_norm": 1.6531082391738892, "learning_rate": 2e-05, "loss": 0.06488992, "step": 3379 }, { "epoch": 6.76, "grad_norm": 0.8899463415145874, "learning_rate": 2e-05, "loss": 0.0329383, "step": 3380 }, { "epoch": 6.7620000000000005, "grad_norm": 1.3066645860671997, "learning_rate": 2e-05, "loss": 0.0431657, "step": 3381 }, { "epoch": 6.764, "grad_norm": 0.9877235293388367, "learning_rate": 2e-05, "loss": 0.03590939, "step": 3382 }, { "epoch": 6.766, "grad_norm": 0.6814942955970764, "learning_rate": 2e-05, "loss": 0.01763538, "step": 3383 }, { "epoch": 6.768, "grad_norm": 1.3760477304458618, "learning_rate": 2e-05, "loss": 0.0547229, "step": 3384 }, { "epoch": 6.77, "grad_norm": 1.2730796337127686, "learning_rate": 2e-05, "loss": 0.04224884, "step": 3385 }, { "epoch": 6.772, "grad_norm": 1.4784380197525024, "learning_rate": 2e-05, "loss": 0.04226485, "step": 3386 }, { "epoch": 6.774, "grad_norm": 1.2227555513381958, "learning_rate": 2e-05, "loss": 0.04214308, "step": 3387 }, { "epoch": 6.776, "grad_norm": 1.0215377807617188, "learning_rate": 2e-05, "loss": 0.03375834, "step": 3388 }, { "epoch": 6.7780000000000005, "grad_norm": 1.3898252248764038, "learning_rate": 2e-05, "loss": 0.03833441, "step": 3389 }, { "epoch": 6.78, "grad_norm": 1.3035609722137451, "learning_rate": 2e-05, "loss": 0.03248435, "step": 3390 }, { "epoch": 6.782, "grad_norm": 1.4700543880462646, "learning_rate": 2e-05, "loss": 0.0392487, "step": 3391 }, { "epoch": 6.784, "grad_norm": 0.9370866417884827, "learning_rate": 2e-05, "loss": 0.03084411, "step": 3392 }, { "epoch": 6.786, "grad_norm": 2.0634961128234863, "learning_rate": 2e-05, "loss": 0.03822386, "step": 3393 }, { "epoch": 6.788, "grad_norm": 1.1548117399215698, "learning_rate": 2e-05, "loss": 0.02791761, "step": 3394 }, { "epoch": 6.79, "grad_norm": 1.4738540649414062, "learning_rate": 2e-05, "loss": 0.0408001, "step": 3395 }, { "epoch": 6.792, "grad_norm": 1.1283282041549683, "learning_rate": 2e-05, "loss": 0.03170909, "step": 3396 }, { "epoch": 6.7940000000000005, "grad_norm": 2.8261640071868896, "learning_rate": 2e-05, "loss": 0.05293885, "step": 3397 }, { "epoch": 6.796, "grad_norm": 1.261568546295166, "learning_rate": 2e-05, "loss": 0.03546068, "step": 3398 }, { "epoch": 6.798, "grad_norm": 1.9862771034240723, "learning_rate": 2e-05, "loss": 0.04700635, "step": 3399 }, { "epoch": 6.8, "grad_norm": 1.1229135990142822, "learning_rate": 2e-05, "loss": 0.03663046, "step": 3400 }, { "epoch": 6.802, "grad_norm": 2.375462055206299, "learning_rate": 2e-05, "loss": 0.0470563, "step": 3401 }, { "epoch": 6.804, "grad_norm": 2.697751045227051, "learning_rate": 2e-05, "loss": 0.03948415, "step": 3402 }, { "epoch": 6.806, "grad_norm": 0.9717214107513428, "learning_rate": 2e-05, "loss": 0.03656136, "step": 3403 }, { "epoch": 6.808, "grad_norm": 2.782092332839966, "learning_rate": 2e-05, "loss": 0.03715229, "step": 3404 }, { "epoch": 6.8100000000000005, "grad_norm": 1.2326487302780151, "learning_rate": 2e-05, "loss": 0.02518865, "step": 3405 }, { "epoch": 6.812, "grad_norm": 1.664267897605896, "learning_rate": 2e-05, "loss": 0.03265184, "step": 3406 }, { "epoch": 6.814, "grad_norm": 1.6320873498916626, "learning_rate": 2e-05, "loss": 0.0380751, "step": 3407 }, { "epoch": 6.816, "grad_norm": 0.9816185832023621, "learning_rate": 2e-05, "loss": 0.02956392, "step": 3408 }, { "epoch": 6.818, "grad_norm": 0.9093066453933716, "learning_rate": 2e-05, "loss": 0.0346065, "step": 3409 }, { "epoch": 6.82, "grad_norm": 2.0303432941436768, "learning_rate": 2e-05, "loss": 0.03398874, "step": 3410 }, { "epoch": 6.822, "grad_norm": 1.2814768552780151, "learning_rate": 2e-05, "loss": 0.03963382, "step": 3411 }, { "epoch": 6.824, "grad_norm": 1.2515296936035156, "learning_rate": 2e-05, "loss": 0.03346917, "step": 3412 }, { "epoch": 6.826, "grad_norm": 1.2658002376556396, "learning_rate": 2e-05, "loss": 0.04067706, "step": 3413 }, { "epoch": 6.828, "grad_norm": 1.9908965826034546, "learning_rate": 2e-05, "loss": 0.04793311, "step": 3414 }, { "epoch": 6.83, "grad_norm": 1.6327311992645264, "learning_rate": 2e-05, "loss": 0.04586256, "step": 3415 }, { "epoch": 6.832, "grad_norm": 2.102358341217041, "learning_rate": 2e-05, "loss": 0.04829361, "step": 3416 }, { "epoch": 6.834, "grad_norm": 1.054677963256836, "learning_rate": 2e-05, "loss": 0.03276619, "step": 3417 }, { "epoch": 6.836, "grad_norm": 1.2387585639953613, "learning_rate": 2e-05, "loss": 0.03872739, "step": 3418 }, { "epoch": 6.838, "grad_norm": 2.787712812423706, "learning_rate": 2e-05, "loss": 0.04284476, "step": 3419 }, { "epoch": 6.84, "grad_norm": 1.8472949266433716, "learning_rate": 2e-05, "loss": 0.03279556, "step": 3420 }, { "epoch": 6.842, "grad_norm": 2.012164831161499, "learning_rate": 2e-05, "loss": 0.05931329, "step": 3421 }, { "epoch": 6.844, "grad_norm": 2.5423688888549805, "learning_rate": 2e-05, "loss": 0.02890222, "step": 3422 }, { "epoch": 6.846, "grad_norm": 1.3172720670700073, "learning_rate": 2e-05, "loss": 0.04723623, "step": 3423 }, { "epoch": 6.848, "grad_norm": 1.420525074005127, "learning_rate": 2e-05, "loss": 0.03453006, "step": 3424 }, { "epoch": 6.85, "grad_norm": 0.8677119016647339, "learning_rate": 2e-05, "loss": 0.02621027, "step": 3425 }, { "epoch": 6.852, "grad_norm": 1.720272183418274, "learning_rate": 2e-05, "loss": 0.04682282, "step": 3426 }, { "epoch": 6.854, "grad_norm": 1.8855124711990356, "learning_rate": 2e-05, "loss": 0.06847136, "step": 3427 }, { "epoch": 6.856, "grad_norm": 1.885934591293335, "learning_rate": 2e-05, "loss": 0.04604185, "step": 3428 }, { "epoch": 6.858, "grad_norm": 1.467432975769043, "learning_rate": 2e-05, "loss": 0.04723091, "step": 3429 }, { "epoch": 6.86, "grad_norm": 1.2041890621185303, "learning_rate": 2e-05, "loss": 0.03593369, "step": 3430 }, { "epoch": 6.862, "grad_norm": 1.270368218421936, "learning_rate": 2e-05, "loss": 0.04030476, "step": 3431 }, { "epoch": 6.864, "grad_norm": 1.0098954439163208, "learning_rate": 2e-05, "loss": 0.0294608, "step": 3432 }, { "epoch": 6.866, "grad_norm": 0.9179061055183411, "learning_rate": 2e-05, "loss": 0.03166584, "step": 3433 }, { "epoch": 6.868, "grad_norm": 0.8878882527351379, "learning_rate": 2e-05, "loss": 0.03237126, "step": 3434 }, { "epoch": 6.87, "grad_norm": 1.2229421138763428, "learning_rate": 2e-05, "loss": 0.03407587, "step": 3435 }, { "epoch": 6.872, "grad_norm": 1.583560585975647, "learning_rate": 2e-05, "loss": 0.0503473, "step": 3436 }, { "epoch": 6.874, "grad_norm": 1.0500982999801636, "learning_rate": 2e-05, "loss": 0.04004457, "step": 3437 }, { "epoch": 6.876, "grad_norm": 1.361086368560791, "learning_rate": 2e-05, "loss": 0.03766085, "step": 3438 }, { "epoch": 6.878, "grad_norm": 1.9526699781417847, "learning_rate": 2e-05, "loss": 0.0668399, "step": 3439 }, { "epoch": 6.88, "grad_norm": 1.892948865890503, "learning_rate": 2e-05, "loss": 0.04500614, "step": 3440 }, { "epoch": 6.882, "grad_norm": 1.138198733329773, "learning_rate": 2e-05, "loss": 0.03649968, "step": 3441 }, { "epoch": 6.884, "grad_norm": 1.4107952117919922, "learning_rate": 2e-05, "loss": 0.03892181, "step": 3442 }, { "epoch": 6.886, "grad_norm": 1.7920465469360352, "learning_rate": 2e-05, "loss": 0.04723229, "step": 3443 }, { "epoch": 6.888, "grad_norm": 0.950280487537384, "learning_rate": 2e-05, "loss": 0.02975946, "step": 3444 }, { "epoch": 6.89, "grad_norm": 1.0443146228790283, "learning_rate": 2e-05, "loss": 0.03059829, "step": 3445 }, { "epoch": 6.892, "grad_norm": 1.5793449878692627, "learning_rate": 2e-05, "loss": 0.06125038, "step": 3446 }, { "epoch": 6.894, "grad_norm": 1.0104767084121704, "learning_rate": 2e-05, "loss": 0.03262583, "step": 3447 }, { "epoch": 6.896, "grad_norm": 1.0158295631408691, "learning_rate": 2e-05, "loss": 0.03388122, "step": 3448 }, { "epoch": 6.898, "grad_norm": 1.0225321054458618, "learning_rate": 2e-05, "loss": 0.03525139, "step": 3449 }, { "epoch": 6.9, "grad_norm": 1.1488553285598755, "learning_rate": 2e-05, "loss": 0.03914342, "step": 3450 }, { "epoch": 6.902, "grad_norm": 2.1833736896514893, "learning_rate": 2e-05, "loss": 0.04441334, "step": 3451 }, { "epoch": 6.904, "grad_norm": 1.1425408124923706, "learning_rate": 2e-05, "loss": 0.04421782, "step": 3452 }, { "epoch": 6.906, "grad_norm": 1.2551945447921753, "learning_rate": 2e-05, "loss": 0.04276825, "step": 3453 }, { "epoch": 6.908, "grad_norm": 1.220436453819275, "learning_rate": 2e-05, "loss": 0.03884857, "step": 3454 }, { "epoch": 6.91, "grad_norm": 1.0726264715194702, "learning_rate": 2e-05, "loss": 0.04179862, "step": 3455 }, { "epoch": 6.912, "grad_norm": 1.4682646989822388, "learning_rate": 2e-05, "loss": 0.05259724, "step": 3456 }, { "epoch": 6.914, "grad_norm": 1.1421536207199097, "learning_rate": 2e-05, "loss": 0.02865803, "step": 3457 }, { "epoch": 6.916, "grad_norm": 0.9634175896644592, "learning_rate": 2e-05, "loss": 0.03669003, "step": 3458 }, { "epoch": 6.918, "grad_norm": 1.1276708841323853, "learning_rate": 2e-05, "loss": 0.03666895, "step": 3459 }, { "epoch": 6.92, "grad_norm": 1.2425280809402466, "learning_rate": 2e-05, "loss": 0.03734131, "step": 3460 }, { "epoch": 6.922, "grad_norm": 1.0493005514144897, "learning_rate": 2e-05, "loss": 0.03591837, "step": 3461 }, { "epoch": 6.924, "grad_norm": 1.1144272089004517, "learning_rate": 2e-05, "loss": 0.04854745, "step": 3462 }, { "epoch": 6.926, "grad_norm": 1.4433996677398682, "learning_rate": 2e-05, "loss": 0.07262223, "step": 3463 }, { "epoch": 6.928, "grad_norm": 1.3264694213867188, "learning_rate": 2e-05, "loss": 0.04320496, "step": 3464 }, { "epoch": 6.93, "grad_norm": 0.8981539607048035, "learning_rate": 2e-05, "loss": 0.04096449, "step": 3465 }, { "epoch": 6.932, "grad_norm": 1.478447675704956, "learning_rate": 2e-05, "loss": 0.03124829, "step": 3466 }, { "epoch": 6.934, "grad_norm": 1.6400063037872314, "learning_rate": 2e-05, "loss": 0.05259836, "step": 3467 }, { "epoch": 6.936, "grad_norm": 1.2507061958312988, "learning_rate": 2e-05, "loss": 0.03599778, "step": 3468 }, { "epoch": 6.938, "grad_norm": 1.781505823135376, "learning_rate": 2e-05, "loss": 0.05103444, "step": 3469 }, { "epoch": 6.9399999999999995, "grad_norm": 1.6475948095321655, "learning_rate": 2e-05, "loss": 0.04331469, "step": 3470 }, { "epoch": 6.942, "grad_norm": 1.1223915815353394, "learning_rate": 2e-05, "loss": 0.04753123, "step": 3471 }, { "epoch": 6.944, "grad_norm": 1.0290122032165527, "learning_rate": 2e-05, "loss": 0.02938306, "step": 3472 }, { "epoch": 6.946, "grad_norm": 1.473054051399231, "learning_rate": 2e-05, "loss": 0.03937855, "step": 3473 }, { "epoch": 6.948, "grad_norm": 1.4074567556381226, "learning_rate": 2e-05, "loss": 0.04956938, "step": 3474 }, { "epoch": 6.95, "grad_norm": 3.5536975860595703, "learning_rate": 2e-05, "loss": 0.03741268, "step": 3475 }, { "epoch": 6.952, "grad_norm": 1.2296284437179565, "learning_rate": 2e-05, "loss": 0.04181647, "step": 3476 }, { "epoch": 6.954, "grad_norm": 1.3431607484817505, "learning_rate": 2e-05, "loss": 0.05298343, "step": 3477 }, { "epoch": 6.9559999999999995, "grad_norm": 1.5881373882293701, "learning_rate": 2e-05, "loss": 0.04500671, "step": 3478 }, { "epoch": 6.958, "grad_norm": 1.0103130340576172, "learning_rate": 2e-05, "loss": 0.03693963, "step": 3479 }, { "epoch": 6.96, "grad_norm": 1.4085220098495483, "learning_rate": 2e-05, "loss": 0.04532019, "step": 3480 }, { "epoch": 6.962, "grad_norm": 1.2073783874511719, "learning_rate": 2e-05, "loss": 0.03919894, "step": 3481 }, { "epoch": 6.964, "grad_norm": 1.2066093683242798, "learning_rate": 2e-05, "loss": 0.02972195, "step": 3482 }, { "epoch": 6.966, "grad_norm": 0.8612711429595947, "learning_rate": 2e-05, "loss": 0.03065001, "step": 3483 }, { "epoch": 6.968, "grad_norm": 1.1287932395935059, "learning_rate": 2e-05, "loss": 0.03599424, "step": 3484 }, { "epoch": 6.97, "grad_norm": 0.8197962045669556, "learning_rate": 2e-05, "loss": 0.02245808, "step": 3485 }, { "epoch": 6.9719999999999995, "grad_norm": 1.3062806129455566, "learning_rate": 2e-05, "loss": 0.04791644, "step": 3486 }, { "epoch": 6.974, "grad_norm": 1.3897563219070435, "learning_rate": 2e-05, "loss": 0.05377492, "step": 3487 }, { "epoch": 6.976, "grad_norm": 1.2727558612823486, "learning_rate": 2e-05, "loss": 0.0302354, "step": 3488 }, { "epoch": 6.978, "grad_norm": 1.2816267013549805, "learning_rate": 2e-05, "loss": 0.04273042, "step": 3489 }, { "epoch": 6.98, "grad_norm": 2.28310227394104, "learning_rate": 2e-05, "loss": 0.03521216, "step": 3490 }, { "epoch": 6.982, "grad_norm": 0.990056037902832, "learning_rate": 2e-05, "loss": 0.03357947, "step": 3491 }, { "epoch": 6.984, "grad_norm": 1.2733341455459595, "learning_rate": 2e-05, "loss": 0.03327303, "step": 3492 }, { "epoch": 6.986, "grad_norm": 1.3056036233901978, "learning_rate": 2e-05, "loss": 0.03448343, "step": 3493 }, { "epoch": 6.9879999999999995, "grad_norm": 1.0707203149795532, "learning_rate": 2e-05, "loss": 0.03063057, "step": 3494 }, { "epoch": 6.99, "grad_norm": 1.3116205930709839, "learning_rate": 2e-05, "loss": 0.03945713, "step": 3495 }, { "epoch": 6.992, "grad_norm": 2.039046287536621, "learning_rate": 2e-05, "loss": 0.04921161, "step": 3496 }, { "epoch": 6.994, "grad_norm": 2.2736213207244873, "learning_rate": 2e-05, "loss": 0.06876344, "step": 3497 }, { "epoch": 6.996, "grad_norm": 1.3070014715194702, "learning_rate": 2e-05, "loss": 0.03777097, "step": 3498 }, { "epoch": 6.998, "grad_norm": 1.431994080543518, "learning_rate": 2e-05, "loss": 0.03406931, "step": 3499 }, { "epoch": 7.0, "grad_norm": 2.305985927581787, "learning_rate": 2e-05, "loss": 0.06021082, "step": 3500 }, { "epoch": 7.0, "eval_performance": { "AngleClassification_1": 0.97, "AngleClassification_2": 0.966, "AngleClassification_3": 0.8762475049900199, "Equal_1": 0.976, "Equal_2": 0.9121756487025948, "Equal_3": 0.7924151696606786, "LineComparison_1": 0.996, "LineComparison_2": 0.9940119760479041, "LineComparison_3": 0.9640718562874252, "Parallel_1": 0.9799599198396793, "Parallel_2": 0.9919839679358717, "Parallel_3": 0.87, "Perpendicular_1": 0.978, "Perpendicular_2": 0.844, "Perpendicular_3": 0.4228456913827655, "PointLiesOnCircle_1": 0.9976619906479627, "PointLiesOnCircle_2": 0.9929333333333334, "PointLiesOnCircle_3": 0.9857333333333334, "PointLiesOnLine_1": 0.9899799599198397, "PointLiesOnLine_2": 0.9859719438877755, "PointLiesOnLine_3": 0.8642714570858283 }, "eval_runtime": 224.791, "eval_samples_per_second": 46.71, "eval_steps_per_second": 0.934, "step": 3500 }, { "epoch": 7.002, "grad_norm": 1.7654236555099487, "learning_rate": 2e-05, "loss": 0.0463194, "step": 3501 }, { "epoch": 7.004, "grad_norm": 1.1480334997177124, "learning_rate": 2e-05, "loss": 0.03836069, "step": 3502 }, { "epoch": 7.006, "grad_norm": 1.1734998226165771, "learning_rate": 2e-05, "loss": 0.03044814, "step": 3503 }, { "epoch": 7.008, "grad_norm": 2.132579803466797, "learning_rate": 2e-05, "loss": 0.04456195, "step": 3504 }, { "epoch": 7.01, "grad_norm": 1.367837905883789, "learning_rate": 2e-05, "loss": 0.04647694, "step": 3505 }, { "epoch": 7.012, "grad_norm": 1.209482192993164, "learning_rate": 2e-05, "loss": 0.04331946, "step": 3506 }, { "epoch": 7.014, "grad_norm": 0.9829257130622864, "learning_rate": 2e-05, "loss": 0.03328446, "step": 3507 }, { "epoch": 7.016, "grad_norm": 1.163879156112671, "learning_rate": 2e-05, "loss": 0.03752585, "step": 3508 }, { "epoch": 7.018, "grad_norm": 1.2233892679214478, "learning_rate": 2e-05, "loss": 0.03645088, "step": 3509 }, { "epoch": 7.02, "grad_norm": 1.3547741174697876, "learning_rate": 2e-05, "loss": 0.04536892, "step": 3510 }, { "epoch": 7.022, "grad_norm": 1.0542008876800537, "learning_rate": 2e-05, "loss": 0.03453778, "step": 3511 }, { "epoch": 7.024, "grad_norm": 1.0724635124206543, "learning_rate": 2e-05, "loss": 0.03198606, "step": 3512 }, { "epoch": 7.026, "grad_norm": 1.1272120475769043, "learning_rate": 2e-05, "loss": 0.03495032, "step": 3513 }, { "epoch": 7.028, "grad_norm": 1.5987498760223389, "learning_rate": 2e-05, "loss": 0.04844986, "step": 3514 }, { "epoch": 7.03, "grad_norm": 2.4234402179718018, "learning_rate": 2e-05, "loss": 0.04497677, "step": 3515 }, { "epoch": 7.032, "grad_norm": 1.3928197622299194, "learning_rate": 2e-05, "loss": 0.05575183, "step": 3516 }, { "epoch": 7.034, "grad_norm": 0.8797189593315125, "learning_rate": 2e-05, "loss": 0.03210068, "step": 3517 }, { "epoch": 7.036, "grad_norm": 1.299830675125122, "learning_rate": 2e-05, "loss": 0.04609405, "step": 3518 }, { "epoch": 7.038, "grad_norm": 1.2263742685317993, "learning_rate": 2e-05, "loss": 0.04556087, "step": 3519 }, { "epoch": 7.04, "grad_norm": 1.6108256578445435, "learning_rate": 2e-05, "loss": 0.05731671, "step": 3520 }, { "epoch": 7.042, "grad_norm": 1.2161571979522705, "learning_rate": 2e-05, "loss": 0.04118603, "step": 3521 }, { "epoch": 7.044, "grad_norm": 0.7967638969421387, "learning_rate": 2e-05, "loss": 0.02026917, "step": 3522 }, { "epoch": 7.046, "grad_norm": 1.0279566049575806, "learning_rate": 2e-05, "loss": 0.03185375, "step": 3523 }, { "epoch": 7.048, "grad_norm": 1.0841623544692993, "learning_rate": 2e-05, "loss": 0.03629187, "step": 3524 }, { "epoch": 7.05, "grad_norm": 1.3334144353866577, "learning_rate": 2e-05, "loss": 0.04861633, "step": 3525 }, { "epoch": 7.052, "grad_norm": 1.4516124725341797, "learning_rate": 2e-05, "loss": 0.03941116, "step": 3526 }, { "epoch": 7.054, "grad_norm": 1.2045552730560303, "learning_rate": 2e-05, "loss": 0.03711585, "step": 3527 }, { "epoch": 7.056, "grad_norm": 1.7586414813995361, "learning_rate": 2e-05, "loss": 0.05360979, "step": 3528 }, { "epoch": 7.058, "grad_norm": 0.9527034163475037, "learning_rate": 2e-05, "loss": 0.02817396, "step": 3529 }, { "epoch": 7.06, "grad_norm": 1.6280465126037598, "learning_rate": 2e-05, "loss": 0.05200241, "step": 3530 }, { "epoch": 7.062, "grad_norm": 1.2518244981765747, "learning_rate": 2e-05, "loss": 0.0437836, "step": 3531 }, { "epoch": 7.064, "grad_norm": 1.1152139902114868, "learning_rate": 2e-05, "loss": 0.04008471, "step": 3532 }, { "epoch": 7.066, "grad_norm": 1.1436395645141602, "learning_rate": 2e-05, "loss": 0.04294354, "step": 3533 }, { "epoch": 7.068, "grad_norm": 1.6549543142318726, "learning_rate": 2e-05, "loss": 0.04446728, "step": 3534 }, { "epoch": 7.07, "grad_norm": 1.1992180347442627, "learning_rate": 2e-05, "loss": 0.04233711, "step": 3535 }, { "epoch": 7.072, "grad_norm": 1.106359601020813, "learning_rate": 2e-05, "loss": 0.03719247, "step": 3536 }, { "epoch": 7.074, "grad_norm": 1.456709861755371, "learning_rate": 2e-05, "loss": 0.05803198, "step": 3537 }, { "epoch": 7.076, "grad_norm": 0.7159732580184937, "learning_rate": 2e-05, "loss": 0.02427624, "step": 3538 }, { "epoch": 7.078, "grad_norm": 1.2955063581466675, "learning_rate": 2e-05, "loss": 0.03997677, "step": 3539 }, { "epoch": 7.08, "grad_norm": 1.2882788181304932, "learning_rate": 2e-05, "loss": 0.04622707, "step": 3540 }, { "epoch": 7.082, "grad_norm": 1.5002450942993164, "learning_rate": 2e-05, "loss": 0.03281282, "step": 3541 }, { "epoch": 7.084, "grad_norm": 1.3884634971618652, "learning_rate": 2e-05, "loss": 0.04638532, "step": 3542 }, { "epoch": 7.086, "grad_norm": 1.4702792167663574, "learning_rate": 2e-05, "loss": 0.04235964, "step": 3543 }, { "epoch": 7.088, "grad_norm": 1.404921293258667, "learning_rate": 2e-05, "loss": 0.03210161, "step": 3544 }, { "epoch": 7.09, "grad_norm": 0.998028039932251, "learning_rate": 2e-05, "loss": 0.03736594, "step": 3545 }, { "epoch": 7.092, "grad_norm": 1.098626732826233, "learning_rate": 2e-05, "loss": 0.03796948, "step": 3546 }, { "epoch": 7.094, "grad_norm": 1.1231931447982788, "learning_rate": 2e-05, "loss": 0.03816213, "step": 3547 }, { "epoch": 7.096, "grad_norm": 0.731264591217041, "learning_rate": 2e-05, "loss": 0.02085353, "step": 3548 }, { "epoch": 7.098, "grad_norm": 1.052249550819397, "learning_rate": 2e-05, "loss": 0.03557489, "step": 3549 }, { "epoch": 7.1, "grad_norm": 1.1321067810058594, "learning_rate": 2e-05, "loss": 0.03116557, "step": 3550 }, { "epoch": 7.102, "grad_norm": 1.5153459310531616, "learning_rate": 2e-05, "loss": 0.05023051, "step": 3551 }, { "epoch": 7.104, "grad_norm": 1.1604336500167847, "learning_rate": 2e-05, "loss": 0.0466033, "step": 3552 }, { "epoch": 7.106, "grad_norm": 3.6913938522338867, "learning_rate": 2e-05, "loss": 0.04958513, "step": 3553 }, { "epoch": 7.108, "grad_norm": 1.62620210647583, "learning_rate": 2e-05, "loss": 0.04998818, "step": 3554 }, { "epoch": 7.11, "grad_norm": 1.9701951742172241, "learning_rate": 2e-05, "loss": 0.0715565, "step": 3555 }, { "epoch": 7.112, "grad_norm": 0.8687713146209717, "learning_rate": 2e-05, "loss": 0.03260127, "step": 3556 }, { "epoch": 7.114, "grad_norm": 1.2364085912704468, "learning_rate": 2e-05, "loss": 0.05030511, "step": 3557 }, { "epoch": 7.116, "grad_norm": 1.3326131105422974, "learning_rate": 2e-05, "loss": 0.04023805, "step": 3558 }, { "epoch": 7.118, "grad_norm": 1.423392415046692, "learning_rate": 2e-05, "loss": 0.04237299, "step": 3559 }, { "epoch": 7.12, "grad_norm": 1.0094372034072876, "learning_rate": 2e-05, "loss": 0.03096232, "step": 3560 }, { "epoch": 7.122, "grad_norm": 0.7558750510215759, "learning_rate": 2e-05, "loss": 0.02493845, "step": 3561 }, { "epoch": 7.124, "grad_norm": 1.5370718240737915, "learning_rate": 2e-05, "loss": 0.05452661, "step": 3562 }, { "epoch": 7.126, "grad_norm": 1.1733839511871338, "learning_rate": 2e-05, "loss": 0.0398747, "step": 3563 }, { "epoch": 7.128, "grad_norm": 1.4234188795089722, "learning_rate": 2e-05, "loss": 0.03203755, "step": 3564 }, { "epoch": 7.13, "grad_norm": 1.05776846408844, "learning_rate": 2e-05, "loss": 0.04618067, "step": 3565 }, { "epoch": 7.132, "grad_norm": 0.9408735036849976, "learning_rate": 2e-05, "loss": 0.04195787, "step": 3566 }, { "epoch": 7.134, "grad_norm": 1.0735963582992554, "learning_rate": 2e-05, "loss": 0.04233218, "step": 3567 }, { "epoch": 7.136, "grad_norm": 1.15713369846344, "learning_rate": 2e-05, "loss": 0.03437551, "step": 3568 }, { "epoch": 7.138, "grad_norm": 2.767455816268921, "learning_rate": 2e-05, "loss": 0.03778763, "step": 3569 }, { "epoch": 7.14, "grad_norm": 1.3326915502548218, "learning_rate": 2e-05, "loss": 0.04540315, "step": 3570 }, { "epoch": 7.142, "grad_norm": 1.3416930437088013, "learning_rate": 2e-05, "loss": 0.03234928, "step": 3571 }, { "epoch": 7.144, "grad_norm": 1.550812005996704, "learning_rate": 2e-05, "loss": 0.07008831, "step": 3572 }, { "epoch": 7.146, "grad_norm": 1.3687742948532104, "learning_rate": 2e-05, "loss": 0.04256024, "step": 3573 }, { "epoch": 7.148, "grad_norm": 1.2043324708938599, "learning_rate": 2e-05, "loss": 0.03743562, "step": 3574 }, { "epoch": 7.15, "grad_norm": 1.1399524211883545, "learning_rate": 2e-05, "loss": 0.04111329, "step": 3575 }, { "epoch": 7.152, "grad_norm": 1.7729153633117676, "learning_rate": 2e-05, "loss": 0.04263386, "step": 3576 }, { "epoch": 7.154, "grad_norm": 1.312277913093567, "learning_rate": 2e-05, "loss": 0.0412445, "step": 3577 }, { "epoch": 7.156, "grad_norm": 1.2981204986572266, "learning_rate": 2e-05, "loss": 0.04208249, "step": 3578 }, { "epoch": 7.158, "grad_norm": 1.7900687456130981, "learning_rate": 2e-05, "loss": 0.0664155, "step": 3579 }, { "epoch": 7.16, "grad_norm": 1.750162124633789, "learning_rate": 2e-05, "loss": 0.07115132, "step": 3580 }, { "epoch": 7.162, "grad_norm": 0.9982078075408936, "learning_rate": 2e-05, "loss": 0.03646863, "step": 3581 }, { "epoch": 7.164, "grad_norm": 0.995746374130249, "learning_rate": 2e-05, "loss": 0.03448178, "step": 3582 }, { "epoch": 7.166, "grad_norm": 1.2090404033660889, "learning_rate": 2e-05, "loss": 0.04309242, "step": 3583 }, { "epoch": 7.168, "grad_norm": 1.4107164144515991, "learning_rate": 2e-05, "loss": 0.04972535, "step": 3584 }, { "epoch": 7.17, "grad_norm": 0.886931300163269, "learning_rate": 2e-05, "loss": 0.04178151, "step": 3585 }, { "epoch": 7.172, "grad_norm": 1.0958306789398193, "learning_rate": 2e-05, "loss": 0.03348741, "step": 3586 }, { "epoch": 7.174, "grad_norm": 1.255009651184082, "learning_rate": 2e-05, "loss": 0.05879929, "step": 3587 }, { "epoch": 7.176, "grad_norm": 1.7695122957229614, "learning_rate": 2e-05, "loss": 0.04559481, "step": 3588 }, { "epoch": 7.178, "grad_norm": 1.5076839923858643, "learning_rate": 2e-05, "loss": 0.02845868, "step": 3589 }, { "epoch": 7.18, "grad_norm": 1.185160517692566, "learning_rate": 2e-05, "loss": 0.03633834, "step": 3590 }, { "epoch": 7.182, "grad_norm": 1.5600054264068604, "learning_rate": 2e-05, "loss": 0.0530347, "step": 3591 }, { "epoch": 7.184, "grad_norm": 1.0053508281707764, "learning_rate": 2e-05, "loss": 0.03013034, "step": 3592 }, { "epoch": 7.186, "grad_norm": 0.98488450050354, "learning_rate": 2e-05, "loss": 0.03671765, "step": 3593 }, { "epoch": 7.188, "grad_norm": 1.0015475749969482, "learning_rate": 2e-05, "loss": 0.04332153, "step": 3594 }, { "epoch": 7.19, "grad_norm": 0.9634996652603149, "learning_rate": 2e-05, "loss": 0.03477596, "step": 3595 }, { "epoch": 7.192, "grad_norm": 1.449459433555603, "learning_rate": 2e-05, "loss": 0.04470537, "step": 3596 }, { "epoch": 7.194, "grad_norm": 1.5424998998641968, "learning_rate": 2e-05, "loss": 0.0429943, "step": 3597 }, { "epoch": 7.196, "grad_norm": 1.188292145729065, "learning_rate": 2e-05, "loss": 0.0557626, "step": 3598 }, { "epoch": 7.198, "grad_norm": 2.4084107875823975, "learning_rate": 2e-05, "loss": 0.05601403, "step": 3599 }, { "epoch": 7.2, "grad_norm": 0.9908252358436584, "learning_rate": 2e-05, "loss": 0.03369357, "step": 3600 }, { "epoch": 7.202, "grad_norm": 1.5220204591751099, "learning_rate": 2e-05, "loss": 0.02625774, "step": 3601 }, { "epoch": 7.204, "grad_norm": 0.6580475568771362, "learning_rate": 2e-05, "loss": 0.02305327, "step": 3602 }, { "epoch": 7.206, "grad_norm": 1.1257959604263306, "learning_rate": 2e-05, "loss": 0.03910206, "step": 3603 }, { "epoch": 7.208, "grad_norm": 1.540611743927002, "learning_rate": 2e-05, "loss": 0.03736247, "step": 3604 }, { "epoch": 7.21, "grad_norm": 1.1671334505081177, "learning_rate": 2e-05, "loss": 0.04040615, "step": 3605 }, { "epoch": 7.212, "grad_norm": 1.2511060237884521, "learning_rate": 2e-05, "loss": 0.04984138, "step": 3606 }, { "epoch": 7.214, "grad_norm": 1.3831896781921387, "learning_rate": 2e-05, "loss": 0.04208995, "step": 3607 }, { "epoch": 7.216, "grad_norm": 1.0656285285949707, "learning_rate": 2e-05, "loss": 0.03059685, "step": 3608 }, { "epoch": 7.218, "grad_norm": 1.7086400985717773, "learning_rate": 2e-05, "loss": 0.03772062, "step": 3609 }, { "epoch": 7.22, "grad_norm": 0.8708809018135071, "learning_rate": 2e-05, "loss": 0.02981712, "step": 3610 }, { "epoch": 7.222, "grad_norm": 1.2278753519058228, "learning_rate": 2e-05, "loss": 0.0340943, "step": 3611 }, { "epoch": 7.224, "grad_norm": 1.2294296026229858, "learning_rate": 2e-05, "loss": 0.03797633, "step": 3612 }, { "epoch": 7.226, "grad_norm": 1.307619333267212, "learning_rate": 2e-05, "loss": 0.02904113, "step": 3613 }, { "epoch": 7.228, "grad_norm": 0.9629518985748291, "learning_rate": 2e-05, "loss": 0.03015028, "step": 3614 }, { "epoch": 7.23, "grad_norm": 1.7522755861282349, "learning_rate": 2e-05, "loss": 0.05577182, "step": 3615 }, { "epoch": 7.232, "grad_norm": 1.749764323234558, "learning_rate": 2e-05, "loss": 0.07278188, "step": 3616 }, { "epoch": 7.234, "grad_norm": 0.9146685600280762, "learning_rate": 2e-05, "loss": 0.03350521, "step": 3617 }, { "epoch": 7.236, "grad_norm": 1.0423475503921509, "learning_rate": 2e-05, "loss": 0.04199705, "step": 3618 }, { "epoch": 7.2379999999999995, "grad_norm": 1.1367098093032837, "learning_rate": 2e-05, "loss": 0.02509356, "step": 3619 }, { "epoch": 7.24, "grad_norm": 2.201166868209839, "learning_rate": 2e-05, "loss": 0.04431981, "step": 3620 }, { "epoch": 7.242, "grad_norm": 0.9358102679252625, "learning_rate": 2e-05, "loss": 0.03562379, "step": 3621 }, { "epoch": 7.244, "grad_norm": 1.9329403638839722, "learning_rate": 2e-05, "loss": 0.05947933, "step": 3622 }, { "epoch": 7.246, "grad_norm": 1.1022632122039795, "learning_rate": 2e-05, "loss": 0.03769203, "step": 3623 }, { "epoch": 7.248, "grad_norm": 1.1916468143463135, "learning_rate": 2e-05, "loss": 0.0513382, "step": 3624 }, { "epoch": 7.25, "grad_norm": 1.168161153793335, "learning_rate": 2e-05, "loss": 0.0357165, "step": 3625 }, { "epoch": 7.252, "grad_norm": 1.5427764654159546, "learning_rate": 2e-05, "loss": 0.04057292, "step": 3626 }, { "epoch": 7.254, "grad_norm": 1.5742231607437134, "learning_rate": 2e-05, "loss": 0.05547933, "step": 3627 }, { "epoch": 7.256, "grad_norm": 1.3378602266311646, "learning_rate": 2e-05, "loss": 0.05053571, "step": 3628 }, { "epoch": 7.258, "grad_norm": 1.113234043121338, "learning_rate": 2e-05, "loss": 0.03095096, "step": 3629 }, { "epoch": 7.26, "grad_norm": 0.9802829027175903, "learning_rate": 2e-05, "loss": 0.03707263, "step": 3630 }, { "epoch": 7.2620000000000005, "grad_norm": 1.468116283416748, "learning_rate": 2e-05, "loss": 0.03658748, "step": 3631 }, { "epoch": 7.264, "grad_norm": 1.2311400175094604, "learning_rate": 2e-05, "loss": 0.04673175, "step": 3632 }, { "epoch": 7.266, "grad_norm": 1.1862722635269165, "learning_rate": 2e-05, "loss": 0.03474464, "step": 3633 }, { "epoch": 7.268, "grad_norm": 1.407196283340454, "learning_rate": 2e-05, "loss": 0.04753347, "step": 3634 }, { "epoch": 7.27, "grad_norm": 0.9811577796936035, "learning_rate": 2e-05, "loss": 0.03299804, "step": 3635 }, { "epoch": 7.272, "grad_norm": 0.899973452091217, "learning_rate": 2e-05, "loss": 0.03169245, "step": 3636 }, { "epoch": 7.274, "grad_norm": 1.058289885520935, "learning_rate": 2e-05, "loss": 0.0447946, "step": 3637 }, { "epoch": 7.276, "grad_norm": 0.9645974636077881, "learning_rate": 2e-05, "loss": 0.04266893, "step": 3638 }, { "epoch": 7.2780000000000005, "grad_norm": 1.165807843208313, "learning_rate": 2e-05, "loss": 0.05726609, "step": 3639 }, { "epoch": 7.28, "grad_norm": 1.3790762424468994, "learning_rate": 2e-05, "loss": 0.03359796, "step": 3640 }, { "epoch": 7.282, "grad_norm": 1.9710363149642944, "learning_rate": 2e-05, "loss": 0.05350699, "step": 3641 }, { "epoch": 7.284, "grad_norm": 1.0167227983474731, "learning_rate": 2e-05, "loss": 0.02356956, "step": 3642 }, { "epoch": 7.286, "grad_norm": 1.0017025470733643, "learning_rate": 2e-05, "loss": 0.04047828, "step": 3643 }, { "epoch": 7.288, "grad_norm": 1.360489010810852, "learning_rate": 2e-05, "loss": 0.04198756, "step": 3644 }, { "epoch": 7.29, "grad_norm": 1.9278860092163086, "learning_rate": 2e-05, "loss": 0.04838209, "step": 3645 }, { "epoch": 7.292, "grad_norm": 1.193939447402954, "learning_rate": 2e-05, "loss": 0.03845323, "step": 3646 }, { "epoch": 7.294, "grad_norm": 1.2597097158432007, "learning_rate": 2e-05, "loss": 0.03964346, "step": 3647 }, { "epoch": 7.296, "grad_norm": 0.8793847560882568, "learning_rate": 2e-05, "loss": 0.02905294, "step": 3648 }, { "epoch": 7.298, "grad_norm": 2.039497137069702, "learning_rate": 2e-05, "loss": 0.03595114, "step": 3649 }, { "epoch": 7.3, "grad_norm": 1.90884530544281, "learning_rate": 2e-05, "loss": 0.03373088, "step": 3650 }, { "epoch": 7.302, "grad_norm": 1.29863440990448, "learning_rate": 2e-05, "loss": 0.04182725, "step": 3651 }, { "epoch": 7.304, "grad_norm": 1.377968668937683, "learning_rate": 2e-05, "loss": 0.04984019, "step": 3652 }, { "epoch": 7.306, "grad_norm": 1.3460925817489624, "learning_rate": 2e-05, "loss": 0.03476295, "step": 3653 }, { "epoch": 7.308, "grad_norm": 1.1356927156448364, "learning_rate": 2e-05, "loss": 0.04496831, "step": 3654 }, { "epoch": 7.31, "grad_norm": 1.2044658660888672, "learning_rate": 2e-05, "loss": 0.03297555, "step": 3655 }, { "epoch": 7.312, "grad_norm": 1.1783875226974487, "learning_rate": 2e-05, "loss": 0.03951931, "step": 3656 }, { "epoch": 7.314, "grad_norm": 1.4998606443405151, "learning_rate": 2e-05, "loss": 0.04904345, "step": 3657 }, { "epoch": 7.316, "grad_norm": 1.069299340248108, "learning_rate": 2e-05, "loss": 0.04221024, "step": 3658 }, { "epoch": 7.318, "grad_norm": 1.0751981735229492, "learning_rate": 2e-05, "loss": 0.04810484, "step": 3659 }, { "epoch": 7.32, "grad_norm": 0.7839384078979492, "learning_rate": 2e-05, "loss": 0.02803702, "step": 3660 }, { "epoch": 7.322, "grad_norm": 1.5564318895339966, "learning_rate": 2e-05, "loss": 0.05072021, "step": 3661 }, { "epoch": 7.324, "grad_norm": 1.5220894813537598, "learning_rate": 2e-05, "loss": 0.05519965, "step": 3662 }, { "epoch": 7.326, "grad_norm": 1.0204460620880127, "learning_rate": 2e-05, "loss": 0.034462, "step": 3663 }, { "epoch": 7.328, "grad_norm": 1.13776695728302, "learning_rate": 2e-05, "loss": 0.04813009, "step": 3664 }, { "epoch": 7.33, "grad_norm": 1.80879807472229, "learning_rate": 2e-05, "loss": 0.03736478, "step": 3665 }, { "epoch": 7.332, "grad_norm": 1.3701401948928833, "learning_rate": 2e-05, "loss": 0.04804238, "step": 3666 }, { "epoch": 7.334, "grad_norm": 0.9035919904708862, "learning_rate": 2e-05, "loss": 0.03037902, "step": 3667 }, { "epoch": 7.336, "grad_norm": 1.7895723581314087, "learning_rate": 2e-05, "loss": 0.03846157, "step": 3668 }, { "epoch": 7.338, "grad_norm": 1.781222939491272, "learning_rate": 2e-05, "loss": 0.04183713, "step": 3669 }, { "epoch": 7.34, "grad_norm": 1.0978511571884155, "learning_rate": 2e-05, "loss": 0.0466851, "step": 3670 }, { "epoch": 7.342, "grad_norm": 0.9733462333679199, "learning_rate": 2e-05, "loss": 0.02589707, "step": 3671 }, { "epoch": 7.344, "grad_norm": 0.9411913156509399, "learning_rate": 2e-05, "loss": 0.03021464, "step": 3672 }, { "epoch": 7.346, "grad_norm": 2.3697471618652344, "learning_rate": 2e-05, "loss": 0.0212607, "step": 3673 }, { "epoch": 7.348, "grad_norm": 0.9489407539367676, "learning_rate": 2e-05, "loss": 0.03146159, "step": 3674 }, { "epoch": 7.35, "grad_norm": 1.205493450164795, "learning_rate": 2e-05, "loss": 0.04328721, "step": 3675 }, { "epoch": 7.352, "grad_norm": 1.1871635913848877, "learning_rate": 2e-05, "loss": 0.03284777, "step": 3676 }, { "epoch": 7.354, "grad_norm": 2.1281025409698486, "learning_rate": 2e-05, "loss": 0.04016072, "step": 3677 }, { "epoch": 7.356, "grad_norm": 1.7099283933639526, "learning_rate": 2e-05, "loss": 0.04911397, "step": 3678 }, { "epoch": 7.358, "grad_norm": 1.3739433288574219, "learning_rate": 2e-05, "loss": 0.04767404, "step": 3679 }, { "epoch": 7.36, "grad_norm": 1.3795156478881836, "learning_rate": 2e-05, "loss": 0.04077213, "step": 3680 }, { "epoch": 7.362, "grad_norm": 2.404796600341797, "learning_rate": 2e-05, "loss": 0.05505502, "step": 3681 }, { "epoch": 7.364, "grad_norm": 1.2563914060592651, "learning_rate": 2e-05, "loss": 0.04131902, "step": 3682 }, { "epoch": 7.366, "grad_norm": 1.4878116846084595, "learning_rate": 2e-05, "loss": 0.04153151, "step": 3683 }, { "epoch": 7.368, "grad_norm": 1.1153217554092407, "learning_rate": 2e-05, "loss": 0.04917615, "step": 3684 }, { "epoch": 7.37, "grad_norm": 1.1411298513412476, "learning_rate": 2e-05, "loss": 0.03250815, "step": 3685 }, { "epoch": 7.372, "grad_norm": 2.1213037967681885, "learning_rate": 2e-05, "loss": 0.04034688, "step": 3686 }, { "epoch": 7.374, "grad_norm": 1.7561700344085693, "learning_rate": 2e-05, "loss": 0.03943007, "step": 3687 }, { "epoch": 7.376, "grad_norm": 0.9968758225440979, "learning_rate": 2e-05, "loss": 0.03736941, "step": 3688 }, { "epoch": 7.378, "grad_norm": 0.9791030883789062, "learning_rate": 2e-05, "loss": 0.03951246, "step": 3689 }, { "epoch": 7.38, "grad_norm": 1.057389736175537, "learning_rate": 2e-05, "loss": 0.02968369, "step": 3690 }, { "epoch": 7.382, "grad_norm": 1.167794942855835, "learning_rate": 2e-05, "loss": 0.03916791, "step": 3691 }, { "epoch": 7.384, "grad_norm": 0.9653112292289734, "learning_rate": 2e-05, "loss": 0.04399735, "step": 3692 }, { "epoch": 7.386, "grad_norm": 0.9677613973617554, "learning_rate": 2e-05, "loss": 0.03709462, "step": 3693 }, { "epoch": 7.388, "grad_norm": 1.3866636753082275, "learning_rate": 2e-05, "loss": 0.04609895, "step": 3694 }, { "epoch": 7.39, "grad_norm": 1.3283741474151611, "learning_rate": 2e-05, "loss": 0.03613387, "step": 3695 }, { "epoch": 7.392, "grad_norm": 1.6678130626678467, "learning_rate": 2e-05, "loss": 0.04043178, "step": 3696 }, { "epoch": 7.394, "grad_norm": 0.7619472742080688, "learning_rate": 2e-05, "loss": 0.0298718, "step": 3697 }, { "epoch": 7.396, "grad_norm": 1.3646373748779297, "learning_rate": 2e-05, "loss": 0.04568679, "step": 3698 }, { "epoch": 7.398, "grad_norm": 1.1899365186691284, "learning_rate": 2e-05, "loss": 0.03193174, "step": 3699 }, { "epoch": 7.4, "grad_norm": 1.4613161087036133, "learning_rate": 2e-05, "loss": 0.0527728, "step": 3700 }, { "epoch": 7.402, "grad_norm": 1.7745336294174194, "learning_rate": 2e-05, "loss": 0.04560399, "step": 3701 }, { "epoch": 7.404, "grad_norm": 1.0894447565078735, "learning_rate": 2e-05, "loss": 0.04315583, "step": 3702 }, { "epoch": 7.406, "grad_norm": 1.2034350633621216, "learning_rate": 2e-05, "loss": 0.04093555, "step": 3703 }, { "epoch": 7.408, "grad_norm": 1.7974828481674194, "learning_rate": 2e-05, "loss": 0.04162872, "step": 3704 }, { "epoch": 7.41, "grad_norm": 1.0051512718200684, "learning_rate": 2e-05, "loss": 0.04721901, "step": 3705 }, { "epoch": 7.412, "grad_norm": 1.5743627548217773, "learning_rate": 2e-05, "loss": 0.0513151, "step": 3706 }, { "epoch": 7.414, "grad_norm": 1.272755742073059, "learning_rate": 2e-05, "loss": 0.03197399, "step": 3707 }, { "epoch": 7.416, "grad_norm": 1.102290153503418, "learning_rate": 2e-05, "loss": 0.04256657, "step": 3708 }, { "epoch": 7.418, "grad_norm": 1.4086740016937256, "learning_rate": 2e-05, "loss": 0.03907021, "step": 3709 }, { "epoch": 7.42, "grad_norm": 1.2285701036453247, "learning_rate": 2e-05, "loss": 0.03561983, "step": 3710 }, { "epoch": 7.422, "grad_norm": 1.2538796663284302, "learning_rate": 2e-05, "loss": 0.03297718, "step": 3711 }, { "epoch": 7.424, "grad_norm": 1.9031695127487183, "learning_rate": 2e-05, "loss": 0.03688716, "step": 3712 }, { "epoch": 7.426, "grad_norm": 1.2903622388839722, "learning_rate": 2e-05, "loss": 0.0448916, "step": 3713 }, { "epoch": 7.428, "grad_norm": 0.876520037651062, "learning_rate": 2e-05, "loss": 0.02553332, "step": 3714 }, { "epoch": 7.43, "grad_norm": 1.1201696395874023, "learning_rate": 2e-05, "loss": 0.03491214, "step": 3715 }, { "epoch": 7.432, "grad_norm": 1.2083535194396973, "learning_rate": 2e-05, "loss": 0.04699305, "step": 3716 }, { "epoch": 7.434, "grad_norm": 1.0845590829849243, "learning_rate": 2e-05, "loss": 0.04281496, "step": 3717 }, { "epoch": 7.436, "grad_norm": 1.1783486604690552, "learning_rate": 2e-05, "loss": 0.04126569, "step": 3718 }, { "epoch": 7.438, "grad_norm": 1.1123327016830444, "learning_rate": 2e-05, "loss": 0.04437939, "step": 3719 }, { "epoch": 7.44, "grad_norm": 1.0133553743362427, "learning_rate": 2e-05, "loss": 0.02839744, "step": 3720 }, { "epoch": 7.442, "grad_norm": 1.140036702156067, "learning_rate": 2e-05, "loss": 0.03378626, "step": 3721 }, { "epoch": 7.444, "grad_norm": 1.5404988527297974, "learning_rate": 2e-05, "loss": 0.04855625, "step": 3722 }, { "epoch": 7.446, "grad_norm": 1.3541570901870728, "learning_rate": 2e-05, "loss": 0.03609692, "step": 3723 }, { "epoch": 7.448, "grad_norm": 1.0215586423873901, "learning_rate": 2e-05, "loss": 0.03211108, "step": 3724 }, { "epoch": 7.45, "grad_norm": 1.1272186040878296, "learning_rate": 2e-05, "loss": 0.03291052, "step": 3725 }, { "epoch": 7.452, "grad_norm": 1.4234660863876343, "learning_rate": 2e-05, "loss": 0.03390593, "step": 3726 }, { "epoch": 7.454, "grad_norm": 2.013270854949951, "learning_rate": 2e-05, "loss": 0.0521251, "step": 3727 }, { "epoch": 7.456, "grad_norm": 2.130812168121338, "learning_rate": 2e-05, "loss": 0.03762313, "step": 3728 }, { "epoch": 7.458, "grad_norm": 1.8372830152511597, "learning_rate": 2e-05, "loss": 0.02940102, "step": 3729 }, { "epoch": 7.46, "grad_norm": 1.0431795120239258, "learning_rate": 2e-05, "loss": 0.03809291, "step": 3730 }, { "epoch": 7.462, "grad_norm": 2.549889087677002, "learning_rate": 2e-05, "loss": 0.04966386, "step": 3731 }, { "epoch": 7.464, "grad_norm": 1.0917586088180542, "learning_rate": 2e-05, "loss": 0.03641233, "step": 3732 }, { "epoch": 7.466, "grad_norm": 1.4873236417770386, "learning_rate": 2e-05, "loss": 0.05473822, "step": 3733 }, { "epoch": 7.468, "grad_norm": 1.1505955457687378, "learning_rate": 2e-05, "loss": 0.03198997, "step": 3734 }, { "epoch": 7.47, "grad_norm": 1.05476713180542, "learning_rate": 2e-05, "loss": 0.0523077, "step": 3735 }, { "epoch": 7.4719999999999995, "grad_norm": 0.9075616002082825, "learning_rate": 2e-05, "loss": 0.03430272, "step": 3736 }, { "epoch": 7.474, "grad_norm": 1.4882951974868774, "learning_rate": 2e-05, "loss": 0.04742388, "step": 3737 }, { "epoch": 7.476, "grad_norm": 1.3199410438537598, "learning_rate": 2e-05, "loss": 0.04811357, "step": 3738 }, { "epoch": 7.478, "grad_norm": 1.4998040199279785, "learning_rate": 2e-05, "loss": 0.05781774, "step": 3739 }, { "epoch": 7.48, "grad_norm": 1.1763900518417358, "learning_rate": 2e-05, "loss": 0.04293488, "step": 3740 }, { "epoch": 7.482, "grad_norm": 1.2047046422958374, "learning_rate": 2e-05, "loss": 0.03302346, "step": 3741 }, { "epoch": 7.484, "grad_norm": 2.634044885635376, "learning_rate": 2e-05, "loss": 0.04552647, "step": 3742 }, { "epoch": 7.486, "grad_norm": 1.2086098194122314, "learning_rate": 2e-05, "loss": 0.0345159, "step": 3743 }, { "epoch": 7.4879999999999995, "grad_norm": 0.8682928085327148, "learning_rate": 2e-05, "loss": 0.02553735, "step": 3744 }, { "epoch": 7.49, "grad_norm": 1.7869946956634521, "learning_rate": 2e-05, "loss": 0.04635962, "step": 3745 }, { "epoch": 7.492, "grad_norm": 2.081479072570801, "learning_rate": 2e-05, "loss": 0.05424014, "step": 3746 }, { "epoch": 7.494, "grad_norm": 1.328266978263855, "learning_rate": 2e-05, "loss": 0.04198888, "step": 3747 }, { "epoch": 7.496, "grad_norm": 0.9337484836578369, "learning_rate": 2e-05, "loss": 0.02734605, "step": 3748 }, { "epoch": 7.498, "grad_norm": 1.2273976802825928, "learning_rate": 2e-05, "loss": 0.0385808, "step": 3749 }, { "epoch": 7.5, "grad_norm": 1.4536179304122925, "learning_rate": 2e-05, "loss": 0.03167406, "step": 3750 }, { "epoch": 7.502, "grad_norm": 2.3166580200195312, "learning_rate": 2e-05, "loss": 0.07199101, "step": 3751 }, { "epoch": 7.504, "grad_norm": 0.7799525856971741, "learning_rate": 2e-05, "loss": 0.02388366, "step": 3752 }, { "epoch": 7.506, "grad_norm": 1.3459086418151855, "learning_rate": 2e-05, "loss": 0.04671092, "step": 3753 }, { "epoch": 7.508, "grad_norm": 1.533898949623108, "learning_rate": 2e-05, "loss": 0.04242814, "step": 3754 }, { "epoch": 7.51, "grad_norm": 0.7353763580322266, "learning_rate": 2e-05, "loss": 0.02716245, "step": 3755 }, { "epoch": 7.5120000000000005, "grad_norm": 1.5440877676010132, "learning_rate": 2e-05, "loss": 0.0432301, "step": 3756 }, { "epoch": 7.514, "grad_norm": 1.2611180543899536, "learning_rate": 2e-05, "loss": 0.04433353, "step": 3757 }, { "epoch": 7.516, "grad_norm": 1.3201874494552612, "learning_rate": 2e-05, "loss": 0.03974119, "step": 3758 }, { "epoch": 7.518, "grad_norm": 0.8395015001296997, "learning_rate": 2e-05, "loss": 0.02220454, "step": 3759 }, { "epoch": 7.52, "grad_norm": 1.2028474807739258, "learning_rate": 2e-05, "loss": 0.02910957, "step": 3760 }, { "epoch": 7.522, "grad_norm": 1.3701330423355103, "learning_rate": 2e-05, "loss": 0.04647592, "step": 3761 }, { "epoch": 7.524, "grad_norm": 0.9634074568748474, "learning_rate": 2e-05, "loss": 0.02413558, "step": 3762 }, { "epoch": 7.526, "grad_norm": 0.8971022367477417, "learning_rate": 2e-05, "loss": 0.03300086, "step": 3763 }, { "epoch": 7.5280000000000005, "grad_norm": 1.1393944025039673, "learning_rate": 2e-05, "loss": 0.02892333, "step": 3764 }, { "epoch": 7.53, "grad_norm": 1.0472612380981445, "learning_rate": 2e-05, "loss": 0.0462171, "step": 3765 }, { "epoch": 7.532, "grad_norm": 1.1346007585525513, "learning_rate": 2e-05, "loss": 0.04081227, "step": 3766 }, { "epoch": 7.534, "grad_norm": 0.8995129466056824, "learning_rate": 2e-05, "loss": 0.0335181, "step": 3767 }, { "epoch": 7.536, "grad_norm": 1.1206213235855103, "learning_rate": 2e-05, "loss": 0.04538394, "step": 3768 }, { "epoch": 7.538, "grad_norm": 0.9280003309249878, "learning_rate": 2e-05, "loss": 0.03115066, "step": 3769 }, { "epoch": 7.54, "grad_norm": 0.8659487962722778, "learning_rate": 2e-05, "loss": 0.02806683, "step": 3770 }, { "epoch": 7.542, "grad_norm": 1.2042627334594727, "learning_rate": 2e-05, "loss": 0.04584879, "step": 3771 }, { "epoch": 7.5440000000000005, "grad_norm": 1.244455337524414, "learning_rate": 2e-05, "loss": 0.04704358, "step": 3772 }, { "epoch": 7.546, "grad_norm": 1.3987654447555542, "learning_rate": 2e-05, "loss": 0.04331089, "step": 3773 }, { "epoch": 7.548, "grad_norm": 2.0381100177764893, "learning_rate": 2e-05, "loss": 0.04024776, "step": 3774 }, { "epoch": 7.55, "grad_norm": 1.6020790338516235, "learning_rate": 2e-05, "loss": 0.03940343, "step": 3775 }, { "epoch": 7.552, "grad_norm": 3.575554370880127, "learning_rate": 2e-05, "loss": 0.05969448, "step": 3776 }, { "epoch": 7.554, "grad_norm": 1.3182324171066284, "learning_rate": 2e-05, "loss": 0.04598895, "step": 3777 }, { "epoch": 7.556, "grad_norm": 1.0324612855911255, "learning_rate": 2e-05, "loss": 0.03442844, "step": 3778 }, { "epoch": 7.558, "grad_norm": 1.1073555946350098, "learning_rate": 2e-05, "loss": 0.03667542, "step": 3779 }, { "epoch": 7.5600000000000005, "grad_norm": 1.5646885633468628, "learning_rate": 2e-05, "loss": 0.0411161, "step": 3780 }, { "epoch": 7.562, "grad_norm": 1.5337741374969482, "learning_rate": 2e-05, "loss": 0.04769251, "step": 3781 }, { "epoch": 7.564, "grad_norm": 1.8881093263626099, "learning_rate": 2e-05, "loss": 0.05436054, "step": 3782 }, { "epoch": 7.566, "grad_norm": 0.9238749146461487, "learning_rate": 2e-05, "loss": 0.02898631, "step": 3783 }, { "epoch": 7.568, "grad_norm": 1.6122301816940308, "learning_rate": 2e-05, "loss": 0.04194048, "step": 3784 }, { "epoch": 7.57, "grad_norm": 1.4319827556610107, "learning_rate": 2e-05, "loss": 0.04231936, "step": 3785 }, { "epoch": 7.572, "grad_norm": 1.4001187086105347, "learning_rate": 2e-05, "loss": 0.03585372, "step": 3786 }, { "epoch": 7.574, "grad_norm": 1.1153039932250977, "learning_rate": 2e-05, "loss": 0.04051266, "step": 3787 }, { "epoch": 7.576, "grad_norm": 0.8442473411560059, "learning_rate": 2e-05, "loss": 0.03473689, "step": 3788 }, { "epoch": 7.578, "grad_norm": 1.271088719367981, "learning_rate": 2e-05, "loss": 0.02845667, "step": 3789 }, { "epoch": 7.58, "grad_norm": 0.9689573049545288, "learning_rate": 2e-05, "loss": 0.03425846, "step": 3790 }, { "epoch": 7.582, "grad_norm": 2.7118847370147705, "learning_rate": 2e-05, "loss": 0.04166003, "step": 3791 }, { "epoch": 7.584, "grad_norm": 0.8275643587112427, "learning_rate": 2e-05, "loss": 0.03083588, "step": 3792 }, { "epoch": 7.586, "grad_norm": 1.0270956754684448, "learning_rate": 2e-05, "loss": 0.03857341, "step": 3793 }, { "epoch": 7.588, "grad_norm": 1.2780073881149292, "learning_rate": 2e-05, "loss": 0.04923874, "step": 3794 }, { "epoch": 7.59, "grad_norm": 2.031608819961548, "learning_rate": 2e-05, "loss": 0.03907903, "step": 3795 }, { "epoch": 7.592, "grad_norm": 1.017505168914795, "learning_rate": 2e-05, "loss": 0.03970684, "step": 3796 }, { "epoch": 7.594, "grad_norm": 1.2672163248062134, "learning_rate": 2e-05, "loss": 0.03399092, "step": 3797 }, { "epoch": 7.596, "grad_norm": 1.2368937730789185, "learning_rate": 2e-05, "loss": 0.04289121, "step": 3798 }, { "epoch": 7.598, "grad_norm": 1.1858489513397217, "learning_rate": 2e-05, "loss": 0.03806916, "step": 3799 }, { "epoch": 7.6, "grad_norm": 1.6546827554702759, "learning_rate": 2e-05, "loss": 0.05514735, "step": 3800 }, { "epoch": 7.602, "grad_norm": 1.270649790763855, "learning_rate": 2e-05, "loss": 0.04329472, "step": 3801 }, { "epoch": 7.604, "grad_norm": 1.1243587732315063, "learning_rate": 2e-05, "loss": 0.03258464, "step": 3802 }, { "epoch": 7.606, "grad_norm": 0.945201575756073, "learning_rate": 2e-05, "loss": 0.03114973, "step": 3803 }, { "epoch": 7.608, "grad_norm": 1.2511130571365356, "learning_rate": 2e-05, "loss": 0.03684647, "step": 3804 }, { "epoch": 7.61, "grad_norm": 0.826673150062561, "learning_rate": 2e-05, "loss": 0.03303421, "step": 3805 }, { "epoch": 7.612, "grad_norm": 1.1407649517059326, "learning_rate": 2e-05, "loss": 0.0360745, "step": 3806 }, { "epoch": 7.614, "grad_norm": 1.4660755395889282, "learning_rate": 2e-05, "loss": 0.04614862, "step": 3807 }, { "epoch": 7.616, "grad_norm": 0.9007794857025146, "learning_rate": 2e-05, "loss": 0.02910686, "step": 3808 }, { "epoch": 7.618, "grad_norm": 1.5231950283050537, "learning_rate": 2e-05, "loss": 0.03653279, "step": 3809 }, { "epoch": 7.62, "grad_norm": 1.6285333633422852, "learning_rate": 2e-05, "loss": 0.05814054, "step": 3810 }, { "epoch": 7.622, "grad_norm": 1.2553471326828003, "learning_rate": 2e-05, "loss": 0.0405231, "step": 3811 }, { "epoch": 7.624, "grad_norm": 1.0823054313659668, "learning_rate": 2e-05, "loss": 0.03695492, "step": 3812 }, { "epoch": 7.626, "grad_norm": 1.1861990690231323, "learning_rate": 2e-05, "loss": 0.02691978, "step": 3813 }, { "epoch": 7.628, "grad_norm": 1.5524547100067139, "learning_rate": 2e-05, "loss": 0.03528023, "step": 3814 }, { "epoch": 7.63, "grad_norm": 1.6202709674835205, "learning_rate": 2e-05, "loss": 0.04605823, "step": 3815 }, { "epoch": 7.632, "grad_norm": 1.045693278312683, "learning_rate": 2e-05, "loss": 0.03231712, "step": 3816 }, { "epoch": 7.634, "grad_norm": 1.6261032819747925, "learning_rate": 2e-05, "loss": 0.03298339, "step": 3817 }, { "epoch": 7.636, "grad_norm": 1.228010654449463, "learning_rate": 2e-05, "loss": 0.03658097, "step": 3818 }, { "epoch": 7.638, "grad_norm": 0.8162320256233215, "learning_rate": 2e-05, "loss": 0.01852017, "step": 3819 }, { "epoch": 7.64, "grad_norm": 1.354498267173767, "learning_rate": 2e-05, "loss": 0.04582691, "step": 3820 }, { "epoch": 7.642, "grad_norm": 1.324304223060608, "learning_rate": 2e-05, "loss": 0.0527965, "step": 3821 }, { "epoch": 7.644, "grad_norm": 1.3460594415664673, "learning_rate": 2e-05, "loss": 0.04437679, "step": 3822 }, { "epoch": 7.646, "grad_norm": 1.8109791278839111, "learning_rate": 2e-05, "loss": 0.05773157, "step": 3823 }, { "epoch": 7.648, "grad_norm": 0.8195428252220154, "learning_rate": 2e-05, "loss": 0.03061136, "step": 3824 }, { "epoch": 7.65, "grad_norm": 1.9469859600067139, "learning_rate": 2e-05, "loss": 0.0574123, "step": 3825 }, { "epoch": 7.652, "grad_norm": 1.1457821130752563, "learning_rate": 2e-05, "loss": 0.0371997, "step": 3826 }, { "epoch": 7.654, "grad_norm": 1.103453516960144, "learning_rate": 2e-05, "loss": 0.0368687, "step": 3827 }, { "epoch": 7.656, "grad_norm": 1.2994762659072876, "learning_rate": 2e-05, "loss": 0.0307833, "step": 3828 }, { "epoch": 7.658, "grad_norm": 1.4787354469299316, "learning_rate": 2e-05, "loss": 0.05036329, "step": 3829 }, { "epoch": 7.66, "grad_norm": 1.737073540687561, "learning_rate": 2e-05, "loss": 0.05516548, "step": 3830 }, { "epoch": 7.662, "grad_norm": 1.2701951265335083, "learning_rate": 2e-05, "loss": 0.04208244, "step": 3831 }, { "epoch": 7.664, "grad_norm": 1.3121992349624634, "learning_rate": 2e-05, "loss": 0.03150842, "step": 3832 }, { "epoch": 7.666, "grad_norm": 1.8181569576263428, "learning_rate": 2e-05, "loss": 0.05234169, "step": 3833 }, { "epoch": 7.668, "grad_norm": 1.0843565464019775, "learning_rate": 2e-05, "loss": 0.0417839, "step": 3834 }, { "epoch": 7.67, "grad_norm": 1.234837293624878, "learning_rate": 2e-05, "loss": 0.0390937, "step": 3835 }, { "epoch": 7.672, "grad_norm": 1.1480792760849, "learning_rate": 2e-05, "loss": 0.04374005, "step": 3836 }, { "epoch": 7.674, "grad_norm": 1.059192419052124, "learning_rate": 2e-05, "loss": 0.04086533, "step": 3837 }, { "epoch": 7.676, "grad_norm": 1.7075737714767456, "learning_rate": 2e-05, "loss": 0.03282085, "step": 3838 }, { "epoch": 7.678, "grad_norm": 1.618431568145752, "learning_rate": 2e-05, "loss": 0.04020979, "step": 3839 }, { "epoch": 7.68, "grad_norm": 1.8895968198776245, "learning_rate": 2e-05, "loss": 0.04384016, "step": 3840 }, { "epoch": 7.682, "grad_norm": 1.3534232378005981, "learning_rate": 2e-05, "loss": 0.03620755, "step": 3841 }, { "epoch": 7.684, "grad_norm": 1.356849193572998, "learning_rate": 2e-05, "loss": 0.04807463, "step": 3842 }, { "epoch": 7.686, "grad_norm": 1.5438176393508911, "learning_rate": 2e-05, "loss": 0.05163705, "step": 3843 }, { "epoch": 7.688, "grad_norm": 1.372164249420166, "learning_rate": 2e-05, "loss": 0.03780467, "step": 3844 }, { "epoch": 7.6899999999999995, "grad_norm": 1.1597192287445068, "learning_rate": 2e-05, "loss": 0.05566886, "step": 3845 }, { "epoch": 7.692, "grad_norm": 1.3201731443405151, "learning_rate": 2e-05, "loss": 0.03277762, "step": 3846 }, { "epoch": 7.694, "grad_norm": 1.905457615852356, "learning_rate": 2e-05, "loss": 0.0537452, "step": 3847 }, { "epoch": 7.696, "grad_norm": 0.8842202425003052, "learning_rate": 2e-05, "loss": 0.03273363, "step": 3848 }, { "epoch": 7.698, "grad_norm": 1.1162315607070923, "learning_rate": 2e-05, "loss": 0.04938876, "step": 3849 }, { "epoch": 7.7, "grad_norm": 1.254486322402954, "learning_rate": 2e-05, "loss": 0.04622002, "step": 3850 }, { "epoch": 7.702, "grad_norm": 1.5367838144302368, "learning_rate": 2e-05, "loss": 0.04362921, "step": 3851 }, { "epoch": 7.704, "grad_norm": 0.9276086091995239, "learning_rate": 2e-05, "loss": 0.02649395, "step": 3852 }, { "epoch": 7.7059999999999995, "grad_norm": 0.979318380355835, "learning_rate": 2e-05, "loss": 0.03367149, "step": 3853 }, { "epoch": 7.708, "grad_norm": 1.7237340211868286, "learning_rate": 2e-05, "loss": 0.04611293, "step": 3854 }, { "epoch": 7.71, "grad_norm": 1.4677069187164307, "learning_rate": 2e-05, "loss": 0.05508875, "step": 3855 }, { "epoch": 7.712, "grad_norm": 0.9862273335456848, "learning_rate": 2e-05, "loss": 0.03654676, "step": 3856 }, { "epoch": 7.714, "grad_norm": 0.9318107962608337, "learning_rate": 2e-05, "loss": 0.0445799, "step": 3857 }, { "epoch": 7.716, "grad_norm": 0.9310228824615479, "learning_rate": 2e-05, "loss": 0.04785854, "step": 3858 }, { "epoch": 7.718, "grad_norm": 1.3670527935028076, "learning_rate": 2e-05, "loss": 0.04865241, "step": 3859 }, { "epoch": 7.72, "grad_norm": 1.0226151943206787, "learning_rate": 2e-05, "loss": 0.04341766, "step": 3860 }, { "epoch": 7.7219999999999995, "grad_norm": 1.1123560667037964, "learning_rate": 2e-05, "loss": 0.04054319, "step": 3861 }, { "epoch": 7.724, "grad_norm": 0.7814924716949463, "learning_rate": 2e-05, "loss": 0.02791172, "step": 3862 }, { "epoch": 7.726, "grad_norm": 1.8218283653259277, "learning_rate": 2e-05, "loss": 0.05562104, "step": 3863 }, { "epoch": 7.728, "grad_norm": 1.1887874603271484, "learning_rate": 2e-05, "loss": 0.05122073, "step": 3864 }, { "epoch": 7.73, "grad_norm": 0.9236597418785095, "learning_rate": 2e-05, "loss": 0.03433614, "step": 3865 }, { "epoch": 7.732, "grad_norm": 1.039346694946289, "learning_rate": 2e-05, "loss": 0.0390777, "step": 3866 }, { "epoch": 7.734, "grad_norm": 1.0751581192016602, "learning_rate": 2e-05, "loss": 0.03655767, "step": 3867 }, { "epoch": 7.736, "grad_norm": 0.9537973999977112, "learning_rate": 2e-05, "loss": 0.03307111, "step": 3868 }, { "epoch": 7.7379999999999995, "grad_norm": 1.3233144283294678, "learning_rate": 2e-05, "loss": 0.03881076, "step": 3869 }, { "epoch": 7.74, "grad_norm": 1.856916069984436, "learning_rate": 2e-05, "loss": 0.04945555, "step": 3870 }, { "epoch": 7.742, "grad_norm": 1.624302625656128, "learning_rate": 2e-05, "loss": 0.02870607, "step": 3871 }, { "epoch": 7.744, "grad_norm": 1.2497841119766235, "learning_rate": 2e-05, "loss": 0.03191271, "step": 3872 }, { "epoch": 7.746, "grad_norm": 1.5549039840698242, "learning_rate": 2e-05, "loss": 0.05997951, "step": 3873 }, { "epoch": 7.748, "grad_norm": 1.6951433420181274, "learning_rate": 2e-05, "loss": 0.03407031, "step": 3874 }, { "epoch": 7.75, "grad_norm": 1.858009934425354, "learning_rate": 2e-05, "loss": 0.04417756, "step": 3875 }, { "epoch": 7.752, "grad_norm": 1.0219511985778809, "learning_rate": 2e-05, "loss": 0.03360288, "step": 3876 }, { "epoch": 7.754, "grad_norm": 1.0470843315124512, "learning_rate": 2e-05, "loss": 0.03277028, "step": 3877 }, { "epoch": 7.756, "grad_norm": 0.9141106009483337, "learning_rate": 2e-05, "loss": 0.0280673, "step": 3878 }, { "epoch": 7.758, "grad_norm": 1.319383978843689, "learning_rate": 2e-05, "loss": 0.03298033, "step": 3879 }, { "epoch": 7.76, "grad_norm": 1.0344136953353882, "learning_rate": 2e-05, "loss": 0.0288705, "step": 3880 }, { "epoch": 7.7620000000000005, "grad_norm": 1.0950968265533447, "learning_rate": 2e-05, "loss": 0.03109436, "step": 3881 }, { "epoch": 7.764, "grad_norm": 0.9126579165458679, "learning_rate": 2e-05, "loss": 0.03889386, "step": 3882 }, { "epoch": 7.766, "grad_norm": 1.3138798475265503, "learning_rate": 2e-05, "loss": 0.0424635, "step": 3883 }, { "epoch": 7.768, "grad_norm": 1.1475924253463745, "learning_rate": 2e-05, "loss": 0.03910866, "step": 3884 }, { "epoch": 7.77, "grad_norm": 2.1356704235076904, "learning_rate": 2e-05, "loss": 0.0535647, "step": 3885 }, { "epoch": 7.772, "grad_norm": 1.0045274496078491, "learning_rate": 2e-05, "loss": 0.03423846, "step": 3886 }, { "epoch": 7.774, "grad_norm": 1.1507409811019897, "learning_rate": 2e-05, "loss": 0.04929558, "step": 3887 }, { "epoch": 7.776, "grad_norm": 1.5284314155578613, "learning_rate": 2e-05, "loss": 0.04414826, "step": 3888 }, { "epoch": 7.7780000000000005, "grad_norm": 0.8679183721542358, "learning_rate": 2e-05, "loss": 0.02339435, "step": 3889 }, { "epoch": 7.78, "grad_norm": 1.0844906568527222, "learning_rate": 2e-05, "loss": 0.04693407, "step": 3890 }, { "epoch": 7.782, "grad_norm": 0.9902662038803101, "learning_rate": 2e-05, "loss": 0.03398534, "step": 3891 }, { "epoch": 7.784, "grad_norm": 1.2675637006759644, "learning_rate": 2e-05, "loss": 0.0334352, "step": 3892 }, { "epoch": 7.786, "grad_norm": 1.0097109079360962, "learning_rate": 2e-05, "loss": 0.04098812, "step": 3893 }, { "epoch": 7.788, "grad_norm": 1.3529424667358398, "learning_rate": 2e-05, "loss": 0.0438382, "step": 3894 }, { "epoch": 7.79, "grad_norm": 1.5297242403030396, "learning_rate": 2e-05, "loss": 0.04014076, "step": 3895 }, { "epoch": 7.792, "grad_norm": 1.0129755735397339, "learning_rate": 2e-05, "loss": 0.04218669, "step": 3896 }, { "epoch": 7.7940000000000005, "grad_norm": 1.4536404609680176, "learning_rate": 2e-05, "loss": 0.04014093, "step": 3897 }, { "epoch": 7.796, "grad_norm": 0.9335437417030334, "learning_rate": 2e-05, "loss": 0.02187639, "step": 3898 }, { "epoch": 7.798, "grad_norm": 2.970377206802368, "learning_rate": 2e-05, "loss": 0.04358038, "step": 3899 }, { "epoch": 7.8, "grad_norm": 1.4541501998901367, "learning_rate": 2e-05, "loss": 0.03977223, "step": 3900 }, { "epoch": 7.802, "grad_norm": 1.3246153593063354, "learning_rate": 2e-05, "loss": 0.053044, "step": 3901 }, { "epoch": 7.804, "grad_norm": 0.95296710729599, "learning_rate": 2e-05, "loss": 0.02642838, "step": 3902 }, { "epoch": 7.806, "grad_norm": 0.9451963305473328, "learning_rate": 2e-05, "loss": 0.02925847, "step": 3903 }, { "epoch": 7.808, "grad_norm": 1.3081765174865723, "learning_rate": 2e-05, "loss": 0.03797865, "step": 3904 }, { "epoch": 7.8100000000000005, "grad_norm": 1.706273078918457, "learning_rate": 2e-05, "loss": 0.03233082, "step": 3905 }, { "epoch": 7.812, "grad_norm": 0.9737005233764648, "learning_rate": 2e-05, "loss": 0.0305077, "step": 3906 }, { "epoch": 7.814, "grad_norm": 1.010351538658142, "learning_rate": 2e-05, "loss": 0.03701336, "step": 3907 }, { "epoch": 7.816, "grad_norm": 0.9679312705993652, "learning_rate": 2e-05, "loss": 0.03479642, "step": 3908 }, { "epoch": 7.818, "grad_norm": 0.9951714277267456, "learning_rate": 2e-05, "loss": 0.02633725, "step": 3909 }, { "epoch": 7.82, "grad_norm": 0.7395930886268616, "learning_rate": 2e-05, "loss": 0.02599102, "step": 3910 }, { "epoch": 7.822, "grad_norm": 1.1766316890716553, "learning_rate": 2e-05, "loss": 0.04053992, "step": 3911 }, { "epoch": 7.824, "grad_norm": 0.7809333801269531, "learning_rate": 2e-05, "loss": 0.02733547, "step": 3912 }, { "epoch": 7.826, "grad_norm": 1.6496003866195679, "learning_rate": 2e-05, "loss": 0.03772386, "step": 3913 }, { "epoch": 7.828, "grad_norm": 0.8800923824310303, "learning_rate": 2e-05, "loss": 0.02738228, "step": 3914 }, { "epoch": 7.83, "grad_norm": 1.893088698387146, "learning_rate": 2e-05, "loss": 0.06726804, "step": 3915 }, { "epoch": 7.832, "grad_norm": 1.6585403680801392, "learning_rate": 2e-05, "loss": 0.04652392, "step": 3916 }, { "epoch": 7.834, "grad_norm": 0.9095177054405212, "learning_rate": 2e-05, "loss": 0.03011854, "step": 3917 }, { "epoch": 7.836, "grad_norm": 0.9463668465614319, "learning_rate": 2e-05, "loss": 0.02677562, "step": 3918 }, { "epoch": 7.838, "grad_norm": 1.6362755298614502, "learning_rate": 2e-05, "loss": 0.04790834, "step": 3919 }, { "epoch": 7.84, "grad_norm": 1.5378211736679077, "learning_rate": 2e-05, "loss": 0.04531316, "step": 3920 }, { "epoch": 7.842, "grad_norm": 1.5984619855880737, "learning_rate": 2e-05, "loss": 0.03611154, "step": 3921 }, { "epoch": 7.844, "grad_norm": 1.1584720611572266, "learning_rate": 2e-05, "loss": 0.03605816, "step": 3922 }, { "epoch": 7.846, "grad_norm": 1.3745046854019165, "learning_rate": 2e-05, "loss": 0.05591146, "step": 3923 }, { "epoch": 7.848, "grad_norm": 1.197592854499817, "learning_rate": 2e-05, "loss": 0.03576121, "step": 3924 }, { "epoch": 7.85, "grad_norm": 0.728506863117218, "learning_rate": 2e-05, "loss": 0.02342383, "step": 3925 }, { "epoch": 7.852, "grad_norm": 1.1891478300094604, "learning_rate": 2e-05, "loss": 0.03850963, "step": 3926 }, { "epoch": 7.854, "grad_norm": 0.9041287302970886, "learning_rate": 2e-05, "loss": 0.0280861, "step": 3927 }, { "epoch": 7.856, "grad_norm": 0.8925413489341736, "learning_rate": 2e-05, "loss": 0.03245504, "step": 3928 }, { "epoch": 7.858, "grad_norm": 1.4786744117736816, "learning_rate": 2e-05, "loss": 0.05717735, "step": 3929 }, { "epoch": 7.86, "grad_norm": 1.1402781009674072, "learning_rate": 2e-05, "loss": 0.0326164, "step": 3930 }, { "epoch": 7.862, "grad_norm": 1.5494673252105713, "learning_rate": 2e-05, "loss": 0.03744037, "step": 3931 }, { "epoch": 7.864, "grad_norm": 1.35979425907135, "learning_rate": 2e-05, "loss": 0.04251961, "step": 3932 }, { "epoch": 7.866, "grad_norm": 1.2372130155563354, "learning_rate": 2e-05, "loss": 0.03939464, "step": 3933 }, { "epoch": 7.868, "grad_norm": 1.3939666748046875, "learning_rate": 2e-05, "loss": 0.04251079, "step": 3934 }, { "epoch": 7.87, "grad_norm": 1.2696725130081177, "learning_rate": 2e-05, "loss": 0.03698645, "step": 3935 }, { "epoch": 7.872, "grad_norm": 0.9012156128883362, "learning_rate": 2e-05, "loss": 0.02759615, "step": 3936 }, { "epoch": 7.874, "grad_norm": 1.097001075744629, "learning_rate": 2e-05, "loss": 0.03927659, "step": 3937 }, { "epoch": 7.876, "grad_norm": 1.1862810850143433, "learning_rate": 2e-05, "loss": 0.04554678, "step": 3938 }, { "epoch": 7.878, "grad_norm": 1.4030312299728394, "learning_rate": 2e-05, "loss": 0.03782191, "step": 3939 }, { "epoch": 7.88, "grad_norm": 1.5502287149429321, "learning_rate": 2e-05, "loss": 0.04240962, "step": 3940 }, { "epoch": 7.882, "grad_norm": 0.9080986976623535, "learning_rate": 2e-05, "loss": 0.02589066, "step": 3941 }, { "epoch": 7.884, "grad_norm": 1.7748860120773315, "learning_rate": 2e-05, "loss": 0.05778245, "step": 3942 }, { "epoch": 7.886, "grad_norm": 0.9814969301223755, "learning_rate": 2e-05, "loss": 0.03408955, "step": 3943 }, { "epoch": 7.888, "grad_norm": 1.237975835800171, "learning_rate": 2e-05, "loss": 0.0312886, "step": 3944 }, { "epoch": 7.89, "grad_norm": 1.0338196754455566, "learning_rate": 2e-05, "loss": 0.02565856, "step": 3945 }, { "epoch": 7.892, "grad_norm": 1.5750430822372437, "learning_rate": 2e-05, "loss": 0.05202571, "step": 3946 }, { "epoch": 7.894, "grad_norm": 1.1655192375183105, "learning_rate": 2e-05, "loss": 0.03870409, "step": 3947 }, { "epoch": 7.896, "grad_norm": 1.081615924835205, "learning_rate": 2e-05, "loss": 0.0323765, "step": 3948 }, { "epoch": 7.898, "grad_norm": 1.7128093242645264, "learning_rate": 2e-05, "loss": 0.03540624, "step": 3949 }, { "epoch": 7.9, "grad_norm": 1.6066638231277466, "learning_rate": 2e-05, "loss": 0.02993422, "step": 3950 }, { "epoch": 7.902, "grad_norm": 1.2403615713119507, "learning_rate": 2e-05, "loss": 0.04615326, "step": 3951 }, { "epoch": 7.904, "grad_norm": 0.9988969564437866, "learning_rate": 2e-05, "loss": 0.02401605, "step": 3952 }, { "epoch": 7.906, "grad_norm": 1.619643211364746, "learning_rate": 2e-05, "loss": 0.04283322, "step": 3953 }, { "epoch": 7.908, "grad_norm": 1.1637741327285767, "learning_rate": 2e-05, "loss": 0.03699302, "step": 3954 }, { "epoch": 7.91, "grad_norm": 2.1744768619537354, "learning_rate": 2e-05, "loss": 0.04263964, "step": 3955 }, { "epoch": 7.912, "grad_norm": 1.6165152788162231, "learning_rate": 2e-05, "loss": 0.0457342, "step": 3956 }, { "epoch": 7.914, "grad_norm": 1.2020422220230103, "learning_rate": 2e-05, "loss": 0.04034745, "step": 3957 }, { "epoch": 7.916, "grad_norm": 1.3157758712768555, "learning_rate": 2e-05, "loss": 0.04018557, "step": 3958 }, { "epoch": 7.918, "grad_norm": 1.2066090106964111, "learning_rate": 2e-05, "loss": 0.03669915, "step": 3959 }, { "epoch": 7.92, "grad_norm": 1.1166319847106934, "learning_rate": 2e-05, "loss": 0.03603387, "step": 3960 }, { "epoch": 7.922, "grad_norm": 0.8651148676872253, "learning_rate": 2e-05, "loss": 0.02808144, "step": 3961 }, { "epoch": 7.924, "grad_norm": 1.0431410074234009, "learning_rate": 2e-05, "loss": 0.03563861, "step": 3962 }, { "epoch": 7.926, "grad_norm": 1.4120630025863647, "learning_rate": 2e-05, "loss": 0.0318082, "step": 3963 }, { "epoch": 7.928, "grad_norm": 1.0506173372268677, "learning_rate": 2e-05, "loss": 0.03654406, "step": 3964 }, { "epoch": 7.93, "grad_norm": 0.9748714566230774, "learning_rate": 2e-05, "loss": 0.03358399, "step": 3965 }, { "epoch": 7.932, "grad_norm": 1.3131837844848633, "learning_rate": 2e-05, "loss": 0.03587186, "step": 3966 }, { "epoch": 7.934, "grad_norm": 1.5987757444381714, "learning_rate": 2e-05, "loss": 0.0590549, "step": 3967 }, { "epoch": 7.936, "grad_norm": 1.2557575702667236, "learning_rate": 2e-05, "loss": 0.0346074, "step": 3968 }, { "epoch": 7.938, "grad_norm": 1.3639891147613525, "learning_rate": 2e-05, "loss": 0.04513712, "step": 3969 }, { "epoch": 7.9399999999999995, "grad_norm": 0.6656497716903687, "learning_rate": 2e-05, "loss": 0.02078253, "step": 3970 }, { "epoch": 7.942, "grad_norm": 1.1551125049591064, "learning_rate": 2e-05, "loss": 0.03505882, "step": 3971 }, { "epoch": 7.944, "grad_norm": 1.7174309492111206, "learning_rate": 2e-05, "loss": 0.0455719, "step": 3972 }, { "epoch": 7.946, "grad_norm": 1.0670101642608643, "learning_rate": 2e-05, "loss": 0.03656618, "step": 3973 }, { "epoch": 7.948, "grad_norm": 1.4278584718704224, "learning_rate": 2e-05, "loss": 0.03429253, "step": 3974 }, { "epoch": 7.95, "grad_norm": 0.9087181091308594, "learning_rate": 2e-05, "loss": 0.02340866, "step": 3975 }, { "epoch": 7.952, "grad_norm": 1.058005928993225, "learning_rate": 2e-05, "loss": 0.03532021, "step": 3976 }, { "epoch": 7.954, "grad_norm": 1.096143364906311, "learning_rate": 2e-05, "loss": 0.02949866, "step": 3977 }, { "epoch": 7.9559999999999995, "grad_norm": 1.3344593048095703, "learning_rate": 2e-05, "loss": 0.04850399, "step": 3978 }, { "epoch": 7.958, "grad_norm": 1.1242055892944336, "learning_rate": 2e-05, "loss": 0.03923218, "step": 3979 }, { "epoch": 7.96, "grad_norm": 0.976310670375824, "learning_rate": 2e-05, "loss": 0.03507767, "step": 3980 }, { "epoch": 7.962, "grad_norm": 1.3097518682479858, "learning_rate": 2e-05, "loss": 0.03616764, "step": 3981 }, { "epoch": 7.964, "grad_norm": 0.8863202333450317, "learning_rate": 2e-05, "loss": 0.02762227, "step": 3982 }, { "epoch": 7.966, "grad_norm": 1.392021894454956, "learning_rate": 2e-05, "loss": 0.04287278, "step": 3983 }, { "epoch": 7.968, "grad_norm": 1.6524382829666138, "learning_rate": 2e-05, "loss": 0.04503299, "step": 3984 }, { "epoch": 7.97, "grad_norm": 1.2091068029403687, "learning_rate": 2e-05, "loss": 0.02638647, "step": 3985 }, { "epoch": 7.9719999999999995, "grad_norm": 1.1132644414901733, "learning_rate": 2e-05, "loss": 0.03839584, "step": 3986 }, { "epoch": 7.974, "grad_norm": 1.1247128248214722, "learning_rate": 2e-05, "loss": 0.03635703, "step": 3987 }, { "epoch": 7.976, "grad_norm": 0.9727789163589478, "learning_rate": 2e-05, "loss": 0.03537959, "step": 3988 }, { "epoch": 7.978, "grad_norm": 1.0374754667282104, "learning_rate": 2e-05, "loss": 0.0368279, "step": 3989 }, { "epoch": 7.98, "grad_norm": 1.889565348625183, "learning_rate": 2e-05, "loss": 0.05415813, "step": 3990 }, { "epoch": 7.982, "grad_norm": 1.1596338748931885, "learning_rate": 2e-05, "loss": 0.03456059, "step": 3991 }, { "epoch": 7.984, "grad_norm": 1.263301134109497, "learning_rate": 2e-05, "loss": 0.03879981, "step": 3992 }, { "epoch": 7.986, "grad_norm": 1.1385409832000732, "learning_rate": 2e-05, "loss": 0.03879371, "step": 3993 }, { "epoch": 7.9879999999999995, "grad_norm": 1.085569977760315, "learning_rate": 2e-05, "loss": 0.03336813, "step": 3994 }, { "epoch": 7.99, "grad_norm": 1.242136836051941, "learning_rate": 2e-05, "loss": 0.03714422, "step": 3995 }, { "epoch": 7.992, "grad_norm": 1.3992183208465576, "learning_rate": 2e-05, "loss": 0.03560712, "step": 3996 }, { "epoch": 7.994, "grad_norm": 1.3204437494277954, "learning_rate": 2e-05, "loss": 0.0404392, "step": 3997 }, { "epoch": 7.996, "grad_norm": 1.0440654754638672, "learning_rate": 2e-05, "loss": 0.04046052, "step": 3998 }, { "epoch": 7.998, "grad_norm": 0.9351603984832764, "learning_rate": 2e-05, "loss": 0.04053178, "step": 3999 }, { "epoch": 8.0, "grad_norm": 1.1394661664962769, "learning_rate": 2e-05, "loss": 0.03156887, "step": 4000 }, { "epoch": 8.0, "eval_performance": { "AngleClassification_1": 0.98, "AngleClassification_2": 0.998, "AngleClassification_3": 0.9281437125748503, "Equal_1": 0.982, "Equal_2": 0.9101796407185628, "Equal_3": 0.8323353293413174, "LineComparison_1": 1.0, "LineComparison_2": 0.9940119760479041, "LineComparison_3": 0.9880239520958084, "Parallel_1": 0.9779559118236473, "Parallel_2": 0.9979959919839679, "Parallel_3": 0.694, "Perpendicular_1": 0.988, "Perpendicular_2": 0.86, "Perpendicular_3": 0.48296593186372744, "PointLiesOnCircle_1": 0.9973279893119573, "PointLiesOnCircle_2": 0.998, "PointLiesOnCircle_3": 0.9916666666666667, "PointLiesOnLine_1": 0.9899799599198397, "PointLiesOnLine_2": 0.9799599198396793, "PointLiesOnLine_3": 0.8582834331337326 }, "eval_runtime": 225.6955, "eval_samples_per_second": 46.523, "eval_steps_per_second": 0.93, "step": 4000 }, { "epoch": 8.002, "grad_norm": 1.4119209051132202, "learning_rate": 2e-05, "loss": 0.03219637, "step": 4001 }, { "epoch": 8.004, "grad_norm": 1.2177033424377441, "learning_rate": 2e-05, "loss": 0.04212372, "step": 4002 }, { "epoch": 8.006, "grad_norm": 1.0056850910186768, "learning_rate": 2e-05, "loss": 0.04112868, "step": 4003 }, { "epoch": 8.008, "grad_norm": 1.0876402854919434, "learning_rate": 2e-05, "loss": 0.03480848, "step": 4004 }, { "epoch": 8.01, "grad_norm": 1.0041884183883667, "learning_rate": 2e-05, "loss": 0.0385941, "step": 4005 }, { "epoch": 8.012, "grad_norm": 1.2166727781295776, "learning_rate": 2e-05, "loss": 0.04547575, "step": 4006 }, { "epoch": 8.014, "grad_norm": 2.71073317527771, "learning_rate": 2e-05, "loss": 0.04807276, "step": 4007 }, { "epoch": 8.016, "grad_norm": 1.5562191009521484, "learning_rate": 2e-05, "loss": 0.04967951, "step": 4008 }, { "epoch": 8.018, "grad_norm": 1.029298186302185, "learning_rate": 2e-05, "loss": 0.03618919, "step": 4009 }, { "epoch": 8.02, "grad_norm": 1.322064757347107, "learning_rate": 2e-05, "loss": 0.04659977, "step": 4010 }, { "epoch": 8.022, "grad_norm": 1.305221438407898, "learning_rate": 2e-05, "loss": 0.03063146, "step": 4011 }, { "epoch": 8.024, "grad_norm": 1.521439552307129, "learning_rate": 2e-05, "loss": 0.02769434, "step": 4012 }, { "epoch": 8.026, "grad_norm": 0.8861672878265381, "learning_rate": 2e-05, "loss": 0.03443494, "step": 4013 }, { "epoch": 8.028, "grad_norm": 1.7962417602539062, "learning_rate": 2e-05, "loss": 0.05254519, "step": 4014 }, { "epoch": 8.03, "grad_norm": 1.1259509325027466, "learning_rate": 2e-05, "loss": 0.03992435, "step": 4015 }, { "epoch": 8.032, "grad_norm": 1.1400800943374634, "learning_rate": 2e-05, "loss": 0.04231308, "step": 4016 }, { "epoch": 8.034, "grad_norm": 0.9604584574699402, "learning_rate": 2e-05, "loss": 0.03868266, "step": 4017 }, { "epoch": 8.036, "grad_norm": 1.3197420835494995, "learning_rate": 2e-05, "loss": 0.0575745, "step": 4018 }, { "epoch": 8.038, "grad_norm": 2.3887784481048584, "learning_rate": 2e-05, "loss": 0.03125923, "step": 4019 }, { "epoch": 8.04, "grad_norm": 0.8534474968910217, "learning_rate": 2e-05, "loss": 0.02953053, "step": 4020 }, { "epoch": 8.042, "grad_norm": 1.0247678756713867, "learning_rate": 2e-05, "loss": 0.03511064, "step": 4021 }, { "epoch": 8.044, "grad_norm": 2.3081367015838623, "learning_rate": 2e-05, "loss": 0.0526976, "step": 4022 }, { "epoch": 8.046, "grad_norm": 1.5508410930633545, "learning_rate": 2e-05, "loss": 0.04298474, "step": 4023 }, { "epoch": 8.048, "grad_norm": 2.399048328399658, "learning_rate": 2e-05, "loss": 0.05028696, "step": 4024 }, { "epoch": 8.05, "grad_norm": 1.3377678394317627, "learning_rate": 2e-05, "loss": 0.03465681, "step": 4025 }, { "epoch": 8.052, "grad_norm": 1.5176995992660522, "learning_rate": 2e-05, "loss": 0.04952892, "step": 4026 }, { "epoch": 8.054, "grad_norm": 1.4206205606460571, "learning_rate": 2e-05, "loss": 0.0465099, "step": 4027 }, { "epoch": 8.056, "grad_norm": 1.2566664218902588, "learning_rate": 2e-05, "loss": 0.04825858, "step": 4028 }, { "epoch": 8.058, "grad_norm": 1.1745660305023193, "learning_rate": 2e-05, "loss": 0.02664936, "step": 4029 }, { "epoch": 8.06, "grad_norm": 0.8195359110832214, "learning_rate": 2e-05, "loss": 0.02955087, "step": 4030 }, { "epoch": 8.062, "grad_norm": 1.212228775024414, "learning_rate": 2e-05, "loss": 0.04336803, "step": 4031 }, { "epoch": 8.064, "grad_norm": 1.1255489587783813, "learning_rate": 2e-05, "loss": 0.04810651, "step": 4032 }, { "epoch": 8.066, "grad_norm": 1.8382498025894165, "learning_rate": 2e-05, "loss": 0.05709913, "step": 4033 }, { "epoch": 8.068, "grad_norm": 1.3335249423980713, "learning_rate": 2e-05, "loss": 0.05081996, "step": 4034 }, { "epoch": 8.07, "grad_norm": 1.3270105123519897, "learning_rate": 2e-05, "loss": 0.02735442, "step": 4035 }, { "epoch": 8.072, "grad_norm": 1.1317691802978516, "learning_rate": 2e-05, "loss": 0.03835673, "step": 4036 }, { "epoch": 8.074, "grad_norm": 1.1773289442062378, "learning_rate": 2e-05, "loss": 0.04438383, "step": 4037 }, { "epoch": 8.076, "grad_norm": 1.1699061393737793, "learning_rate": 2e-05, "loss": 0.03660499, "step": 4038 }, { "epoch": 8.078, "grad_norm": 1.5686216354370117, "learning_rate": 2e-05, "loss": 0.03659562, "step": 4039 }, { "epoch": 8.08, "grad_norm": 0.9857733249664307, "learning_rate": 2e-05, "loss": 0.02955357, "step": 4040 }, { "epoch": 8.082, "grad_norm": 1.5287553071975708, "learning_rate": 2e-05, "loss": 0.04695636, "step": 4041 }, { "epoch": 8.084, "grad_norm": 1.1321133375167847, "learning_rate": 2e-05, "loss": 0.03398918, "step": 4042 }, { "epoch": 8.086, "grad_norm": 1.0832672119140625, "learning_rate": 2e-05, "loss": 0.03769496, "step": 4043 }, { "epoch": 8.088, "grad_norm": 1.110114336013794, "learning_rate": 2e-05, "loss": 0.03376863, "step": 4044 }, { "epoch": 8.09, "grad_norm": 0.9534098505973816, "learning_rate": 2e-05, "loss": 0.03374486, "step": 4045 }, { "epoch": 8.092, "grad_norm": 0.9631739854812622, "learning_rate": 2e-05, "loss": 0.03460173, "step": 4046 }, { "epoch": 8.094, "grad_norm": 1.2686132192611694, "learning_rate": 2e-05, "loss": 0.04320743, "step": 4047 }, { "epoch": 8.096, "grad_norm": 0.974510908126831, "learning_rate": 2e-05, "loss": 0.03443984, "step": 4048 }, { "epoch": 8.098, "grad_norm": 1.1675100326538086, "learning_rate": 2e-05, "loss": 0.03407607, "step": 4049 }, { "epoch": 8.1, "grad_norm": 1.340183138847351, "learning_rate": 2e-05, "loss": 0.04262073, "step": 4050 }, { "epoch": 8.102, "grad_norm": 1.032210111618042, "learning_rate": 2e-05, "loss": 0.03945736, "step": 4051 }, { "epoch": 8.104, "grad_norm": 0.9768326282501221, "learning_rate": 2e-05, "loss": 0.03582599, "step": 4052 }, { "epoch": 8.106, "grad_norm": 0.8922199606895447, "learning_rate": 2e-05, "loss": 0.03436149, "step": 4053 }, { "epoch": 8.108, "grad_norm": 1.0165106058120728, "learning_rate": 2e-05, "loss": 0.0350354, "step": 4054 }, { "epoch": 8.11, "grad_norm": 1.2602726221084595, "learning_rate": 2e-05, "loss": 0.0410298, "step": 4055 }, { "epoch": 8.112, "grad_norm": 1.1547839641571045, "learning_rate": 2e-05, "loss": 0.03948238, "step": 4056 }, { "epoch": 8.114, "grad_norm": 1.3977960348129272, "learning_rate": 2e-05, "loss": 0.03890977, "step": 4057 }, { "epoch": 8.116, "grad_norm": 0.9865804314613342, "learning_rate": 2e-05, "loss": 0.03936205, "step": 4058 }, { "epoch": 8.118, "grad_norm": 1.0338565111160278, "learning_rate": 2e-05, "loss": 0.03758217, "step": 4059 }, { "epoch": 8.12, "grad_norm": 0.9402679800987244, "learning_rate": 2e-05, "loss": 0.02572946, "step": 4060 }, { "epoch": 8.122, "grad_norm": 1.1585195064544678, "learning_rate": 2e-05, "loss": 0.02765628, "step": 4061 }, { "epoch": 8.124, "grad_norm": 1.1797926425933838, "learning_rate": 2e-05, "loss": 0.03309508, "step": 4062 }, { "epoch": 8.126, "grad_norm": 2.9512665271759033, "learning_rate": 2e-05, "loss": 0.04922073, "step": 4063 }, { "epoch": 8.128, "grad_norm": 0.9916607141494751, "learning_rate": 2e-05, "loss": 0.03255268, "step": 4064 }, { "epoch": 8.13, "grad_norm": 1.2742884159088135, "learning_rate": 2e-05, "loss": 0.03564069, "step": 4065 }, { "epoch": 8.132, "grad_norm": 0.7824550271034241, "learning_rate": 2e-05, "loss": 0.02065559, "step": 4066 }, { "epoch": 8.134, "grad_norm": 1.0986707210540771, "learning_rate": 2e-05, "loss": 0.03769509, "step": 4067 }, { "epoch": 8.136, "grad_norm": 0.7767794728279114, "learning_rate": 2e-05, "loss": 0.02436521, "step": 4068 }, { "epoch": 8.138, "grad_norm": 0.9393649697303772, "learning_rate": 2e-05, "loss": 0.02832365, "step": 4069 }, { "epoch": 8.14, "grad_norm": 1.0402870178222656, "learning_rate": 2e-05, "loss": 0.04314765, "step": 4070 }, { "epoch": 8.142, "grad_norm": 0.7851106524467468, "learning_rate": 2e-05, "loss": 0.0254925, "step": 4071 }, { "epoch": 8.144, "grad_norm": 0.872813880443573, "learning_rate": 2e-05, "loss": 0.02209342, "step": 4072 }, { "epoch": 8.146, "grad_norm": 1.0557291507720947, "learning_rate": 2e-05, "loss": 0.03590484, "step": 4073 }, { "epoch": 8.148, "grad_norm": 2.341787815093994, "learning_rate": 2e-05, "loss": 0.03920754, "step": 4074 }, { "epoch": 8.15, "grad_norm": 1.0240808725357056, "learning_rate": 2e-05, "loss": 0.0294111, "step": 4075 }, { "epoch": 8.152, "grad_norm": 1.6792032718658447, "learning_rate": 2e-05, "loss": 0.03478878, "step": 4076 }, { "epoch": 8.154, "grad_norm": 1.5784943103790283, "learning_rate": 2e-05, "loss": 0.05027056, "step": 4077 }, { "epoch": 8.156, "grad_norm": 2.692805528640747, "learning_rate": 2e-05, "loss": 0.04727435, "step": 4078 }, { "epoch": 8.158, "grad_norm": 5.189362525939941, "learning_rate": 2e-05, "loss": 0.04740378, "step": 4079 }, { "epoch": 8.16, "grad_norm": 0.8995183706283569, "learning_rate": 2e-05, "loss": 0.02884007, "step": 4080 }, { "epoch": 8.162, "grad_norm": 1.0312719345092773, "learning_rate": 2e-05, "loss": 0.03807984, "step": 4081 }, { "epoch": 8.164, "grad_norm": 1.2754626274108887, "learning_rate": 2e-05, "loss": 0.04430226, "step": 4082 }, { "epoch": 8.166, "grad_norm": 1.613770604133606, "learning_rate": 2e-05, "loss": 0.03124395, "step": 4083 }, { "epoch": 8.168, "grad_norm": 1.4395604133605957, "learning_rate": 2e-05, "loss": 0.04359567, "step": 4084 }, { "epoch": 8.17, "grad_norm": 0.8833742141723633, "learning_rate": 2e-05, "loss": 0.03563846, "step": 4085 }, { "epoch": 8.172, "grad_norm": 0.9417271018028259, "learning_rate": 2e-05, "loss": 0.03022023, "step": 4086 }, { "epoch": 8.174, "grad_norm": 1.921120524406433, "learning_rate": 2e-05, "loss": 0.05529236, "step": 4087 }, { "epoch": 8.176, "grad_norm": 1.0336357355117798, "learning_rate": 2e-05, "loss": 0.02654794, "step": 4088 }, { "epoch": 8.178, "grad_norm": 1.1937464475631714, "learning_rate": 2e-05, "loss": 0.0407705, "step": 4089 }, { "epoch": 8.18, "grad_norm": 0.9433906674385071, "learning_rate": 2e-05, "loss": 0.02974091, "step": 4090 }, { "epoch": 8.182, "grad_norm": 0.9415001273155212, "learning_rate": 2e-05, "loss": 0.03482451, "step": 4091 }, { "epoch": 8.184, "grad_norm": 1.079895257949829, "learning_rate": 2e-05, "loss": 0.03403471, "step": 4092 }, { "epoch": 8.186, "grad_norm": 0.7046507000923157, "learning_rate": 2e-05, "loss": 0.02133265, "step": 4093 }, { "epoch": 8.188, "grad_norm": 0.9372413158416748, "learning_rate": 2e-05, "loss": 0.02843397, "step": 4094 }, { "epoch": 8.19, "grad_norm": 2.013688087463379, "learning_rate": 2e-05, "loss": 0.03952524, "step": 4095 }, { "epoch": 8.192, "grad_norm": 1.4509131908416748, "learning_rate": 2e-05, "loss": 0.04074506, "step": 4096 }, { "epoch": 8.194, "grad_norm": 2.134747266769409, "learning_rate": 2e-05, "loss": 0.04865081, "step": 4097 }, { "epoch": 8.196, "grad_norm": 1.7605409622192383, "learning_rate": 2e-05, "loss": 0.04180622, "step": 4098 }, { "epoch": 8.198, "grad_norm": 1.2503048181533813, "learning_rate": 2e-05, "loss": 0.03402548, "step": 4099 }, { "epoch": 8.2, "grad_norm": 1.3167998790740967, "learning_rate": 2e-05, "loss": 0.0470066, "step": 4100 }, { "epoch": 8.202, "grad_norm": 0.9883024096488953, "learning_rate": 2e-05, "loss": 0.03507129, "step": 4101 }, { "epoch": 8.204, "grad_norm": 2.5922324657440186, "learning_rate": 2e-05, "loss": 0.03988982, "step": 4102 }, { "epoch": 8.206, "grad_norm": 0.9423674941062927, "learning_rate": 2e-05, "loss": 0.0306279, "step": 4103 }, { "epoch": 8.208, "grad_norm": 1.5130466222763062, "learning_rate": 2e-05, "loss": 0.04762103, "step": 4104 }, { "epoch": 8.21, "grad_norm": 2.259579658508301, "learning_rate": 2e-05, "loss": 0.02771044, "step": 4105 }, { "epoch": 8.212, "grad_norm": 1.7975808382034302, "learning_rate": 2e-05, "loss": 0.06337868, "step": 4106 }, { "epoch": 8.214, "grad_norm": 1.5278784036636353, "learning_rate": 2e-05, "loss": 0.04260637, "step": 4107 }, { "epoch": 8.216, "grad_norm": 1.3236819505691528, "learning_rate": 2e-05, "loss": 0.04156256, "step": 4108 }, { "epoch": 8.218, "grad_norm": 1.2842603921890259, "learning_rate": 2e-05, "loss": 0.04233573, "step": 4109 }, { "epoch": 8.22, "grad_norm": 1.0574822425842285, "learning_rate": 2e-05, "loss": 0.0349409, "step": 4110 }, { "epoch": 8.222, "grad_norm": 0.7956651449203491, "learning_rate": 2e-05, "loss": 0.02626521, "step": 4111 }, { "epoch": 8.224, "grad_norm": 1.029059886932373, "learning_rate": 2e-05, "loss": 0.03388299, "step": 4112 }, { "epoch": 8.226, "grad_norm": 1.0778995752334595, "learning_rate": 2e-05, "loss": 0.03479785, "step": 4113 }, { "epoch": 8.228, "grad_norm": 1.3440552949905396, "learning_rate": 2e-05, "loss": 0.03567082, "step": 4114 }, { "epoch": 8.23, "grad_norm": 0.7402258515357971, "learning_rate": 2e-05, "loss": 0.01955719, "step": 4115 }, { "epoch": 8.232, "grad_norm": 1.2183341979980469, "learning_rate": 2e-05, "loss": 0.02689785, "step": 4116 }, { "epoch": 8.234, "grad_norm": 1.0224542617797852, "learning_rate": 2e-05, "loss": 0.05021556, "step": 4117 }, { "epoch": 8.236, "grad_norm": 1.136129379272461, "learning_rate": 2e-05, "loss": 0.04270374, "step": 4118 }, { "epoch": 8.238, "grad_norm": 0.9221569895744324, "learning_rate": 2e-05, "loss": 0.03427961, "step": 4119 }, { "epoch": 8.24, "grad_norm": 1.804606318473816, "learning_rate": 2e-05, "loss": 0.04040957, "step": 4120 }, { "epoch": 8.242, "grad_norm": 1.0163843631744385, "learning_rate": 2e-05, "loss": 0.03375972, "step": 4121 }, { "epoch": 8.244, "grad_norm": 1.2835637331008911, "learning_rate": 2e-05, "loss": 0.04067679, "step": 4122 }, { "epoch": 8.246, "grad_norm": 1.0977541208267212, "learning_rate": 2e-05, "loss": 0.03378581, "step": 4123 }, { "epoch": 8.248, "grad_norm": 1.2473630905151367, "learning_rate": 2e-05, "loss": 0.02938252, "step": 4124 }, { "epoch": 8.25, "grad_norm": 1.0864611864089966, "learning_rate": 2e-05, "loss": 0.02955379, "step": 4125 }, { "epoch": 8.252, "grad_norm": 0.7222666144371033, "learning_rate": 2e-05, "loss": 0.0249244, "step": 4126 }, { "epoch": 8.254, "grad_norm": 1.1524958610534668, "learning_rate": 2e-05, "loss": 0.03233204, "step": 4127 }, { "epoch": 8.256, "grad_norm": 1.1087329387664795, "learning_rate": 2e-05, "loss": 0.04439083, "step": 4128 }, { "epoch": 8.258, "grad_norm": 1.1859476566314697, "learning_rate": 2e-05, "loss": 0.02757011, "step": 4129 }, { "epoch": 8.26, "grad_norm": 0.8382571339607239, "learning_rate": 2e-05, "loss": 0.0306482, "step": 4130 }, { "epoch": 8.262, "grad_norm": 1.179650902748108, "learning_rate": 2e-05, "loss": 0.03184071, "step": 4131 }, { "epoch": 8.264, "grad_norm": 1.7352696657180786, "learning_rate": 2e-05, "loss": 0.04226511, "step": 4132 }, { "epoch": 8.266, "grad_norm": 1.112328052520752, "learning_rate": 2e-05, "loss": 0.03704386, "step": 4133 }, { "epoch": 8.268, "grad_norm": 1.0117770433425903, "learning_rate": 2e-05, "loss": 0.02762735, "step": 4134 }, { "epoch": 8.27, "grad_norm": 1.1358014345169067, "learning_rate": 2e-05, "loss": 0.04140215, "step": 4135 }, { "epoch": 8.272, "grad_norm": 1.426727294921875, "learning_rate": 2e-05, "loss": 0.05300343, "step": 4136 }, { "epoch": 8.274000000000001, "grad_norm": 0.792512834072113, "learning_rate": 2e-05, "loss": 0.02391674, "step": 4137 }, { "epoch": 8.276, "grad_norm": 1.3733137845993042, "learning_rate": 2e-05, "loss": 0.04097043, "step": 4138 }, { "epoch": 8.278, "grad_norm": 1.960314393043518, "learning_rate": 2e-05, "loss": 0.03319813, "step": 4139 }, { "epoch": 8.28, "grad_norm": 0.823380172252655, "learning_rate": 2e-05, "loss": 0.02641078, "step": 4140 }, { "epoch": 8.282, "grad_norm": 0.937158465385437, "learning_rate": 2e-05, "loss": 0.02914222, "step": 4141 }, { "epoch": 8.284, "grad_norm": 1.572863221168518, "learning_rate": 2e-05, "loss": 0.04503759, "step": 4142 }, { "epoch": 8.286, "grad_norm": 1.6610914468765259, "learning_rate": 2e-05, "loss": 0.04227349, "step": 4143 }, { "epoch": 8.288, "grad_norm": 1.1219995021820068, "learning_rate": 2e-05, "loss": 0.03582253, "step": 4144 }, { "epoch": 8.29, "grad_norm": 1.0500918626785278, "learning_rate": 2e-05, "loss": 0.03674741, "step": 4145 }, { "epoch": 8.292, "grad_norm": 0.904999315738678, "learning_rate": 2e-05, "loss": 0.03100994, "step": 4146 }, { "epoch": 8.294, "grad_norm": 1.3050191402435303, "learning_rate": 2e-05, "loss": 0.0361178, "step": 4147 }, { "epoch": 8.296, "grad_norm": 1.304196834564209, "learning_rate": 2e-05, "loss": 0.03543191, "step": 4148 }, { "epoch": 8.298, "grad_norm": 1.4014991521835327, "learning_rate": 2e-05, "loss": 0.03380605, "step": 4149 }, { "epoch": 8.3, "grad_norm": 0.9975857734680176, "learning_rate": 2e-05, "loss": 0.03645799, "step": 4150 }, { "epoch": 8.302, "grad_norm": 1.058785319328308, "learning_rate": 2e-05, "loss": 0.03003718, "step": 4151 }, { "epoch": 8.304, "grad_norm": 0.996228814125061, "learning_rate": 2e-05, "loss": 0.03437356, "step": 4152 }, { "epoch": 8.306, "grad_norm": 1.0564597845077515, "learning_rate": 2e-05, "loss": 0.04481754, "step": 4153 }, { "epoch": 8.308, "grad_norm": 1.2097054719924927, "learning_rate": 2e-05, "loss": 0.03743169, "step": 4154 }, { "epoch": 8.31, "grad_norm": 0.9662020206451416, "learning_rate": 2e-05, "loss": 0.04509608, "step": 4155 }, { "epoch": 8.312, "grad_norm": 0.9394364953041077, "learning_rate": 2e-05, "loss": 0.02996207, "step": 4156 }, { "epoch": 8.314, "grad_norm": 0.8993962407112122, "learning_rate": 2e-05, "loss": 0.02919, "step": 4157 }, { "epoch": 8.316, "grad_norm": 1.556430697441101, "learning_rate": 2e-05, "loss": 0.05358632, "step": 4158 }, { "epoch": 8.318, "grad_norm": 1.1967846155166626, "learning_rate": 2e-05, "loss": 0.03593311, "step": 4159 }, { "epoch": 8.32, "grad_norm": 1.4170066118240356, "learning_rate": 2e-05, "loss": 0.04683593, "step": 4160 }, { "epoch": 8.322, "grad_norm": 1.1824311017990112, "learning_rate": 2e-05, "loss": 0.04749808, "step": 4161 }, { "epoch": 8.324, "grad_norm": 1.3930622339248657, "learning_rate": 2e-05, "loss": 0.03375048, "step": 4162 }, { "epoch": 8.326, "grad_norm": 1.4637404680252075, "learning_rate": 2e-05, "loss": 0.04472891, "step": 4163 }, { "epoch": 8.328, "grad_norm": 2.0344045162200928, "learning_rate": 2e-05, "loss": 0.04480408, "step": 4164 }, { "epoch": 8.33, "grad_norm": 1.2924295663833618, "learning_rate": 2e-05, "loss": 0.04290893, "step": 4165 }, { "epoch": 8.332, "grad_norm": 1.129252314567566, "learning_rate": 2e-05, "loss": 0.04976633, "step": 4166 }, { "epoch": 8.334, "grad_norm": 1.3810979127883911, "learning_rate": 2e-05, "loss": 0.04157915, "step": 4167 }, { "epoch": 8.336, "grad_norm": 1.7910168170928955, "learning_rate": 2e-05, "loss": 0.04355691, "step": 4168 }, { "epoch": 8.338, "grad_norm": 0.9811388850212097, "learning_rate": 2e-05, "loss": 0.02883729, "step": 4169 }, { "epoch": 8.34, "grad_norm": 0.9929591417312622, "learning_rate": 2e-05, "loss": 0.03289343, "step": 4170 }, { "epoch": 8.342, "grad_norm": 1.4678994417190552, "learning_rate": 2e-05, "loss": 0.03899784, "step": 4171 }, { "epoch": 8.344, "grad_norm": 1.05734121799469, "learning_rate": 2e-05, "loss": 0.03796952, "step": 4172 }, { "epoch": 8.346, "grad_norm": 0.8461377620697021, "learning_rate": 2e-05, "loss": 0.03108793, "step": 4173 }, { "epoch": 8.348, "grad_norm": 0.8991321921348572, "learning_rate": 2e-05, "loss": 0.03290858, "step": 4174 }, { "epoch": 8.35, "grad_norm": 1.3600754737854004, "learning_rate": 2e-05, "loss": 0.05726715, "step": 4175 }, { "epoch": 8.352, "grad_norm": 0.977905809879303, "learning_rate": 2e-05, "loss": 0.03884137, "step": 4176 }, { "epoch": 8.354, "grad_norm": 1.0791375637054443, "learning_rate": 2e-05, "loss": 0.0427704, "step": 4177 }, { "epoch": 8.356, "grad_norm": 1.0275250673294067, "learning_rate": 2e-05, "loss": 0.0352735, "step": 4178 }, { "epoch": 8.358, "grad_norm": 1.00509774684906, "learning_rate": 2e-05, "loss": 0.02992057, "step": 4179 }, { "epoch": 8.36, "grad_norm": 1.1546076536178589, "learning_rate": 2e-05, "loss": 0.04918251, "step": 4180 }, { "epoch": 8.362, "grad_norm": 1.1534440517425537, "learning_rate": 2e-05, "loss": 0.03584889, "step": 4181 }, { "epoch": 8.364, "grad_norm": 1.3825478553771973, "learning_rate": 2e-05, "loss": 0.02998423, "step": 4182 }, { "epoch": 8.366, "grad_norm": 1.0779460668563843, "learning_rate": 2e-05, "loss": 0.03730501, "step": 4183 }, { "epoch": 8.368, "grad_norm": 1.360336184501648, "learning_rate": 2e-05, "loss": 0.05474132, "step": 4184 }, { "epoch": 8.37, "grad_norm": 0.9718125462532043, "learning_rate": 2e-05, "loss": 0.03339643, "step": 4185 }, { "epoch": 8.372, "grad_norm": 1.410914659500122, "learning_rate": 2e-05, "loss": 0.04282892, "step": 4186 }, { "epoch": 8.374, "grad_norm": 1.5057727098464966, "learning_rate": 2e-05, "loss": 0.05220322, "step": 4187 }, { "epoch": 8.376, "grad_norm": 1.0408916473388672, "learning_rate": 2e-05, "loss": 0.03753953, "step": 4188 }, { "epoch": 8.378, "grad_norm": 1.4054617881774902, "learning_rate": 2e-05, "loss": 0.04013094, "step": 4189 }, { "epoch": 8.38, "grad_norm": 1.454964518547058, "learning_rate": 2e-05, "loss": 0.04104815, "step": 4190 }, { "epoch": 8.382, "grad_norm": 1.3719788789749146, "learning_rate": 2e-05, "loss": 0.05186041, "step": 4191 }, { "epoch": 8.384, "grad_norm": 2.958334445953369, "learning_rate": 2e-05, "loss": 0.04485861, "step": 4192 }, { "epoch": 8.386, "grad_norm": 0.7732417583465576, "learning_rate": 2e-05, "loss": 0.02735084, "step": 4193 }, { "epoch": 8.388, "grad_norm": 0.8741227984428406, "learning_rate": 2e-05, "loss": 0.02949417, "step": 4194 }, { "epoch": 8.39, "grad_norm": 1.0400797128677368, "learning_rate": 2e-05, "loss": 0.03923693, "step": 4195 }, { "epoch": 8.392, "grad_norm": 1.0331785678863525, "learning_rate": 2e-05, "loss": 0.02930253, "step": 4196 }, { "epoch": 8.394, "grad_norm": 1.2648916244506836, "learning_rate": 2e-05, "loss": 0.04915082, "step": 4197 }, { "epoch": 8.396, "grad_norm": 1.132004737854004, "learning_rate": 2e-05, "loss": 0.04287592, "step": 4198 }, { "epoch": 8.398, "grad_norm": 1.0111182928085327, "learning_rate": 2e-05, "loss": 0.0362489, "step": 4199 }, { "epoch": 8.4, "grad_norm": 1.0471312999725342, "learning_rate": 2e-05, "loss": 0.03947051, "step": 4200 }, { "epoch": 8.402, "grad_norm": 0.7414948344230652, "learning_rate": 2e-05, "loss": 0.02271411, "step": 4201 }, { "epoch": 8.404, "grad_norm": 1.5574841499328613, "learning_rate": 2e-05, "loss": 0.0329715, "step": 4202 }, { "epoch": 8.406, "grad_norm": 1.5412328243255615, "learning_rate": 2e-05, "loss": 0.04185834, "step": 4203 }, { "epoch": 8.408, "grad_norm": 1.1066306829452515, "learning_rate": 2e-05, "loss": 0.03988752, "step": 4204 }, { "epoch": 8.41, "grad_norm": 1.1329011917114258, "learning_rate": 2e-05, "loss": 0.04049644, "step": 4205 }, { "epoch": 8.412, "grad_norm": 1.0475636720657349, "learning_rate": 2e-05, "loss": 0.02770457, "step": 4206 }, { "epoch": 8.414, "grad_norm": 1.4119455814361572, "learning_rate": 2e-05, "loss": 0.03110682, "step": 4207 }, { "epoch": 8.416, "grad_norm": 1.0786715745925903, "learning_rate": 2e-05, "loss": 0.03002795, "step": 4208 }, { "epoch": 8.418, "grad_norm": 1.3182508945465088, "learning_rate": 2e-05, "loss": 0.03994723, "step": 4209 }, { "epoch": 8.42, "grad_norm": 1.2412482500076294, "learning_rate": 2e-05, "loss": 0.03197087, "step": 4210 }, { "epoch": 8.422, "grad_norm": 1.1848154067993164, "learning_rate": 2e-05, "loss": 0.03602168, "step": 4211 }, { "epoch": 8.424, "grad_norm": 1.6346838474273682, "learning_rate": 2e-05, "loss": 0.03341147, "step": 4212 }, { "epoch": 8.426, "grad_norm": 0.9019219279289246, "learning_rate": 2e-05, "loss": 0.02708273, "step": 4213 }, { "epoch": 8.428, "grad_norm": 0.9286878705024719, "learning_rate": 2e-05, "loss": 0.03230337, "step": 4214 }, { "epoch": 8.43, "grad_norm": 1.3544023036956787, "learning_rate": 2e-05, "loss": 0.05526804, "step": 4215 }, { "epoch": 8.432, "grad_norm": 1.3710386753082275, "learning_rate": 2e-05, "loss": 0.04376029, "step": 4216 }, { "epoch": 8.434, "grad_norm": 1.0318108797073364, "learning_rate": 2e-05, "loss": 0.03416612, "step": 4217 }, { "epoch": 8.436, "grad_norm": 2.973123550415039, "learning_rate": 2e-05, "loss": 0.05688485, "step": 4218 }, { "epoch": 8.438, "grad_norm": 1.321817398071289, "learning_rate": 2e-05, "loss": 0.0353661, "step": 4219 }, { "epoch": 8.44, "grad_norm": 1.0643287897109985, "learning_rate": 2e-05, "loss": 0.02888464, "step": 4220 }, { "epoch": 8.442, "grad_norm": 1.104353904724121, "learning_rate": 2e-05, "loss": 0.03522876, "step": 4221 }, { "epoch": 8.444, "grad_norm": 0.897213876247406, "learning_rate": 2e-05, "loss": 0.03471637, "step": 4222 }, { "epoch": 8.446, "grad_norm": 1.5485879182815552, "learning_rate": 2e-05, "loss": 0.03032167, "step": 4223 }, { "epoch": 8.448, "grad_norm": 1.652105450630188, "learning_rate": 2e-05, "loss": 0.02511376, "step": 4224 }, { "epoch": 8.45, "grad_norm": 0.8463980555534363, "learning_rate": 2e-05, "loss": 0.03019558, "step": 4225 }, { "epoch": 8.452, "grad_norm": 0.8331522345542908, "learning_rate": 2e-05, "loss": 0.02027452, "step": 4226 }, { "epoch": 8.454, "grad_norm": 1.4463618993759155, "learning_rate": 2e-05, "loss": 0.0513086, "step": 4227 }, { "epoch": 8.456, "grad_norm": 1.0614899396896362, "learning_rate": 2e-05, "loss": 0.03257486, "step": 4228 }, { "epoch": 8.458, "grad_norm": 1.009953498840332, "learning_rate": 2e-05, "loss": 0.02659165, "step": 4229 }, { "epoch": 8.46, "grad_norm": 1.7296977043151855, "learning_rate": 2e-05, "loss": 0.03357616, "step": 4230 }, { "epoch": 8.462, "grad_norm": 1.013688325881958, "learning_rate": 2e-05, "loss": 0.03329365, "step": 4231 }, { "epoch": 8.464, "grad_norm": 1.3156310319900513, "learning_rate": 2e-05, "loss": 0.03646721, "step": 4232 }, { "epoch": 8.466, "grad_norm": 2.928880214691162, "learning_rate": 2e-05, "loss": 0.03669482, "step": 4233 }, { "epoch": 8.468, "grad_norm": 1.5548429489135742, "learning_rate": 2e-05, "loss": 0.0530042, "step": 4234 }, { "epoch": 8.47, "grad_norm": 1.4686297178268433, "learning_rate": 2e-05, "loss": 0.04117443, "step": 4235 }, { "epoch": 8.472, "grad_norm": 1.2238328456878662, "learning_rate": 2e-05, "loss": 0.03681629, "step": 4236 }, { "epoch": 8.474, "grad_norm": 1.2934352159500122, "learning_rate": 2e-05, "loss": 0.04645056, "step": 4237 }, { "epoch": 8.475999999999999, "grad_norm": 1.0786958932876587, "learning_rate": 2e-05, "loss": 0.03354469, "step": 4238 }, { "epoch": 8.478, "grad_norm": 1.9078060388565063, "learning_rate": 2e-05, "loss": 0.05416323, "step": 4239 }, { "epoch": 8.48, "grad_norm": 0.9229801893234253, "learning_rate": 2e-05, "loss": 0.0248043, "step": 4240 }, { "epoch": 8.482, "grad_norm": 1.0440524816513062, "learning_rate": 2e-05, "loss": 0.02745001, "step": 4241 }, { "epoch": 8.484, "grad_norm": 0.8694452047348022, "learning_rate": 2e-05, "loss": 0.02780109, "step": 4242 }, { "epoch": 8.486, "grad_norm": 1.180073857307434, "learning_rate": 2e-05, "loss": 0.03519845, "step": 4243 }, { "epoch": 8.488, "grad_norm": 1.772081971168518, "learning_rate": 2e-05, "loss": 0.04389036, "step": 4244 }, { "epoch": 8.49, "grad_norm": 1.4524823427200317, "learning_rate": 2e-05, "loss": 0.03568736, "step": 4245 }, { "epoch": 8.492, "grad_norm": 1.1907380819320679, "learning_rate": 2e-05, "loss": 0.04222452, "step": 4246 }, { "epoch": 8.494, "grad_norm": 1.4214314222335815, "learning_rate": 2e-05, "loss": 0.03957452, "step": 4247 }, { "epoch": 8.496, "grad_norm": 1.6145786046981812, "learning_rate": 2e-05, "loss": 0.0533987, "step": 4248 }, { "epoch": 8.498, "grad_norm": 0.8746370673179626, "learning_rate": 2e-05, "loss": 0.02794868, "step": 4249 }, { "epoch": 8.5, "grad_norm": 1.8504583835601807, "learning_rate": 2e-05, "loss": 0.04572233, "step": 4250 }, { "epoch": 8.502, "grad_norm": 1.630309820175171, "learning_rate": 2e-05, "loss": 0.03021394, "step": 4251 }, { "epoch": 8.504, "grad_norm": 1.6555626392364502, "learning_rate": 2e-05, "loss": 0.0516368, "step": 4252 }, { "epoch": 8.506, "grad_norm": 4.540745735168457, "learning_rate": 2e-05, "loss": 0.07011251, "step": 4253 }, { "epoch": 8.508, "grad_norm": 1.2824065685272217, "learning_rate": 2e-05, "loss": 0.03694147, "step": 4254 }, { "epoch": 8.51, "grad_norm": 2.117112874984741, "learning_rate": 2e-05, "loss": 0.04988319, "step": 4255 }, { "epoch": 8.512, "grad_norm": 1.2272472381591797, "learning_rate": 2e-05, "loss": 0.04086391, "step": 4256 }, { "epoch": 8.514, "grad_norm": 1.3374364376068115, "learning_rate": 2e-05, "loss": 0.03726092, "step": 4257 }, { "epoch": 8.516, "grad_norm": 1.26241135597229, "learning_rate": 2e-05, "loss": 0.02950322, "step": 4258 }, { "epoch": 8.518, "grad_norm": 1.7871663570404053, "learning_rate": 2e-05, "loss": 0.04202069, "step": 4259 }, { "epoch": 8.52, "grad_norm": 3.4672632217407227, "learning_rate": 2e-05, "loss": 0.03971061, "step": 4260 }, { "epoch": 8.522, "grad_norm": 1.128089427947998, "learning_rate": 2e-05, "loss": 0.02968075, "step": 4261 }, { "epoch": 8.524000000000001, "grad_norm": 0.9007006883621216, "learning_rate": 2e-05, "loss": 0.03684574, "step": 4262 }, { "epoch": 8.526, "grad_norm": 1.7186260223388672, "learning_rate": 2e-05, "loss": 0.03580798, "step": 4263 }, { "epoch": 8.528, "grad_norm": 1.0435247421264648, "learning_rate": 2e-05, "loss": 0.03076722, "step": 4264 }, { "epoch": 8.53, "grad_norm": 3.8075263500213623, "learning_rate": 2e-05, "loss": 0.03024363, "step": 4265 }, { "epoch": 8.532, "grad_norm": 1.043341040611267, "learning_rate": 2e-05, "loss": 0.03044458, "step": 4266 }, { "epoch": 8.534, "grad_norm": 1.301254391670227, "learning_rate": 2e-05, "loss": 0.02432681, "step": 4267 }, { "epoch": 8.536, "grad_norm": 0.9290913939476013, "learning_rate": 2e-05, "loss": 0.03872669, "step": 4268 }, { "epoch": 8.538, "grad_norm": 1.045127272605896, "learning_rate": 2e-05, "loss": 0.03397749, "step": 4269 }, { "epoch": 8.54, "grad_norm": 1.5763845443725586, "learning_rate": 2e-05, "loss": 0.03451675, "step": 4270 }, { "epoch": 8.542, "grad_norm": 1.2873786687850952, "learning_rate": 2e-05, "loss": 0.04421094, "step": 4271 }, { "epoch": 8.544, "grad_norm": 0.9438953399658203, "learning_rate": 2e-05, "loss": 0.02846372, "step": 4272 }, { "epoch": 8.546, "grad_norm": 0.9400883316993713, "learning_rate": 2e-05, "loss": 0.02589345, "step": 4273 }, { "epoch": 8.548, "grad_norm": 1.0678108930587769, "learning_rate": 2e-05, "loss": 0.03742916, "step": 4274 }, { "epoch": 8.55, "grad_norm": 1.8210856914520264, "learning_rate": 2e-05, "loss": 0.04695328, "step": 4275 }, { "epoch": 8.552, "grad_norm": 1.0226603746414185, "learning_rate": 2e-05, "loss": 0.03043972, "step": 4276 }, { "epoch": 8.554, "grad_norm": 1.7353748083114624, "learning_rate": 2e-05, "loss": 0.05117977, "step": 4277 }, { "epoch": 8.556000000000001, "grad_norm": 1.1166836023330688, "learning_rate": 2e-05, "loss": 0.03510939, "step": 4278 }, { "epoch": 8.558, "grad_norm": 1.272791862487793, "learning_rate": 2e-05, "loss": 0.03132101, "step": 4279 }, { "epoch": 8.56, "grad_norm": 1.1505990028381348, "learning_rate": 2e-05, "loss": 0.03408735, "step": 4280 }, { "epoch": 8.562, "grad_norm": 1.6795024871826172, "learning_rate": 2e-05, "loss": 0.04506549, "step": 4281 }, { "epoch": 8.564, "grad_norm": 1.3078727722167969, "learning_rate": 2e-05, "loss": 0.03593609, "step": 4282 }, { "epoch": 8.566, "grad_norm": 0.8563733100891113, "learning_rate": 2e-05, "loss": 0.03196423, "step": 4283 }, { "epoch": 8.568, "grad_norm": 1.2288391590118408, "learning_rate": 2e-05, "loss": 0.03614033, "step": 4284 }, { "epoch": 8.57, "grad_norm": 1.007755994796753, "learning_rate": 2e-05, "loss": 0.02932066, "step": 4285 }, { "epoch": 8.572, "grad_norm": 1.8930143117904663, "learning_rate": 2e-05, "loss": 0.04287382, "step": 4286 }, { "epoch": 8.574, "grad_norm": 1.6621681451797485, "learning_rate": 2e-05, "loss": 0.04692085, "step": 4287 }, { "epoch": 8.576, "grad_norm": 1.295868992805481, "learning_rate": 2e-05, "loss": 0.03404344, "step": 4288 }, { "epoch": 8.578, "grad_norm": 0.8671032786369324, "learning_rate": 2e-05, "loss": 0.030484, "step": 4289 }, { "epoch": 8.58, "grad_norm": 1.0084338188171387, "learning_rate": 2e-05, "loss": 0.02456804, "step": 4290 }, { "epoch": 8.582, "grad_norm": 1.219338059425354, "learning_rate": 2e-05, "loss": 0.03653606, "step": 4291 }, { "epoch": 8.584, "grad_norm": 0.9035724401473999, "learning_rate": 2e-05, "loss": 0.03740543, "step": 4292 }, { "epoch": 8.586, "grad_norm": 1.0819180011749268, "learning_rate": 2e-05, "loss": 0.03361362, "step": 4293 }, { "epoch": 8.588, "grad_norm": 2.07070255279541, "learning_rate": 2e-05, "loss": 0.05849779, "step": 4294 }, { "epoch": 8.59, "grad_norm": 1.2580983638763428, "learning_rate": 2e-05, "loss": 0.0394201, "step": 4295 }, { "epoch": 8.592, "grad_norm": 2.2226052284240723, "learning_rate": 2e-05, "loss": 0.04181966, "step": 4296 }, { "epoch": 8.594, "grad_norm": 1.8296679258346558, "learning_rate": 2e-05, "loss": 0.04602747, "step": 4297 }, { "epoch": 8.596, "grad_norm": 0.9116762280464172, "learning_rate": 2e-05, "loss": 0.02010448, "step": 4298 }, { "epoch": 8.598, "grad_norm": 0.7344780564308167, "learning_rate": 2e-05, "loss": 0.02482616, "step": 4299 }, { "epoch": 8.6, "grad_norm": 0.8634953498840332, "learning_rate": 2e-05, "loss": 0.02763288, "step": 4300 }, { "epoch": 8.602, "grad_norm": 1.2845823764801025, "learning_rate": 2e-05, "loss": 0.04601606, "step": 4301 }, { "epoch": 8.604, "grad_norm": 1.367843508720398, "learning_rate": 2e-05, "loss": 0.04297215, "step": 4302 }, { "epoch": 8.606, "grad_norm": 2.661747694015503, "learning_rate": 2e-05, "loss": 0.04521809, "step": 4303 }, { "epoch": 8.608, "grad_norm": 1.1709262132644653, "learning_rate": 2e-05, "loss": 0.02654773, "step": 4304 }, { "epoch": 8.61, "grad_norm": 0.8484907746315002, "learning_rate": 2e-05, "loss": 0.02333983, "step": 4305 }, { "epoch": 8.612, "grad_norm": 0.8359749913215637, "learning_rate": 2e-05, "loss": 0.03143027, "step": 4306 }, { "epoch": 8.614, "grad_norm": 1.1071807146072388, "learning_rate": 2e-05, "loss": 0.03338674, "step": 4307 }, { "epoch": 8.616, "grad_norm": 1.5785541534423828, "learning_rate": 2e-05, "loss": 0.03513125, "step": 4308 }, { "epoch": 8.618, "grad_norm": 1.3184837102890015, "learning_rate": 2e-05, "loss": 0.04387352, "step": 4309 }, { "epoch": 8.62, "grad_norm": 0.8719736337661743, "learning_rate": 2e-05, "loss": 0.03716096, "step": 4310 }, { "epoch": 8.622, "grad_norm": 1.2426698207855225, "learning_rate": 2e-05, "loss": 0.04213528, "step": 4311 }, { "epoch": 8.624, "grad_norm": 1.1632554531097412, "learning_rate": 2e-05, "loss": 0.03897891, "step": 4312 }, { "epoch": 8.626, "grad_norm": 1.0779016017913818, "learning_rate": 2e-05, "loss": 0.03166172, "step": 4313 }, { "epoch": 8.628, "grad_norm": 1.575682282447815, "learning_rate": 2e-05, "loss": 0.04232063, "step": 4314 }, { "epoch": 8.63, "grad_norm": 1.0911390781402588, "learning_rate": 2e-05, "loss": 0.03800164, "step": 4315 }, { "epoch": 8.632, "grad_norm": 1.5174480676651, "learning_rate": 2e-05, "loss": 0.03394033, "step": 4316 }, { "epoch": 8.634, "grad_norm": 1.901434302330017, "learning_rate": 2e-05, "loss": 0.04158172, "step": 4317 }, { "epoch": 8.636, "grad_norm": 1.0770238637924194, "learning_rate": 2e-05, "loss": 0.03595822, "step": 4318 }, { "epoch": 8.638, "grad_norm": 1.9225928783416748, "learning_rate": 2e-05, "loss": 0.04130941, "step": 4319 }, { "epoch": 8.64, "grad_norm": 1.609885334968567, "learning_rate": 2e-05, "loss": 0.04103157, "step": 4320 }, { "epoch": 8.642, "grad_norm": 2.190215587615967, "learning_rate": 2e-05, "loss": 0.0473948, "step": 4321 }, { "epoch": 8.644, "grad_norm": 0.8575224876403809, "learning_rate": 2e-05, "loss": 0.03260163, "step": 4322 }, { "epoch": 8.646, "grad_norm": 1.0209393501281738, "learning_rate": 2e-05, "loss": 0.02480337, "step": 4323 }, { "epoch": 8.648, "grad_norm": 0.9516626000404358, "learning_rate": 2e-05, "loss": 0.03523789, "step": 4324 }, { "epoch": 8.65, "grad_norm": 1.1760972738265991, "learning_rate": 2e-05, "loss": 0.04094344, "step": 4325 }, { "epoch": 8.652, "grad_norm": 1.2505104541778564, "learning_rate": 2e-05, "loss": 0.04660025, "step": 4326 }, { "epoch": 8.654, "grad_norm": 1.701697826385498, "learning_rate": 2e-05, "loss": 0.05940346, "step": 4327 }, { "epoch": 8.656, "grad_norm": 0.8905507922172546, "learning_rate": 2e-05, "loss": 0.0389562, "step": 4328 }, { "epoch": 8.658, "grad_norm": 1.3602725267410278, "learning_rate": 2e-05, "loss": 0.05184805, "step": 4329 }, { "epoch": 8.66, "grad_norm": 1.1222376823425293, "learning_rate": 2e-05, "loss": 0.03962405, "step": 4330 }, { "epoch": 8.662, "grad_norm": 1.0586673021316528, "learning_rate": 2e-05, "loss": 0.03376748, "step": 4331 }, { "epoch": 8.664, "grad_norm": 1.0629587173461914, "learning_rate": 2e-05, "loss": 0.03665616, "step": 4332 }, { "epoch": 8.666, "grad_norm": 0.7217963337898254, "learning_rate": 2e-05, "loss": 0.02461688, "step": 4333 }, { "epoch": 8.668, "grad_norm": 0.943494975566864, "learning_rate": 2e-05, "loss": 0.04906441, "step": 4334 }, { "epoch": 8.67, "grad_norm": 1.1171571016311646, "learning_rate": 2e-05, "loss": 0.03371343, "step": 4335 }, { "epoch": 8.672, "grad_norm": 1.0478748083114624, "learning_rate": 2e-05, "loss": 0.03349302, "step": 4336 }, { "epoch": 8.674, "grad_norm": 1.886179804801941, "learning_rate": 2e-05, "loss": 0.03642574, "step": 4337 }, { "epoch": 8.676, "grad_norm": 0.9567756056785583, "learning_rate": 2e-05, "loss": 0.02603189, "step": 4338 }, { "epoch": 8.678, "grad_norm": 1.4050965309143066, "learning_rate": 2e-05, "loss": 0.03317745, "step": 4339 }, { "epoch": 8.68, "grad_norm": 1.2646552324295044, "learning_rate": 2e-05, "loss": 0.02990676, "step": 4340 }, { "epoch": 8.682, "grad_norm": 1.726273775100708, "learning_rate": 2e-05, "loss": 0.0534512, "step": 4341 }, { "epoch": 8.684, "grad_norm": 1.5539257526397705, "learning_rate": 2e-05, "loss": 0.04858878, "step": 4342 }, { "epoch": 8.686, "grad_norm": 1.080551266670227, "learning_rate": 2e-05, "loss": 0.0283501, "step": 4343 }, { "epoch": 8.688, "grad_norm": 1.7172529697418213, "learning_rate": 2e-05, "loss": 0.04520421, "step": 4344 }, { "epoch": 8.69, "grad_norm": 1.2061853408813477, "learning_rate": 2e-05, "loss": 0.04951738, "step": 4345 }, { "epoch": 8.692, "grad_norm": 1.2072381973266602, "learning_rate": 2e-05, "loss": 0.03153531, "step": 4346 }, { "epoch": 8.693999999999999, "grad_norm": 1.209691047668457, "learning_rate": 2e-05, "loss": 0.04145152, "step": 4347 }, { "epoch": 8.696, "grad_norm": 0.8640297055244446, "learning_rate": 2e-05, "loss": 0.02526208, "step": 4348 }, { "epoch": 8.698, "grad_norm": 1.476876974105835, "learning_rate": 2e-05, "loss": 0.04919025, "step": 4349 }, { "epoch": 8.7, "grad_norm": 0.8465504050254822, "learning_rate": 2e-05, "loss": 0.03360989, "step": 4350 }, { "epoch": 8.702, "grad_norm": 1.4732036590576172, "learning_rate": 2e-05, "loss": 0.04423306, "step": 4351 }, { "epoch": 8.704, "grad_norm": 1.0119317770004272, "learning_rate": 2e-05, "loss": 0.03722854, "step": 4352 }, { "epoch": 8.706, "grad_norm": 0.8055739402770996, "learning_rate": 2e-05, "loss": 0.02453724, "step": 4353 }, { "epoch": 8.708, "grad_norm": 1.0698730945587158, "learning_rate": 2e-05, "loss": 0.03628566, "step": 4354 }, { "epoch": 8.71, "grad_norm": 1.9892702102661133, "learning_rate": 2e-05, "loss": 0.03608231, "step": 4355 }, { "epoch": 8.712, "grad_norm": 1.250458002090454, "learning_rate": 2e-05, "loss": 0.03571693, "step": 4356 }, { "epoch": 8.714, "grad_norm": 2.186629056930542, "learning_rate": 2e-05, "loss": 0.04970912, "step": 4357 }, { "epoch": 8.716, "grad_norm": 1.2152903079986572, "learning_rate": 2e-05, "loss": 0.03560323, "step": 4358 }, { "epoch": 8.718, "grad_norm": 0.8866543173789978, "learning_rate": 2e-05, "loss": 0.03801199, "step": 4359 }, { "epoch": 8.72, "grad_norm": 1.0799754858016968, "learning_rate": 2e-05, "loss": 0.03785599, "step": 4360 }, { "epoch": 8.722, "grad_norm": 1.1391751766204834, "learning_rate": 2e-05, "loss": 0.029482, "step": 4361 }, { "epoch": 8.724, "grad_norm": 1.1800816059112549, "learning_rate": 2e-05, "loss": 0.03753895, "step": 4362 }, { "epoch": 8.725999999999999, "grad_norm": 1.2652462720870972, "learning_rate": 2e-05, "loss": 0.04609217, "step": 4363 }, { "epoch": 8.728, "grad_norm": 1.1461265087127686, "learning_rate": 2e-05, "loss": 0.04038265, "step": 4364 }, { "epoch": 8.73, "grad_norm": 1.108725905418396, "learning_rate": 2e-05, "loss": 0.03577749, "step": 4365 }, { "epoch": 8.732, "grad_norm": 1.0410600900650024, "learning_rate": 2e-05, "loss": 0.03118877, "step": 4366 }, { "epoch": 8.734, "grad_norm": 1.4850313663482666, "learning_rate": 2e-05, "loss": 0.05048557, "step": 4367 }, { "epoch": 8.736, "grad_norm": 1.867667555809021, "learning_rate": 2e-05, "loss": 0.04281069, "step": 4368 }, { "epoch": 8.738, "grad_norm": 1.4187918901443481, "learning_rate": 2e-05, "loss": 0.04133852, "step": 4369 }, { "epoch": 8.74, "grad_norm": 0.8809421062469482, "learning_rate": 2e-05, "loss": 0.02871585, "step": 4370 }, { "epoch": 8.742, "grad_norm": 1.2206686735153198, "learning_rate": 2e-05, "loss": 0.03621854, "step": 4371 }, { "epoch": 8.744, "grad_norm": 0.8350837230682373, "learning_rate": 2e-05, "loss": 0.02655823, "step": 4372 }, { "epoch": 8.746, "grad_norm": 0.8585577011108398, "learning_rate": 2e-05, "loss": 0.02701383, "step": 4373 }, { "epoch": 8.748, "grad_norm": 1.260741949081421, "learning_rate": 2e-05, "loss": 0.04151483, "step": 4374 }, { "epoch": 8.75, "grad_norm": 0.6847019791603088, "learning_rate": 2e-05, "loss": 0.02283763, "step": 4375 }, { "epoch": 8.752, "grad_norm": 1.1042121648788452, "learning_rate": 2e-05, "loss": 0.037095, "step": 4376 }, { "epoch": 8.754, "grad_norm": 1.0588191747665405, "learning_rate": 2e-05, "loss": 0.04045224, "step": 4377 }, { "epoch": 8.756, "grad_norm": 0.9876347184181213, "learning_rate": 2e-05, "loss": 0.02354544, "step": 4378 }, { "epoch": 8.758, "grad_norm": 1.634065866470337, "learning_rate": 2e-05, "loss": 0.04165735, "step": 4379 }, { "epoch": 8.76, "grad_norm": 1.4295926094055176, "learning_rate": 2e-05, "loss": 0.03785643, "step": 4380 }, { "epoch": 8.762, "grad_norm": 1.548768401145935, "learning_rate": 2e-05, "loss": 0.03763223, "step": 4381 }, { "epoch": 8.764, "grad_norm": 1.0193872451782227, "learning_rate": 2e-05, "loss": 0.03543913, "step": 4382 }, { "epoch": 8.766, "grad_norm": 1.0400234460830688, "learning_rate": 2e-05, "loss": 0.02763099, "step": 4383 }, { "epoch": 8.768, "grad_norm": 1.3632822036743164, "learning_rate": 2e-05, "loss": 0.03941811, "step": 4384 }, { "epoch": 8.77, "grad_norm": 1.0936450958251953, "learning_rate": 2e-05, "loss": 0.028662, "step": 4385 }, { "epoch": 8.772, "grad_norm": 1.2000219821929932, "learning_rate": 2e-05, "loss": 0.03183241, "step": 4386 }, { "epoch": 8.774000000000001, "grad_norm": 0.721432626247406, "learning_rate": 2e-05, "loss": 0.02328705, "step": 4387 }, { "epoch": 8.776, "grad_norm": 0.9627392888069153, "learning_rate": 2e-05, "loss": 0.03007334, "step": 4388 }, { "epoch": 8.778, "grad_norm": 0.9881150722503662, "learning_rate": 2e-05, "loss": 0.03594098, "step": 4389 }, { "epoch": 8.78, "grad_norm": 1.3902097940444946, "learning_rate": 2e-05, "loss": 0.03261566, "step": 4390 }, { "epoch": 8.782, "grad_norm": 1.6406267881393433, "learning_rate": 2e-05, "loss": 0.06175329, "step": 4391 }, { "epoch": 8.784, "grad_norm": 1.0905884504318237, "learning_rate": 2e-05, "loss": 0.0411981, "step": 4392 }, { "epoch": 8.786, "grad_norm": 1.147599697113037, "learning_rate": 2e-05, "loss": 0.03582589, "step": 4393 }, { "epoch": 8.788, "grad_norm": 0.8982160687446594, "learning_rate": 2e-05, "loss": 0.02215156, "step": 4394 }, { "epoch": 8.79, "grad_norm": 1.2038124799728394, "learning_rate": 2e-05, "loss": 0.03450104, "step": 4395 }, { "epoch": 8.792, "grad_norm": 0.7842474579811096, "learning_rate": 2e-05, "loss": 0.02754783, "step": 4396 }, { "epoch": 8.794, "grad_norm": 1.5107076168060303, "learning_rate": 2e-05, "loss": 0.04883888, "step": 4397 }, { "epoch": 8.796, "grad_norm": 2.030329465866089, "learning_rate": 2e-05, "loss": 0.04400323, "step": 4398 }, { "epoch": 8.798, "grad_norm": 0.9731871485710144, "learning_rate": 2e-05, "loss": 0.03140572, "step": 4399 }, { "epoch": 8.8, "grad_norm": 1.6771141290664673, "learning_rate": 2e-05, "loss": 0.04026034, "step": 4400 }, { "epoch": 8.802, "grad_norm": 1.1904957294464111, "learning_rate": 2e-05, "loss": 0.03219453, "step": 4401 }, { "epoch": 8.804, "grad_norm": 2.506793737411499, "learning_rate": 2e-05, "loss": 0.05005032, "step": 4402 }, { "epoch": 8.806000000000001, "grad_norm": 1.1132240295410156, "learning_rate": 2e-05, "loss": 0.03032156, "step": 4403 }, { "epoch": 8.808, "grad_norm": 1.6065146923065186, "learning_rate": 2e-05, "loss": 0.03724115, "step": 4404 }, { "epoch": 8.81, "grad_norm": 1.5506099462509155, "learning_rate": 2e-05, "loss": 0.0376544, "step": 4405 }, { "epoch": 8.812, "grad_norm": 1.1364389657974243, "learning_rate": 2e-05, "loss": 0.03071968, "step": 4406 }, { "epoch": 8.814, "grad_norm": 1.0018686056137085, "learning_rate": 2e-05, "loss": 0.02822195, "step": 4407 }, { "epoch": 8.816, "grad_norm": 1.4312738180160522, "learning_rate": 2e-05, "loss": 0.0373998, "step": 4408 }, { "epoch": 8.818, "grad_norm": 1.2273539304733276, "learning_rate": 2e-05, "loss": 0.0436803, "step": 4409 }, { "epoch": 8.82, "grad_norm": 1.0249744653701782, "learning_rate": 2e-05, "loss": 0.03977648, "step": 4410 }, { "epoch": 8.822, "grad_norm": 0.9226503372192383, "learning_rate": 2e-05, "loss": 0.02705158, "step": 4411 }, { "epoch": 8.824, "grad_norm": 1.4124457836151123, "learning_rate": 2e-05, "loss": 0.02822359, "step": 4412 }, { "epoch": 8.826, "grad_norm": 1.1609959602355957, "learning_rate": 2e-05, "loss": 0.03335798, "step": 4413 }, { "epoch": 8.828, "grad_norm": 1.5051655769348145, "learning_rate": 2e-05, "loss": 0.0553541, "step": 4414 }, { "epoch": 8.83, "grad_norm": 0.934198260307312, "learning_rate": 2e-05, "loss": 0.0271256, "step": 4415 }, { "epoch": 8.832, "grad_norm": 1.1879706382751465, "learning_rate": 2e-05, "loss": 0.03448817, "step": 4416 }, { "epoch": 8.834, "grad_norm": 1.44901704788208, "learning_rate": 2e-05, "loss": 0.0605602, "step": 4417 }, { "epoch": 8.836, "grad_norm": 0.8853863477706909, "learning_rate": 2e-05, "loss": 0.03229371, "step": 4418 }, { "epoch": 8.838, "grad_norm": 0.9787958264350891, "learning_rate": 2e-05, "loss": 0.04125983, "step": 4419 }, { "epoch": 8.84, "grad_norm": 1.2672247886657715, "learning_rate": 2e-05, "loss": 0.04601629, "step": 4420 }, { "epoch": 8.842, "grad_norm": 1.0602166652679443, "learning_rate": 2e-05, "loss": 0.0245541, "step": 4421 }, { "epoch": 8.844, "grad_norm": 1.4554502964019775, "learning_rate": 2e-05, "loss": 0.0507815, "step": 4422 }, { "epoch": 8.846, "grad_norm": 1.4599976539611816, "learning_rate": 2e-05, "loss": 0.04508867, "step": 4423 }, { "epoch": 8.848, "grad_norm": 0.8949224352836609, "learning_rate": 2e-05, "loss": 0.03331463, "step": 4424 }, { "epoch": 8.85, "grad_norm": 0.9511380195617676, "learning_rate": 2e-05, "loss": 0.03431322, "step": 4425 }, { "epoch": 8.852, "grad_norm": 0.9333038926124573, "learning_rate": 2e-05, "loss": 0.03489791, "step": 4426 }, { "epoch": 8.854, "grad_norm": 1.234848141670227, "learning_rate": 2e-05, "loss": 0.02840705, "step": 4427 }, { "epoch": 8.856, "grad_norm": 1.0672849416732788, "learning_rate": 2e-05, "loss": 0.03522662, "step": 4428 }, { "epoch": 8.858, "grad_norm": 1.2169923782348633, "learning_rate": 2e-05, "loss": 0.04549725, "step": 4429 }, { "epoch": 8.86, "grad_norm": 3.057295799255371, "learning_rate": 2e-05, "loss": 0.03550213, "step": 4430 }, { "epoch": 8.862, "grad_norm": 0.8561052680015564, "learning_rate": 2e-05, "loss": 0.03572192, "step": 4431 }, { "epoch": 8.864, "grad_norm": 1.024088978767395, "learning_rate": 2e-05, "loss": 0.03791398, "step": 4432 }, { "epoch": 8.866, "grad_norm": 1.3080118894577026, "learning_rate": 2e-05, "loss": 0.03233641, "step": 4433 }, { "epoch": 8.868, "grad_norm": 0.8643254637718201, "learning_rate": 2e-05, "loss": 0.02729377, "step": 4434 }, { "epoch": 8.87, "grad_norm": 1.0389906167984009, "learning_rate": 2e-05, "loss": 0.03855084, "step": 4435 }, { "epoch": 8.872, "grad_norm": 1.1684552431106567, "learning_rate": 2e-05, "loss": 0.03235376, "step": 4436 }, { "epoch": 8.874, "grad_norm": 0.9748151302337646, "learning_rate": 2e-05, "loss": 0.03027427, "step": 4437 }, { "epoch": 8.876, "grad_norm": 1.3167574405670166, "learning_rate": 2e-05, "loss": 0.04322438, "step": 4438 }, { "epoch": 8.878, "grad_norm": 1.7637985944747925, "learning_rate": 2e-05, "loss": 0.049634, "step": 4439 }, { "epoch": 8.88, "grad_norm": 1.5339040756225586, "learning_rate": 2e-05, "loss": 0.03757133, "step": 4440 }, { "epoch": 8.882, "grad_norm": 0.9908345341682434, "learning_rate": 2e-05, "loss": 0.0349043, "step": 4441 }, { "epoch": 8.884, "grad_norm": 1.8611232042312622, "learning_rate": 2e-05, "loss": 0.04882191, "step": 4442 }, { "epoch": 8.886, "grad_norm": 1.143393635749817, "learning_rate": 2e-05, "loss": 0.03258184, "step": 4443 }, { "epoch": 8.888, "grad_norm": 1.1683696508407593, "learning_rate": 2e-05, "loss": 0.03544236, "step": 4444 }, { "epoch": 8.89, "grad_norm": 2.8146896362304688, "learning_rate": 2e-05, "loss": 0.04814401, "step": 4445 }, { "epoch": 8.892, "grad_norm": 0.9965981245040894, "learning_rate": 2e-05, "loss": 0.03453904, "step": 4446 }, { "epoch": 8.894, "grad_norm": 1.351223111152649, "learning_rate": 2e-05, "loss": 0.04809322, "step": 4447 }, { "epoch": 8.896, "grad_norm": 1.1808723211288452, "learning_rate": 2e-05, "loss": 0.03474544, "step": 4448 }, { "epoch": 8.898, "grad_norm": 0.8170323371887207, "learning_rate": 2e-05, "loss": 0.0270221, "step": 4449 }, { "epoch": 8.9, "grad_norm": 1.086888313293457, "learning_rate": 2e-05, "loss": 0.03911464, "step": 4450 }, { "epoch": 8.902, "grad_norm": 1.4800477027893066, "learning_rate": 2e-05, "loss": 0.03961758, "step": 4451 }, { "epoch": 8.904, "grad_norm": 2.1147592067718506, "learning_rate": 2e-05, "loss": 0.03302735, "step": 4452 }, { "epoch": 8.906, "grad_norm": 0.9599102735519409, "learning_rate": 2e-05, "loss": 0.03172829, "step": 4453 }, { "epoch": 8.908, "grad_norm": 0.8856299519538879, "learning_rate": 2e-05, "loss": 0.03148354, "step": 4454 }, { "epoch": 8.91, "grad_norm": 1.7772505283355713, "learning_rate": 2e-05, "loss": 0.04477862, "step": 4455 }, { "epoch": 8.912, "grad_norm": 1.0375773906707764, "learning_rate": 2e-05, "loss": 0.03189868, "step": 4456 }, { "epoch": 8.914, "grad_norm": 1.2121745347976685, "learning_rate": 2e-05, "loss": 0.03863291, "step": 4457 }, { "epoch": 8.916, "grad_norm": 0.890414297580719, "learning_rate": 2e-05, "loss": 0.03633838, "step": 4458 }, { "epoch": 8.918, "grad_norm": 1.6410126686096191, "learning_rate": 2e-05, "loss": 0.05018979, "step": 4459 }, { "epoch": 8.92, "grad_norm": 0.9647154211997986, "learning_rate": 2e-05, "loss": 0.03614154, "step": 4460 }, { "epoch": 8.922, "grad_norm": 0.8669952154159546, "learning_rate": 2e-05, "loss": 0.03455111, "step": 4461 }, { "epoch": 8.924, "grad_norm": 0.7547525763511658, "learning_rate": 2e-05, "loss": 0.02110554, "step": 4462 }, { "epoch": 8.926, "grad_norm": 1.6941547393798828, "learning_rate": 2e-05, "loss": 0.03623815, "step": 4463 }, { "epoch": 8.928, "grad_norm": 0.9672108888626099, "learning_rate": 2e-05, "loss": 0.03987022, "step": 4464 }, { "epoch": 8.93, "grad_norm": 0.9219751358032227, "learning_rate": 2e-05, "loss": 0.03178947, "step": 4465 }, { "epoch": 8.932, "grad_norm": 1.1351100206375122, "learning_rate": 2e-05, "loss": 0.03589904, "step": 4466 }, { "epoch": 8.934, "grad_norm": 0.8131705522537231, "learning_rate": 2e-05, "loss": 0.02804996, "step": 4467 }, { "epoch": 8.936, "grad_norm": 1.7447850704193115, "learning_rate": 2e-05, "loss": 0.04747099, "step": 4468 }, { "epoch": 8.938, "grad_norm": 0.9086422324180603, "learning_rate": 2e-05, "loss": 0.03424243, "step": 4469 }, { "epoch": 8.94, "grad_norm": 2.3548171520233154, "learning_rate": 2e-05, "loss": 0.06218503, "step": 4470 }, { "epoch": 8.942, "grad_norm": 1.1623647212982178, "learning_rate": 2e-05, "loss": 0.0374011, "step": 4471 }, { "epoch": 8.943999999999999, "grad_norm": 0.8842362761497498, "learning_rate": 2e-05, "loss": 0.0328421, "step": 4472 }, { "epoch": 8.946, "grad_norm": 1.8156864643096924, "learning_rate": 2e-05, "loss": 0.03803327, "step": 4473 }, { "epoch": 8.948, "grad_norm": 1.0355145931243896, "learning_rate": 2e-05, "loss": 0.02684892, "step": 4474 }, { "epoch": 8.95, "grad_norm": 1.1974067687988281, "learning_rate": 2e-05, "loss": 0.04837372, "step": 4475 }, { "epoch": 8.952, "grad_norm": 1.490455985069275, "learning_rate": 2e-05, "loss": 0.03446172, "step": 4476 }, { "epoch": 8.954, "grad_norm": 2.2048962116241455, "learning_rate": 2e-05, "loss": 0.05069371, "step": 4477 }, { "epoch": 8.956, "grad_norm": 0.9937486052513123, "learning_rate": 2e-05, "loss": 0.02961515, "step": 4478 }, { "epoch": 8.958, "grad_norm": 0.8430362939834595, "learning_rate": 2e-05, "loss": 0.02949478, "step": 4479 }, { "epoch": 8.96, "grad_norm": 1.0599952936172485, "learning_rate": 2e-05, "loss": 0.03857842, "step": 4480 }, { "epoch": 8.962, "grad_norm": 1.1685858964920044, "learning_rate": 2e-05, "loss": 0.02847839, "step": 4481 }, { "epoch": 8.964, "grad_norm": 1.0550990104675293, "learning_rate": 2e-05, "loss": 0.02464252, "step": 4482 }, { "epoch": 8.966, "grad_norm": 1.7941303253173828, "learning_rate": 2e-05, "loss": 0.05934828, "step": 4483 }, { "epoch": 8.968, "grad_norm": 1.026834487915039, "learning_rate": 2e-05, "loss": 0.0214682, "step": 4484 }, { "epoch": 8.97, "grad_norm": 0.7951596975326538, "learning_rate": 2e-05, "loss": 0.03193407, "step": 4485 }, { "epoch": 8.972, "grad_norm": 1.569739580154419, "learning_rate": 2e-05, "loss": 0.05439562, "step": 4486 }, { "epoch": 8.974, "grad_norm": 1.554987907409668, "learning_rate": 2e-05, "loss": 0.04937214, "step": 4487 }, { "epoch": 8.975999999999999, "grad_norm": 1.0768734216690063, "learning_rate": 2e-05, "loss": 0.03868555, "step": 4488 }, { "epoch": 8.978, "grad_norm": 1.2217507362365723, "learning_rate": 2e-05, "loss": 0.03699178, "step": 4489 }, { "epoch": 8.98, "grad_norm": 1.1103541851043701, "learning_rate": 2e-05, "loss": 0.0460306, "step": 4490 }, { "epoch": 8.982, "grad_norm": 1.0225930213928223, "learning_rate": 2e-05, "loss": 0.02453739, "step": 4491 }, { "epoch": 8.984, "grad_norm": 1.1269789934158325, "learning_rate": 2e-05, "loss": 0.04750391, "step": 4492 }, { "epoch": 8.986, "grad_norm": 1.6232492923736572, "learning_rate": 2e-05, "loss": 0.0486496, "step": 4493 }, { "epoch": 8.988, "grad_norm": 1.1891883611679077, "learning_rate": 2e-05, "loss": 0.04055459, "step": 4494 }, { "epoch": 8.99, "grad_norm": 0.9137685298919678, "learning_rate": 2e-05, "loss": 0.02941462, "step": 4495 }, { "epoch": 8.992, "grad_norm": 0.9191139936447144, "learning_rate": 2e-05, "loss": 0.04776662, "step": 4496 }, { "epoch": 8.994, "grad_norm": 0.8505673408508301, "learning_rate": 2e-05, "loss": 0.02501973, "step": 4497 }, { "epoch": 8.996, "grad_norm": 1.0725775957107544, "learning_rate": 2e-05, "loss": 0.02366268, "step": 4498 }, { "epoch": 8.998, "grad_norm": 0.9229608178138733, "learning_rate": 2e-05, "loss": 0.029873, "step": 4499 }, { "epoch": 9.0, "grad_norm": 0.9641827344894409, "learning_rate": 2e-05, "loss": 0.03621103, "step": 4500 }, { "epoch": 9.0, "eval_performance": { "AngleClassification_1": 1.0, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9560878243512974, "Equal_1": 0.988, "Equal_2": 0.908183632734531, "Equal_3": 0.8423153692614771, "LineComparison_1": 0.998, "LineComparison_2": 0.9960079840319361, "LineComparison_3": 0.9900199600798403, "Parallel_1": 0.9779559118236473, "Parallel_2": 0.9959919839679359, "Parallel_3": 0.92, "Perpendicular_1": 0.992, "Perpendicular_2": 0.876, "Perpendicular_3": 0.48096192384769537, "PointLiesOnCircle_1": 0.9959919839679359, "PointLiesOnCircle_2": 0.9976666666666667, "PointLiesOnCircle_3": 0.9846666666666667, "PointLiesOnLine_1": 0.9899799599198397, "PointLiesOnLine_2": 0.9919839679358717, "PointLiesOnLine_3": 0.9141716566866267 }, "eval_runtime": 224.8957, "eval_samples_per_second": 46.688, "eval_steps_per_second": 0.934, "step": 4500 }, { "epoch": 9.002, "grad_norm": 1.6514371633529663, "learning_rate": 2e-05, "loss": 0.05693808, "step": 4501 }, { "epoch": 9.004, "grad_norm": 1.5014028549194336, "learning_rate": 2e-05, "loss": 0.05683583, "step": 4502 }, { "epoch": 9.006, "grad_norm": 1.212156057357788, "learning_rate": 2e-05, "loss": 0.03484202, "step": 4503 }, { "epoch": 9.008, "grad_norm": 1.349173665046692, "learning_rate": 2e-05, "loss": 0.05665319, "step": 4504 }, { "epoch": 9.01, "grad_norm": 1.4876397848129272, "learning_rate": 2e-05, "loss": 0.05099074, "step": 4505 }, { "epoch": 9.012, "grad_norm": 1.1119945049285889, "learning_rate": 2e-05, "loss": 0.03741779, "step": 4506 }, { "epoch": 9.014, "grad_norm": 2.5929832458496094, "learning_rate": 2e-05, "loss": 0.0504611, "step": 4507 }, { "epoch": 9.016, "grad_norm": 1.1812018156051636, "learning_rate": 2e-05, "loss": 0.03695939, "step": 4508 }, { "epoch": 9.018, "grad_norm": 1.2786341905593872, "learning_rate": 2e-05, "loss": 0.05580963, "step": 4509 }, { "epoch": 9.02, "grad_norm": 1.163305401802063, "learning_rate": 2e-05, "loss": 0.05066406, "step": 4510 }, { "epoch": 9.022, "grad_norm": 0.9801599383354187, "learning_rate": 2e-05, "loss": 0.0390154, "step": 4511 }, { "epoch": 9.024, "grad_norm": 1.029821515083313, "learning_rate": 2e-05, "loss": 0.04161731, "step": 4512 }, { "epoch": 9.026, "grad_norm": 3.9281413555145264, "learning_rate": 2e-05, "loss": 0.077631, "step": 4513 }, { "epoch": 9.028, "grad_norm": 1.444425106048584, "learning_rate": 2e-05, "loss": 0.05062033, "step": 4514 }, { "epoch": 9.03, "grad_norm": 2.3030002117156982, "learning_rate": 2e-05, "loss": 0.07756697, "step": 4515 }, { "epoch": 9.032, "grad_norm": 2.027336597442627, "learning_rate": 2e-05, "loss": 0.06770372, "step": 4516 }, { "epoch": 9.034, "grad_norm": 1.304221272468567, "learning_rate": 2e-05, "loss": 0.04145993, "step": 4517 }, { "epoch": 9.036, "grad_norm": 1.26913321018219, "learning_rate": 2e-05, "loss": 0.04168402, "step": 4518 }, { "epoch": 9.038, "grad_norm": 1.0325367450714111, "learning_rate": 2e-05, "loss": 0.03383872, "step": 4519 }, { "epoch": 9.04, "grad_norm": 2.2106847763061523, "learning_rate": 2e-05, "loss": 0.05003514, "step": 4520 }, { "epoch": 9.042, "grad_norm": 3.5478241443634033, "learning_rate": 2e-05, "loss": 0.0616965, "step": 4521 }, { "epoch": 9.044, "grad_norm": 1.2067890167236328, "learning_rate": 2e-05, "loss": 0.04765698, "step": 4522 }, { "epoch": 9.046, "grad_norm": 3.863877534866333, "learning_rate": 2e-05, "loss": 0.05202936, "step": 4523 }, { "epoch": 9.048, "grad_norm": 2.0828495025634766, "learning_rate": 2e-05, "loss": 0.07206304, "step": 4524 }, { "epoch": 9.05, "grad_norm": 1.6689409017562866, "learning_rate": 2e-05, "loss": 0.05431271, "step": 4525 }, { "epoch": 9.052, "grad_norm": 1.0319054126739502, "learning_rate": 2e-05, "loss": 0.03317522, "step": 4526 }, { "epoch": 9.054, "grad_norm": 1.035259485244751, "learning_rate": 2e-05, "loss": 0.04005101, "step": 4527 }, { "epoch": 9.056, "grad_norm": 1.568297028541565, "learning_rate": 2e-05, "loss": 0.05553072, "step": 4528 }, { "epoch": 9.058, "grad_norm": 1.0219393968582153, "learning_rate": 2e-05, "loss": 0.03118332, "step": 4529 }, { "epoch": 9.06, "grad_norm": 1.238663911819458, "learning_rate": 2e-05, "loss": 0.05663264, "step": 4530 }, { "epoch": 9.062, "grad_norm": 1.6836519241333008, "learning_rate": 2e-05, "loss": 0.04448233, "step": 4531 }, { "epoch": 9.064, "grad_norm": 1.1010159254074097, "learning_rate": 2e-05, "loss": 0.04647362, "step": 4532 }, { "epoch": 9.066, "grad_norm": 2.1003518104553223, "learning_rate": 2e-05, "loss": 0.05052432, "step": 4533 }, { "epoch": 9.068, "grad_norm": 2.058155059814453, "learning_rate": 2e-05, "loss": 0.06897796, "step": 4534 }, { "epoch": 9.07, "grad_norm": 1.4129358530044556, "learning_rate": 2e-05, "loss": 0.03951801, "step": 4535 }, { "epoch": 9.072, "grad_norm": 1.4426170587539673, "learning_rate": 2e-05, "loss": 0.05496933, "step": 4536 }, { "epoch": 9.074, "grad_norm": 1.4925137758255005, "learning_rate": 2e-05, "loss": 0.04744648, "step": 4537 }, { "epoch": 9.076, "grad_norm": 1.4621315002441406, "learning_rate": 2e-05, "loss": 0.04011417, "step": 4538 }, { "epoch": 9.078, "grad_norm": 2.001925230026245, "learning_rate": 2e-05, "loss": 0.05460382, "step": 4539 }, { "epoch": 9.08, "grad_norm": 1.546341896057129, "learning_rate": 2e-05, "loss": 0.05653338, "step": 4540 }, { "epoch": 9.082, "grad_norm": 1.0642544031143188, "learning_rate": 2e-05, "loss": 0.03986666, "step": 4541 }, { "epoch": 9.084, "grad_norm": 1.2724580764770508, "learning_rate": 2e-05, "loss": 0.03925284, "step": 4542 }, { "epoch": 9.086, "grad_norm": 1.4420197010040283, "learning_rate": 2e-05, "loss": 0.05068545, "step": 4543 }, { "epoch": 9.088, "grad_norm": 1.974444031715393, "learning_rate": 2e-05, "loss": 0.06601392, "step": 4544 }, { "epoch": 9.09, "grad_norm": 1.5959964990615845, "learning_rate": 2e-05, "loss": 0.06721203, "step": 4545 }, { "epoch": 9.092, "grad_norm": 1.5431723594665527, "learning_rate": 2e-05, "loss": 0.04168814, "step": 4546 }, { "epoch": 9.094, "grad_norm": 2.0330898761749268, "learning_rate": 2e-05, "loss": 0.07048078, "step": 4547 }, { "epoch": 9.096, "grad_norm": 1.527872085571289, "learning_rate": 2e-05, "loss": 0.06067594, "step": 4548 }, { "epoch": 9.098, "grad_norm": 1.701743483543396, "learning_rate": 2e-05, "loss": 0.04963976, "step": 4549 }, { "epoch": 9.1, "grad_norm": 1.6770004034042358, "learning_rate": 2e-05, "loss": 0.05169954, "step": 4550 }, { "epoch": 9.102, "grad_norm": 1.2405482530593872, "learning_rate": 2e-05, "loss": 0.0456303, "step": 4551 }, { "epoch": 9.104, "grad_norm": 1.3076064586639404, "learning_rate": 2e-05, "loss": 0.04175555, "step": 4552 }, { "epoch": 9.106, "grad_norm": 1.487377643585205, "learning_rate": 2e-05, "loss": 0.05938742, "step": 4553 }, { "epoch": 9.108, "grad_norm": 1.3547475337982178, "learning_rate": 2e-05, "loss": 0.06537279, "step": 4554 }, { "epoch": 9.11, "grad_norm": 1.792872428894043, "learning_rate": 2e-05, "loss": 0.05485454, "step": 4555 }, { "epoch": 9.112, "grad_norm": 1.4543389081954956, "learning_rate": 2e-05, "loss": 0.05723123, "step": 4556 }, { "epoch": 9.114, "grad_norm": 2.26961612701416, "learning_rate": 2e-05, "loss": 0.04400116, "step": 4557 }, { "epoch": 9.116, "grad_norm": 1.728104829788208, "learning_rate": 2e-05, "loss": 0.04927929, "step": 4558 }, { "epoch": 9.118, "grad_norm": 1.3740220069885254, "learning_rate": 2e-05, "loss": 0.02859992, "step": 4559 }, { "epoch": 9.12, "grad_norm": 1.4856154918670654, "learning_rate": 2e-05, "loss": 0.04018495, "step": 4560 }, { "epoch": 9.122, "grad_norm": 1.3481768369674683, "learning_rate": 2e-05, "loss": 0.04694747, "step": 4561 }, { "epoch": 9.124, "grad_norm": 1.3610787391662598, "learning_rate": 2e-05, "loss": 0.052116, "step": 4562 }, { "epoch": 9.126, "grad_norm": 2.52278733253479, "learning_rate": 2e-05, "loss": 0.03285094, "step": 4563 }, { "epoch": 9.128, "grad_norm": 1.1811119318008423, "learning_rate": 2e-05, "loss": 0.03408758, "step": 4564 }, { "epoch": 9.13, "grad_norm": 1.5578532218933105, "learning_rate": 2e-05, "loss": 0.04034753, "step": 4565 }, { "epoch": 9.132, "grad_norm": 1.2555309534072876, "learning_rate": 2e-05, "loss": 0.03828382, "step": 4566 }, { "epoch": 9.134, "grad_norm": 0.949913501739502, "learning_rate": 2e-05, "loss": 0.03581467, "step": 4567 }, { "epoch": 9.136, "grad_norm": 1.5006330013275146, "learning_rate": 2e-05, "loss": 0.04063954, "step": 4568 }, { "epoch": 9.138, "grad_norm": 2.155529737472534, "learning_rate": 2e-05, "loss": 0.06960917, "step": 4569 }, { "epoch": 9.14, "grad_norm": 1.295516014099121, "learning_rate": 2e-05, "loss": 0.03258793, "step": 4570 }, { "epoch": 9.142, "grad_norm": 1.7943508625030518, "learning_rate": 2e-05, "loss": 0.04014836, "step": 4571 }, { "epoch": 9.144, "grad_norm": 1.7114983797073364, "learning_rate": 2e-05, "loss": 0.05858638, "step": 4572 }, { "epoch": 9.146, "grad_norm": 2.5269582271575928, "learning_rate": 2e-05, "loss": 0.06841592, "step": 4573 }, { "epoch": 9.148, "grad_norm": 1.0063886642456055, "learning_rate": 2e-05, "loss": 0.03476948, "step": 4574 }, { "epoch": 9.15, "grad_norm": 1.9705594778060913, "learning_rate": 2e-05, "loss": 0.04850863, "step": 4575 }, { "epoch": 9.152, "grad_norm": 1.5661556720733643, "learning_rate": 2e-05, "loss": 0.06399988, "step": 4576 }, { "epoch": 9.154, "grad_norm": 2.0913190841674805, "learning_rate": 2e-05, "loss": 0.04225244, "step": 4577 }, { "epoch": 9.156, "grad_norm": 2.016367197036743, "learning_rate": 2e-05, "loss": 0.06421536, "step": 4578 }, { "epoch": 9.158, "grad_norm": 1.8941526412963867, "learning_rate": 2e-05, "loss": 0.0711637, "step": 4579 }, { "epoch": 9.16, "grad_norm": 1.3853126764297485, "learning_rate": 2e-05, "loss": 0.03800337, "step": 4580 }, { "epoch": 9.162, "grad_norm": 1.3781192302703857, "learning_rate": 2e-05, "loss": 0.05328683, "step": 4581 }, { "epoch": 9.164, "grad_norm": 1.7998543977737427, "learning_rate": 2e-05, "loss": 0.05201303, "step": 4582 }, { "epoch": 9.166, "grad_norm": 1.9822813272476196, "learning_rate": 2e-05, "loss": 0.05697027, "step": 4583 }, { "epoch": 9.168, "grad_norm": 1.7012615203857422, "learning_rate": 2e-05, "loss": 0.04591784, "step": 4584 }, { "epoch": 9.17, "grad_norm": 1.3096288442611694, "learning_rate": 2e-05, "loss": 0.03787319, "step": 4585 }, { "epoch": 9.172, "grad_norm": 1.2845062017440796, "learning_rate": 2e-05, "loss": 0.0427983, "step": 4586 }, { "epoch": 9.174, "grad_norm": 0.934700071811676, "learning_rate": 2e-05, "loss": 0.03356396, "step": 4587 }, { "epoch": 9.176, "grad_norm": 1.6701496839523315, "learning_rate": 2e-05, "loss": 0.04735634, "step": 4588 }, { "epoch": 9.178, "grad_norm": 1.0834697484970093, "learning_rate": 2e-05, "loss": 0.04129692, "step": 4589 }, { "epoch": 9.18, "grad_norm": 1.489410161972046, "learning_rate": 2e-05, "loss": 0.05078772, "step": 4590 }, { "epoch": 9.182, "grad_norm": 0.853066623210907, "learning_rate": 2e-05, "loss": 0.03656086, "step": 4591 }, { "epoch": 9.184, "grad_norm": 1.5769661664962769, "learning_rate": 2e-05, "loss": 0.07200988, "step": 4592 }, { "epoch": 9.186, "grad_norm": 1.4763498306274414, "learning_rate": 2e-05, "loss": 0.05131055, "step": 4593 }, { "epoch": 9.188, "grad_norm": 0.8072667121887207, "learning_rate": 2e-05, "loss": 0.03182596, "step": 4594 }, { "epoch": 9.19, "grad_norm": 1.35880446434021, "learning_rate": 2e-05, "loss": 0.05710747, "step": 4595 }, { "epoch": 9.192, "grad_norm": 2.0865299701690674, "learning_rate": 2e-05, "loss": 0.06729348, "step": 4596 }, { "epoch": 9.194, "grad_norm": 1.229108214378357, "learning_rate": 2e-05, "loss": 0.0496373, "step": 4597 }, { "epoch": 9.196, "grad_norm": 1.1466349363327026, "learning_rate": 2e-05, "loss": 0.04780995, "step": 4598 }, { "epoch": 9.198, "grad_norm": 1.8518072366714478, "learning_rate": 2e-05, "loss": 0.05371061, "step": 4599 }, { "epoch": 9.2, "grad_norm": 1.5944348573684692, "learning_rate": 2e-05, "loss": 0.04627388, "step": 4600 }, { "epoch": 9.202, "grad_norm": 1.8636687994003296, "learning_rate": 2e-05, "loss": 0.03607373, "step": 4601 }, { "epoch": 9.204, "grad_norm": 1.3533059358596802, "learning_rate": 2e-05, "loss": 0.05186944, "step": 4602 }, { "epoch": 9.206, "grad_norm": 1.2731577157974243, "learning_rate": 2e-05, "loss": 0.05566896, "step": 4603 }, { "epoch": 9.208, "grad_norm": 1.8427437543869019, "learning_rate": 2e-05, "loss": 0.05458481, "step": 4604 }, { "epoch": 9.21, "grad_norm": 1.295583724975586, "learning_rate": 2e-05, "loss": 0.0600097, "step": 4605 }, { "epoch": 9.212, "grad_norm": 1.3000692129135132, "learning_rate": 2e-05, "loss": 0.04102925, "step": 4606 }, { "epoch": 9.214, "grad_norm": 1.7395988702774048, "learning_rate": 2e-05, "loss": 0.0625364, "step": 4607 }, { "epoch": 9.216, "grad_norm": 1.5970566272735596, "learning_rate": 2e-05, "loss": 0.05405448, "step": 4608 }, { "epoch": 9.218, "grad_norm": 0.965748131275177, "learning_rate": 2e-05, "loss": 0.03905229, "step": 4609 }, { "epoch": 9.22, "grad_norm": 1.195919394493103, "learning_rate": 2e-05, "loss": 0.04721025, "step": 4610 }, { "epoch": 9.222, "grad_norm": 2.362931489944458, "learning_rate": 2e-05, "loss": 0.07176741, "step": 4611 }, { "epoch": 9.224, "grad_norm": 2.9596593379974365, "learning_rate": 2e-05, "loss": 0.05495764, "step": 4612 }, { "epoch": 9.226, "grad_norm": 1.7503911256790161, "learning_rate": 2e-05, "loss": 0.05036958, "step": 4613 }, { "epoch": 9.228, "grad_norm": 1.1775895357131958, "learning_rate": 2e-05, "loss": 0.0454244, "step": 4614 }, { "epoch": 9.23, "grad_norm": 0.9482311010360718, "learning_rate": 2e-05, "loss": 0.03411316, "step": 4615 }, { "epoch": 9.232, "grad_norm": 1.2667335271835327, "learning_rate": 2e-05, "loss": 0.0496553, "step": 4616 }, { "epoch": 9.234, "grad_norm": 2.0519330501556396, "learning_rate": 2e-05, "loss": 0.04904152, "step": 4617 }, { "epoch": 9.236, "grad_norm": 1.1808348894119263, "learning_rate": 2e-05, "loss": 0.04568075, "step": 4618 }, { "epoch": 9.238, "grad_norm": 1.8402965068817139, "learning_rate": 2e-05, "loss": 0.05634372, "step": 4619 }, { "epoch": 9.24, "grad_norm": 2.25492262840271, "learning_rate": 2e-05, "loss": 0.0613959, "step": 4620 }, { "epoch": 9.242, "grad_norm": 2.354156494140625, "learning_rate": 2e-05, "loss": 0.05199997, "step": 4621 }, { "epoch": 9.244, "grad_norm": 1.6857858896255493, "learning_rate": 2e-05, "loss": 0.0389527, "step": 4622 }, { "epoch": 9.246, "grad_norm": 1.1670353412628174, "learning_rate": 2e-05, "loss": 0.03445141, "step": 4623 }, { "epoch": 9.248, "grad_norm": 1.0673527717590332, "learning_rate": 2e-05, "loss": 0.05272197, "step": 4624 }, { "epoch": 9.25, "grad_norm": 1.4445080757141113, "learning_rate": 2e-05, "loss": 0.05084341, "step": 4625 }, { "epoch": 9.252, "grad_norm": 1.0555553436279297, "learning_rate": 2e-05, "loss": 0.03255808, "step": 4626 }, { "epoch": 9.254, "grad_norm": 1.7626922130584717, "learning_rate": 2e-05, "loss": 0.03573527, "step": 4627 }, { "epoch": 9.256, "grad_norm": 2.6778011322021484, "learning_rate": 2e-05, "loss": 0.03489658, "step": 4628 }, { "epoch": 9.258, "grad_norm": 1.3396369218826294, "learning_rate": 2e-05, "loss": 0.0488304, "step": 4629 }, { "epoch": 9.26, "grad_norm": 1.3381152153015137, "learning_rate": 2e-05, "loss": 0.03899901, "step": 4630 }, { "epoch": 9.262, "grad_norm": 1.5508272647857666, "learning_rate": 2e-05, "loss": 0.05545926, "step": 4631 }, { "epoch": 9.264, "grad_norm": 1.8423733711242676, "learning_rate": 2e-05, "loss": 0.05732977, "step": 4632 }, { "epoch": 9.266, "grad_norm": 2.0445032119750977, "learning_rate": 2e-05, "loss": 0.04052176, "step": 4633 }, { "epoch": 9.268, "grad_norm": 1.5351531505584717, "learning_rate": 2e-05, "loss": 0.04028165, "step": 4634 }, { "epoch": 9.27, "grad_norm": 2.0131118297576904, "learning_rate": 2e-05, "loss": 0.06490766, "step": 4635 }, { "epoch": 9.272, "grad_norm": 2.576608657836914, "learning_rate": 2e-05, "loss": 0.06565543, "step": 4636 }, { "epoch": 9.274000000000001, "grad_norm": 1.7646458148956299, "learning_rate": 2e-05, "loss": 0.0859915, "step": 4637 }, { "epoch": 9.276, "grad_norm": 2.5291850566864014, "learning_rate": 2e-05, "loss": 0.05386726, "step": 4638 }, { "epoch": 9.278, "grad_norm": 0.935114324092865, "learning_rate": 2e-05, "loss": 0.03409642, "step": 4639 }, { "epoch": 9.28, "grad_norm": 1.7105352878570557, "learning_rate": 2e-05, "loss": 0.04219907, "step": 4640 }, { "epoch": 9.282, "grad_norm": 1.1743371486663818, "learning_rate": 2e-05, "loss": 0.03623797, "step": 4641 }, { "epoch": 9.284, "grad_norm": 1.8156753778457642, "learning_rate": 2e-05, "loss": 0.05251084, "step": 4642 }, { "epoch": 9.286, "grad_norm": 3.5523386001586914, "learning_rate": 2e-05, "loss": 0.04586774, "step": 4643 }, { "epoch": 9.288, "grad_norm": 2.256244421005249, "learning_rate": 2e-05, "loss": 0.04985041, "step": 4644 }, { "epoch": 9.29, "grad_norm": 2.2845897674560547, "learning_rate": 2e-05, "loss": 0.05957884, "step": 4645 }, { "epoch": 9.292, "grad_norm": 1.4871472120285034, "learning_rate": 2e-05, "loss": 0.0687966, "step": 4646 }, { "epoch": 9.294, "grad_norm": 1.6461197137832642, "learning_rate": 2e-05, "loss": 0.0592112, "step": 4647 }, { "epoch": 9.296, "grad_norm": 1.4824073314666748, "learning_rate": 2e-05, "loss": 0.05652399, "step": 4648 }, { "epoch": 9.298, "grad_norm": 1.0243291854858398, "learning_rate": 2e-05, "loss": 0.05590088, "step": 4649 }, { "epoch": 9.3, "grad_norm": 1.4069799184799194, "learning_rate": 2e-05, "loss": 0.04285773, "step": 4650 }, { "epoch": 9.302, "grad_norm": 2.897965908050537, "learning_rate": 2e-05, "loss": 0.06073618, "step": 4651 }, { "epoch": 9.304, "grad_norm": 1.3762061595916748, "learning_rate": 2e-05, "loss": 0.05059266, "step": 4652 }, { "epoch": 9.306, "grad_norm": 1.1534329652786255, "learning_rate": 2e-05, "loss": 0.05894226, "step": 4653 }, { "epoch": 9.308, "grad_norm": 1.0113773345947266, "learning_rate": 2e-05, "loss": 0.03314479, "step": 4654 }, { "epoch": 9.31, "grad_norm": 1.4787489175796509, "learning_rate": 2e-05, "loss": 0.0453275, "step": 4655 }, { "epoch": 9.312, "grad_norm": 1.2350947856903076, "learning_rate": 2e-05, "loss": 0.04331245, "step": 4656 }, { "epoch": 9.314, "grad_norm": 1.4923902750015259, "learning_rate": 2e-05, "loss": 0.03696995, "step": 4657 }, { "epoch": 9.316, "grad_norm": 1.5131927728652954, "learning_rate": 2e-05, "loss": 0.05977695, "step": 4658 }, { "epoch": 9.318, "grad_norm": 1.4638057947158813, "learning_rate": 2e-05, "loss": 0.03903423, "step": 4659 }, { "epoch": 9.32, "grad_norm": 1.1074949502944946, "learning_rate": 2e-05, "loss": 0.03583281, "step": 4660 }, { "epoch": 9.322, "grad_norm": 1.7137705087661743, "learning_rate": 2e-05, "loss": 0.05756685, "step": 4661 }, { "epoch": 9.324, "grad_norm": 1.9927157163619995, "learning_rate": 2e-05, "loss": 0.06935163, "step": 4662 }, { "epoch": 9.326, "grad_norm": 0.9441924095153809, "learning_rate": 2e-05, "loss": 0.03335249, "step": 4663 }, { "epoch": 9.328, "grad_norm": 1.244004249572754, "learning_rate": 2e-05, "loss": 0.03979144, "step": 4664 }, { "epoch": 9.33, "grad_norm": 1.2493497133255005, "learning_rate": 2e-05, "loss": 0.03639866, "step": 4665 }, { "epoch": 9.332, "grad_norm": 0.9748623371124268, "learning_rate": 2e-05, "loss": 0.04105096, "step": 4666 }, { "epoch": 9.334, "grad_norm": 1.5983518362045288, "learning_rate": 2e-05, "loss": 0.04762005, "step": 4667 }, { "epoch": 9.336, "grad_norm": 1.1997288465499878, "learning_rate": 2e-05, "loss": 0.04283839, "step": 4668 }, { "epoch": 9.338, "grad_norm": 1.4287093877792358, "learning_rate": 2e-05, "loss": 0.04104313, "step": 4669 }, { "epoch": 9.34, "grad_norm": 3.71083664894104, "learning_rate": 2e-05, "loss": 0.0486076, "step": 4670 }, { "epoch": 9.342, "grad_norm": 1.5926488637924194, "learning_rate": 2e-05, "loss": 0.05330349, "step": 4671 }, { "epoch": 9.344, "grad_norm": 1.5894955396652222, "learning_rate": 2e-05, "loss": 0.0660143, "step": 4672 }, { "epoch": 9.346, "grad_norm": 1.9239521026611328, "learning_rate": 2e-05, "loss": 0.04656222, "step": 4673 }, { "epoch": 9.348, "grad_norm": 1.3735991716384888, "learning_rate": 2e-05, "loss": 0.04413497, "step": 4674 }, { "epoch": 9.35, "grad_norm": 1.8570923805236816, "learning_rate": 2e-05, "loss": 0.05755848, "step": 4675 }, { "epoch": 9.352, "grad_norm": 1.1730197668075562, "learning_rate": 2e-05, "loss": 0.03559911, "step": 4676 }, { "epoch": 9.354, "grad_norm": 1.6115859746932983, "learning_rate": 2e-05, "loss": 0.05688359, "step": 4677 }, { "epoch": 9.356, "grad_norm": 1.2387816905975342, "learning_rate": 2e-05, "loss": 0.04620358, "step": 4678 }, { "epoch": 9.358, "grad_norm": 1.4175556898117065, "learning_rate": 2e-05, "loss": 0.04618993, "step": 4679 }, { "epoch": 9.36, "grad_norm": 1.4951204061508179, "learning_rate": 2e-05, "loss": 0.04638466, "step": 4680 }, { "epoch": 9.362, "grad_norm": 1.4585829973220825, "learning_rate": 2e-05, "loss": 0.05543543, "step": 4681 }, { "epoch": 9.364, "grad_norm": 1.7880911827087402, "learning_rate": 2e-05, "loss": 0.0531502, "step": 4682 }, { "epoch": 9.366, "grad_norm": 2.3550987243652344, "learning_rate": 2e-05, "loss": 0.05489677, "step": 4683 }, { "epoch": 9.368, "grad_norm": 1.5169696807861328, "learning_rate": 2e-05, "loss": 0.03997877, "step": 4684 }, { "epoch": 9.37, "grad_norm": 2.648613691329956, "learning_rate": 2e-05, "loss": 0.06908815, "step": 4685 }, { "epoch": 9.372, "grad_norm": 1.8203058242797852, "learning_rate": 2e-05, "loss": 0.05535856, "step": 4686 }, { "epoch": 9.374, "grad_norm": 1.3426063060760498, "learning_rate": 2e-05, "loss": 0.04036969, "step": 4687 }, { "epoch": 9.376, "grad_norm": 0.8304132223129272, "learning_rate": 2e-05, "loss": 0.02641086, "step": 4688 }, { "epoch": 9.378, "grad_norm": 0.924473762512207, "learning_rate": 2e-05, "loss": 0.02449524, "step": 4689 }, { "epoch": 9.38, "grad_norm": 1.4265344142913818, "learning_rate": 2e-05, "loss": 0.03157543, "step": 4690 }, { "epoch": 9.382, "grad_norm": 1.9417023658752441, "learning_rate": 2e-05, "loss": 0.05121457, "step": 4691 }, { "epoch": 9.384, "grad_norm": 1.5157634019851685, "learning_rate": 2e-05, "loss": 0.06157221, "step": 4692 }, { "epoch": 9.386, "grad_norm": 2.430556535720825, "learning_rate": 2e-05, "loss": 0.058822, "step": 4693 }, { "epoch": 9.388, "grad_norm": 1.0767589807510376, "learning_rate": 2e-05, "loss": 0.04313029, "step": 4694 }, { "epoch": 9.39, "grad_norm": 1.75044584274292, "learning_rate": 2e-05, "loss": 0.06141669, "step": 4695 }, { "epoch": 9.392, "grad_norm": 1.6970949172973633, "learning_rate": 2e-05, "loss": 0.05509343, "step": 4696 }, { "epoch": 9.394, "grad_norm": 2.0073065757751465, "learning_rate": 2e-05, "loss": 0.04960424, "step": 4697 }, { "epoch": 9.396, "grad_norm": 1.4273754358291626, "learning_rate": 2e-05, "loss": 0.05489523, "step": 4698 }, { "epoch": 9.398, "grad_norm": 1.3109307289123535, "learning_rate": 2e-05, "loss": 0.04698121, "step": 4699 }, { "epoch": 9.4, "grad_norm": 1.3388149738311768, "learning_rate": 2e-05, "loss": 0.04047222, "step": 4700 }, { "epoch": 9.402, "grad_norm": 1.3578113317489624, "learning_rate": 2e-05, "loss": 0.0407288, "step": 4701 }, { "epoch": 9.404, "grad_norm": 1.5241144895553589, "learning_rate": 2e-05, "loss": 0.05070861, "step": 4702 }, { "epoch": 9.406, "grad_norm": 1.9592812061309814, "learning_rate": 2e-05, "loss": 0.04564502, "step": 4703 }, { "epoch": 9.408, "grad_norm": 1.1348962783813477, "learning_rate": 2e-05, "loss": 0.03838561, "step": 4704 }, { "epoch": 9.41, "grad_norm": 1.4928836822509766, "learning_rate": 2e-05, "loss": 0.04029758, "step": 4705 }, { "epoch": 9.412, "grad_norm": 1.9070638418197632, "learning_rate": 2e-05, "loss": 0.06180852, "step": 4706 }, { "epoch": 9.414, "grad_norm": 1.1822073459625244, "learning_rate": 2e-05, "loss": 0.0310965, "step": 4707 }, { "epoch": 9.416, "grad_norm": 1.1661112308502197, "learning_rate": 2e-05, "loss": 0.05593649, "step": 4708 }, { "epoch": 9.418, "grad_norm": 1.2493245601654053, "learning_rate": 2e-05, "loss": 0.04332493, "step": 4709 }, { "epoch": 9.42, "grad_norm": 2.689755439758301, "learning_rate": 2e-05, "loss": 0.05411544, "step": 4710 }, { "epoch": 9.422, "grad_norm": 0.9683674573898315, "learning_rate": 2e-05, "loss": 0.04067558, "step": 4711 }, { "epoch": 9.424, "grad_norm": 1.0949651002883911, "learning_rate": 2e-05, "loss": 0.04329629, "step": 4712 }, { "epoch": 9.426, "grad_norm": 1.6994655132293701, "learning_rate": 2e-05, "loss": 0.05947253, "step": 4713 }, { "epoch": 9.428, "grad_norm": 1.421691656112671, "learning_rate": 2e-05, "loss": 0.04533539, "step": 4714 }, { "epoch": 9.43, "grad_norm": 3.5447537899017334, "learning_rate": 2e-05, "loss": 0.05796078, "step": 4715 }, { "epoch": 9.432, "grad_norm": 1.1044230461120605, "learning_rate": 2e-05, "loss": 0.03950178, "step": 4716 }, { "epoch": 9.434, "grad_norm": 1.823502779006958, "learning_rate": 2e-05, "loss": 0.04175462, "step": 4717 }, { "epoch": 9.436, "grad_norm": 1.0664321184158325, "learning_rate": 2e-05, "loss": 0.03864664, "step": 4718 }, { "epoch": 9.438, "grad_norm": 1.5686670541763306, "learning_rate": 2e-05, "loss": 0.0426276, "step": 4719 }, { "epoch": 9.44, "grad_norm": 1.1006217002868652, "learning_rate": 2e-05, "loss": 0.04525345, "step": 4720 }, { "epoch": 9.442, "grad_norm": 1.4460588693618774, "learning_rate": 2e-05, "loss": 0.04516764, "step": 4721 }, { "epoch": 9.444, "grad_norm": 1.7434966564178467, "learning_rate": 2e-05, "loss": 0.05540922, "step": 4722 }, { "epoch": 9.446, "grad_norm": 1.884838581085205, "learning_rate": 2e-05, "loss": 0.06515225, "step": 4723 }, { "epoch": 9.448, "grad_norm": 1.8249808549880981, "learning_rate": 2e-05, "loss": 0.03900361, "step": 4724 }, { "epoch": 9.45, "grad_norm": 0.843631386756897, "learning_rate": 2e-05, "loss": 0.02820579, "step": 4725 }, { "epoch": 9.452, "grad_norm": 1.1317899227142334, "learning_rate": 2e-05, "loss": 0.02753792, "step": 4726 }, { "epoch": 9.454, "grad_norm": 1.9142801761627197, "learning_rate": 2e-05, "loss": 0.05124582, "step": 4727 }, { "epoch": 9.456, "grad_norm": 1.420554757118225, "learning_rate": 2e-05, "loss": 0.049357, "step": 4728 }, { "epoch": 9.458, "grad_norm": 1.742428183555603, "learning_rate": 2e-05, "loss": 0.0452363, "step": 4729 }, { "epoch": 9.46, "grad_norm": 1.4264081716537476, "learning_rate": 2e-05, "loss": 0.04355854, "step": 4730 }, { "epoch": 9.462, "grad_norm": 1.1478997468948364, "learning_rate": 2e-05, "loss": 0.04364795, "step": 4731 }, { "epoch": 9.464, "grad_norm": 1.2513768672943115, "learning_rate": 2e-05, "loss": 0.03645619, "step": 4732 }, { "epoch": 9.466, "grad_norm": 1.4799087047576904, "learning_rate": 2e-05, "loss": 0.05023148, "step": 4733 }, { "epoch": 9.468, "grad_norm": 1.3384833335876465, "learning_rate": 2e-05, "loss": 0.04882913, "step": 4734 }, { "epoch": 9.47, "grad_norm": 1.4373477697372437, "learning_rate": 2e-05, "loss": 0.06012148, "step": 4735 }, { "epoch": 9.472, "grad_norm": 1.1688745021820068, "learning_rate": 2e-05, "loss": 0.04947564, "step": 4736 }, { "epoch": 9.474, "grad_norm": 1.4130213260650635, "learning_rate": 2e-05, "loss": 0.05789327, "step": 4737 }, { "epoch": 9.475999999999999, "grad_norm": 1.1802808046340942, "learning_rate": 2e-05, "loss": 0.04290013, "step": 4738 }, { "epoch": 9.478, "grad_norm": 1.1454658508300781, "learning_rate": 2e-05, "loss": 0.04127076, "step": 4739 }, { "epoch": 9.48, "grad_norm": 1.4601095914840698, "learning_rate": 2e-05, "loss": 0.05415628, "step": 4740 }, { "epoch": 9.482, "grad_norm": 1.9755573272705078, "learning_rate": 2e-05, "loss": 0.04955574, "step": 4741 }, { "epoch": 9.484, "grad_norm": 1.6942572593688965, "learning_rate": 2e-05, "loss": 0.06293144, "step": 4742 }, { "epoch": 9.486, "grad_norm": 1.1458663940429688, "learning_rate": 2e-05, "loss": 0.04004298, "step": 4743 }, { "epoch": 9.488, "grad_norm": 1.4055947065353394, "learning_rate": 2e-05, "loss": 0.03598074, "step": 4744 }, { "epoch": 9.49, "grad_norm": 1.7783430814743042, "learning_rate": 2e-05, "loss": 0.04351512, "step": 4745 }, { "epoch": 9.492, "grad_norm": 1.5568244457244873, "learning_rate": 2e-05, "loss": 0.05658468, "step": 4746 }, { "epoch": 9.494, "grad_norm": 0.8985837697982788, "learning_rate": 2e-05, "loss": 0.03822737, "step": 4747 }, { "epoch": 9.496, "grad_norm": 1.1120895147323608, "learning_rate": 2e-05, "loss": 0.05430552, "step": 4748 }, { "epoch": 9.498, "grad_norm": 1.3257687091827393, "learning_rate": 2e-05, "loss": 0.05184394, "step": 4749 }, { "epoch": 9.5, "grad_norm": 2.949873208999634, "learning_rate": 2e-05, "loss": 0.05240162, "step": 4750 }, { "epoch": 9.502, "grad_norm": 3.3825747966766357, "learning_rate": 2e-05, "loss": 0.04307269, "step": 4751 }, { "epoch": 9.504, "grad_norm": 1.5267915725708008, "learning_rate": 2e-05, "loss": 0.05353354, "step": 4752 }, { "epoch": 9.506, "grad_norm": 1.2166399955749512, "learning_rate": 2e-05, "loss": 0.03677273, "step": 4753 }, { "epoch": 9.508, "grad_norm": 1.535239577293396, "learning_rate": 2e-05, "loss": 0.04334023, "step": 4754 }, { "epoch": 9.51, "grad_norm": 1.9072614908218384, "learning_rate": 2e-05, "loss": 0.03177762, "step": 4755 }, { "epoch": 9.512, "grad_norm": 1.5284210443496704, "learning_rate": 2e-05, "loss": 0.05824082, "step": 4756 }, { "epoch": 9.514, "grad_norm": 1.043893814086914, "learning_rate": 2e-05, "loss": 0.03949745, "step": 4757 }, { "epoch": 9.516, "grad_norm": 2.1516237258911133, "learning_rate": 2e-05, "loss": 0.05799168, "step": 4758 }, { "epoch": 9.518, "grad_norm": 1.489659070968628, "learning_rate": 2e-05, "loss": 0.05394222, "step": 4759 }, { "epoch": 9.52, "grad_norm": 1.9003314971923828, "learning_rate": 2e-05, "loss": 0.07208293, "step": 4760 }, { "epoch": 9.522, "grad_norm": 1.7069766521453857, "learning_rate": 2e-05, "loss": 0.0472491, "step": 4761 }, { "epoch": 9.524000000000001, "grad_norm": 1.266418695449829, "learning_rate": 2e-05, "loss": 0.05125954, "step": 4762 }, { "epoch": 9.526, "grad_norm": 1.1188979148864746, "learning_rate": 2e-05, "loss": 0.0413934, "step": 4763 }, { "epoch": 9.528, "grad_norm": 0.9691184163093567, "learning_rate": 2e-05, "loss": 0.03132164, "step": 4764 }, { "epoch": 9.53, "grad_norm": 1.032204270362854, "learning_rate": 2e-05, "loss": 0.04495644, "step": 4765 }, { "epoch": 9.532, "grad_norm": 1.2957261800765991, "learning_rate": 2e-05, "loss": 0.04635788, "step": 4766 }, { "epoch": 9.534, "grad_norm": 1.161616563796997, "learning_rate": 2e-05, "loss": 0.0276463, "step": 4767 }, { "epoch": 9.536, "grad_norm": 1.1725478172302246, "learning_rate": 2e-05, "loss": 0.04314704, "step": 4768 }, { "epoch": 9.538, "grad_norm": 1.3982518911361694, "learning_rate": 2e-05, "loss": 0.06339908, "step": 4769 }, { "epoch": 9.54, "grad_norm": 1.308406114578247, "learning_rate": 2e-05, "loss": 0.04440673, "step": 4770 }, { "epoch": 9.542, "grad_norm": 1.689806580543518, "learning_rate": 2e-05, "loss": 0.06120706, "step": 4771 }, { "epoch": 9.544, "grad_norm": 1.9290145635604858, "learning_rate": 2e-05, "loss": 0.05553887, "step": 4772 }, { "epoch": 9.546, "grad_norm": 1.7180081605911255, "learning_rate": 2e-05, "loss": 0.04278467, "step": 4773 }, { "epoch": 9.548, "grad_norm": 1.7261685132980347, "learning_rate": 2e-05, "loss": 0.04878137, "step": 4774 }, { "epoch": 9.55, "grad_norm": 1.5006641149520874, "learning_rate": 2e-05, "loss": 0.04991702, "step": 4775 }, { "epoch": 9.552, "grad_norm": 1.826326608657837, "learning_rate": 2e-05, "loss": 0.09697184, "step": 4776 }, { "epoch": 9.554, "grad_norm": 1.0277416706085205, "learning_rate": 2e-05, "loss": 0.03996582, "step": 4777 }, { "epoch": 9.556000000000001, "grad_norm": 3.2796573638916016, "learning_rate": 2e-05, "loss": 0.04204332, "step": 4778 }, { "epoch": 9.558, "grad_norm": 2.1303892135620117, "learning_rate": 2e-05, "loss": 0.04379918, "step": 4779 }, { "epoch": 9.56, "grad_norm": 1.625738263130188, "learning_rate": 2e-05, "loss": 0.06177279, "step": 4780 }, { "epoch": 9.562, "grad_norm": 1.4296420812606812, "learning_rate": 2e-05, "loss": 0.06877504, "step": 4781 }, { "epoch": 9.564, "grad_norm": 2.3310558795928955, "learning_rate": 2e-05, "loss": 0.06073118, "step": 4782 }, { "epoch": 9.566, "grad_norm": 1.4146039485931396, "learning_rate": 2e-05, "loss": 0.05733766, "step": 4783 }, { "epoch": 9.568, "grad_norm": 1.4554498195648193, "learning_rate": 2e-05, "loss": 0.05432015, "step": 4784 }, { "epoch": 9.57, "grad_norm": 1.2609492540359497, "learning_rate": 2e-05, "loss": 0.03215706, "step": 4785 }, { "epoch": 9.572, "grad_norm": 1.01859712600708, "learning_rate": 2e-05, "loss": 0.03957218, "step": 4786 }, { "epoch": 9.574, "grad_norm": 1.4612160921096802, "learning_rate": 2e-05, "loss": 0.04301356, "step": 4787 }, { "epoch": 9.576, "grad_norm": 1.6574594974517822, "learning_rate": 2e-05, "loss": 0.06428871, "step": 4788 }, { "epoch": 9.578, "grad_norm": 3.813023328781128, "learning_rate": 2e-05, "loss": 0.04893555, "step": 4789 }, { "epoch": 9.58, "grad_norm": 1.7202383279800415, "learning_rate": 2e-05, "loss": 0.05013211, "step": 4790 }, { "epoch": 9.582, "grad_norm": 0.97543865442276, "learning_rate": 2e-05, "loss": 0.03125712, "step": 4791 }, { "epoch": 9.584, "grad_norm": 1.4746700525283813, "learning_rate": 2e-05, "loss": 0.05268769, "step": 4792 }, { "epoch": 9.586, "grad_norm": 1.760599970817566, "learning_rate": 2e-05, "loss": 0.05366766, "step": 4793 }, { "epoch": 9.588, "grad_norm": 1.789549708366394, "learning_rate": 2e-05, "loss": 0.0536264, "step": 4794 }, { "epoch": 9.59, "grad_norm": 1.7900644540786743, "learning_rate": 2e-05, "loss": 0.06783197, "step": 4795 }, { "epoch": 9.592, "grad_norm": 1.0539871454238892, "learning_rate": 2e-05, "loss": 0.03785194, "step": 4796 }, { "epoch": 9.594, "grad_norm": 1.4373170137405396, "learning_rate": 2e-05, "loss": 0.05752947, "step": 4797 }, { "epoch": 9.596, "grad_norm": 1.999144196510315, "learning_rate": 2e-05, "loss": 0.05707481, "step": 4798 }, { "epoch": 9.598, "grad_norm": 1.2666716575622559, "learning_rate": 2e-05, "loss": 0.05010325, "step": 4799 }, { "epoch": 9.6, "grad_norm": 0.9124451279640198, "learning_rate": 2e-05, "loss": 0.03530227, "step": 4800 }, { "epoch": 9.602, "grad_norm": 1.0427782535552979, "learning_rate": 2e-05, "loss": 0.04283682, "step": 4801 }, { "epoch": 9.604, "grad_norm": 0.9843890070915222, "learning_rate": 2e-05, "loss": 0.04103471, "step": 4802 }, { "epoch": 9.606, "grad_norm": 1.0473707914352417, "learning_rate": 2e-05, "loss": 0.04284954, "step": 4803 }, { "epoch": 9.608, "grad_norm": 0.885628879070282, "learning_rate": 2e-05, "loss": 0.03729761, "step": 4804 }, { "epoch": 9.61, "grad_norm": 1.2665205001831055, "learning_rate": 2e-05, "loss": 0.05133078, "step": 4805 }, { "epoch": 9.612, "grad_norm": 0.7963083386421204, "learning_rate": 2e-05, "loss": 0.03142844, "step": 4806 }, { "epoch": 9.614, "grad_norm": 1.1994826793670654, "learning_rate": 2e-05, "loss": 0.0482708, "step": 4807 }, { "epoch": 9.616, "grad_norm": 1.1817045211791992, "learning_rate": 2e-05, "loss": 0.03601782, "step": 4808 }, { "epoch": 9.618, "grad_norm": 1.5643788576126099, "learning_rate": 2e-05, "loss": 0.03845805, "step": 4809 }, { "epoch": 9.62, "grad_norm": 1.7584844827651978, "learning_rate": 2e-05, "loss": 0.0413286, "step": 4810 }, { "epoch": 9.622, "grad_norm": 1.8276487588882446, "learning_rate": 2e-05, "loss": 0.05386912, "step": 4811 }, { "epoch": 9.624, "grad_norm": 1.1118780374526978, "learning_rate": 2e-05, "loss": 0.04545128, "step": 4812 }, { "epoch": 9.626, "grad_norm": 2.065323829650879, "learning_rate": 2e-05, "loss": 0.03349657, "step": 4813 }, { "epoch": 9.628, "grad_norm": 0.9711107611656189, "learning_rate": 2e-05, "loss": 0.03508176, "step": 4814 }, { "epoch": 9.63, "grad_norm": 1.1863573789596558, "learning_rate": 2e-05, "loss": 0.04443147, "step": 4815 }, { "epoch": 9.632, "grad_norm": 1.39435613155365, "learning_rate": 2e-05, "loss": 0.06546235, "step": 4816 }, { "epoch": 9.634, "grad_norm": 1.143619418144226, "learning_rate": 2e-05, "loss": 0.05091481, "step": 4817 }, { "epoch": 9.636, "grad_norm": 1.198135495185852, "learning_rate": 2e-05, "loss": 0.03812053, "step": 4818 }, { "epoch": 9.638, "grad_norm": 0.8819257616996765, "learning_rate": 2e-05, "loss": 0.03131585, "step": 4819 }, { "epoch": 9.64, "grad_norm": 0.9608490467071533, "learning_rate": 2e-05, "loss": 0.0372583, "step": 4820 }, { "epoch": 9.642, "grad_norm": 1.5134555101394653, "learning_rate": 2e-05, "loss": 0.03930507, "step": 4821 }, { "epoch": 9.644, "grad_norm": 1.8514659404754639, "learning_rate": 2e-05, "loss": 0.04383114, "step": 4822 }, { "epoch": 9.646, "grad_norm": 1.261040449142456, "learning_rate": 2e-05, "loss": 0.04548458, "step": 4823 }, { "epoch": 9.648, "grad_norm": 2.214454174041748, "learning_rate": 2e-05, "loss": 0.0620476, "step": 4824 }, { "epoch": 9.65, "grad_norm": 1.333584189414978, "learning_rate": 2e-05, "loss": 0.057661, "step": 4825 }, { "epoch": 9.652, "grad_norm": 1.0919182300567627, "learning_rate": 2e-05, "loss": 0.04643912, "step": 4826 }, { "epoch": 9.654, "grad_norm": 1.4603396654129028, "learning_rate": 2e-05, "loss": 0.05329787, "step": 4827 }, { "epoch": 9.656, "grad_norm": 1.371373176574707, "learning_rate": 2e-05, "loss": 0.04948402, "step": 4828 }, { "epoch": 9.658, "grad_norm": 1.5884400606155396, "learning_rate": 2e-05, "loss": 0.05298093, "step": 4829 }, { "epoch": 9.66, "grad_norm": 1.106804609298706, "learning_rate": 2e-05, "loss": 0.03604655, "step": 4830 }, { "epoch": 9.662, "grad_norm": 2.2202231884002686, "learning_rate": 2e-05, "loss": 0.04735537, "step": 4831 }, { "epoch": 9.664, "grad_norm": 2.013181209564209, "learning_rate": 2e-05, "loss": 0.08114439, "step": 4832 }, { "epoch": 9.666, "grad_norm": 1.9070613384246826, "learning_rate": 2e-05, "loss": 0.04722995, "step": 4833 }, { "epoch": 9.668, "grad_norm": 1.7545431852340698, "learning_rate": 2e-05, "loss": 0.04931739, "step": 4834 }, { "epoch": 9.67, "grad_norm": 1.3694254159927368, "learning_rate": 2e-05, "loss": 0.03651468, "step": 4835 }, { "epoch": 9.672, "grad_norm": 0.986675500869751, "learning_rate": 2e-05, "loss": 0.02984712, "step": 4836 }, { "epoch": 9.674, "grad_norm": 1.749185562133789, "learning_rate": 2e-05, "loss": 0.04380158, "step": 4837 }, { "epoch": 9.676, "grad_norm": 2.234846353530884, "learning_rate": 2e-05, "loss": 0.04459833, "step": 4838 }, { "epoch": 9.678, "grad_norm": 1.209645390510559, "learning_rate": 2e-05, "loss": 0.03895848, "step": 4839 }, { "epoch": 9.68, "grad_norm": 2.4407246112823486, "learning_rate": 2e-05, "loss": 0.05144349, "step": 4840 }, { "epoch": 9.682, "grad_norm": 1.3178300857543945, "learning_rate": 2e-05, "loss": 0.03554653, "step": 4841 }, { "epoch": 9.684, "grad_norm": 1.2912734746932983, "learning_rate": 2e-05, "loss": 0.05963112, "step": 4842 }, { "epoch": 9.686, "grad_norm": 1.3198248147964478, "learning_rate": 2e-05, "loss": 0.0395943, "step": 4843 }, { "epoch": 9.688, "grad_norm": 1.2845044136047363, "learning_rate": 2e-05, "loss": 0.04196345, "step": 4844 }, { "epoch": 9.69, "grad_norm": 2.066082239151001, "learning_rate": 2e-05, "loss": 0.06731205, "step": 4845 }, { "epoch": 9.692, "grad_norm": 1.1936308145523071, "learning_rate": 2e-05, "loss": 0.03885185, "step": 4846 }, { "epoch": 9.693999999999999, "grad_norm": 1.7781189680099487, "learning_rate": 2e-05, "loss": 0.03905052, "step": 4847 }, { "epoch": 9.696, "grad_norm": 1.339654803276062, "learning_rate": 2e-05, "loss": 0.04398444, "step": 4848 }, { "epoch": 9.698, "grad_norm": 1.0488650798797607, "learning_rate": 2e-05, "loss": 0.04987929, "step": 4849 }, { "epoch": 9.7, "grad_norm": 0.9759476780891418, "learning_rate": 2e-05, "loss": 0.03937261, "step": 4850 }, { "epoch": 9.702, "grad_norm": 1.318277359008789, "learning_rate": 2e-05, "loss": 0.03982365, "step": 4851 }, { "epoch": 9.704, "grad_norm": 1.2131550312042236, "learning_rate": 2e-05, "loss": 0.04867811, "step": 4852 }, { "epoch": 9.706, "grad_norm": 1.8222668170928955, "learning_rate": 2e-05, "loss": 0.06708182, "step": 4853 }, { "epoch": 9.708, "grad_norm": 1.0944008827209473, "learning_rate": 2e-05, "loss": 0.03868869, "step": 4854 }, { "epoch": 9.71, "grad_norm": 1.5051614046096802, "learning_rate": 2e-05, "loss": 0.07919559, "step": 4855 }, { "epoch": 9.712, "grad_norm": 1.0937649011611938, "learning_rate": 2e-05, "loss": 0.04332706, "step": 4856 }, { "epoch": 9.714, "grad_norm": 1.316471815109253, "learning_rate": 2e-05, "loss": 0.04251353, "step": 4857 }, { "epoch": 9.716, "grad_norm": 1.5367369651794434, "learning_rate": 2e-05, "loss": 0.04644439, "step": 4858 }, { "epoch": 9.718, "grad_norm": 2.491387367248535, "learning_rate": 2e-05, "loss": 0.04542371, "step": 4859 }, { "epoch": 9.72, "grad_norm": 0.990361750125885, "learning_rate": 2e-05, "loss": 0.03856748, "step": 4860 }, { "epoch": 9.722, "grad_norm": 0.9742710590362549, "learning_rate": 2e-05, "loss": 0.03343142, "step": 4861 }, { "epoch": 9.724, "grad_norm": 2.8778538703918457, "learning_rate": 2e-05, "loss": 0.04725021, "step": 4862 }, { "epoch": 9.725999999999999, "grad_norm": 1.19037663936615, "learning_rate": 2e-05, "loss": 0.0407683, "step": 4863 }, { "epoch": 9.728, "grad_norm": 1.3334569931030273, "learning_rate": 2e-05, "loss": 0.04024338, "step": 4864 }, { "epoch": 9.73, "grad_norm": 2.018606424331665, "learning_rate": 2e-05, "loss": 0.04522476, "step": 4865 }, { "epoch": 9.732, "grad_norm": 1.435630202293396, "learning_rate": 2e-05, "loss": 0.04703035, "step": 4866 }, { "epoch": 9.734, "grad_norm": 1.2410895824432373, "learning_rate": 2e-05, "loss": 0.04058584, "step": 4867 }, { "epoch": 9.736, "grad_norm": 1.588903546333313, "learning_rate": 2e-05, "loss": 0.03625515, "step": 4868 }, { "epoch": 9.738, "grad_norm": 2.5016534328460693, "learning_rate": 2e-05, "loss": 0.0581548, "step": 4869 }, { "epoch": 9.74, "grad_norm": 1.5118154287338257, "learning_rate": 2e-05, "loss": 0.0549577, "step": 4870 }, { "epoch": 9.742, "grad_norm": 1.6460639238357544, "learning_rate": 2e-05, "loss": 0.04811797, "step": 4871 }, { "epoch": 9.744, "grad_norm": 4.694870948791504, "learning_rate": 2e-05, "loss": 0.05355388, "step": 4872 }, { "epoch": 9.746, "grad_norm": 1.1007333993911743, "learning_rate": 2e-05, "loss": 0.03959399, "step": 4873 }, { "epoch": 9.748, "grad_norm": 1.7420120239257812, "learning_rate": 2e-05, "loss": 0.04218205, "step": 4874 }, { "epoch": 9.75, "grad_norm": 1.3306318521499634, "learning_rate": 2e-05, "loss": 0.05289833, "step": 4875 }, { "epoch": 9.752, "grad_norm": 1.4687063694000244, "learning_rate": 2e-05, "loss": 0.03599568, "step": 4876 }, { "epoch": 9.754, "grad_norm": 1.3621610403060913, "learning_rate": 2e-05, "loss": 0.05598809, "step": 4877 }, { "epoch": 9.756, "grad_norm": 1.803964614868164, "learning_rate": 2e-05, "loss": 0.03704439, "step": 4878 }, { "epoch": 9.758, "grad_norm": 1.4855093955993652, "learning_rate": 2e-05, "loss": 0.05152279, "step": 4879 }, { "epoch": 9.76, "grad_norm": 1.0236130952835083, "learning_rate": 2e-05, "loss": 0.04855911, "step": 4880 }, { "epoch": 9.762, "grad_norm": 1.187638521194458, "learning_rate": 2e-05, "loss": 0.03158501, "step": 4881 }, { "epoch": 9.764, "grad_norm": 2.0507049560546875, "learning_rate": 2e-05, "loss": 0.0431616, "step": 4882 }, { "epoch": 9.766, "grad_norm": 2.3257975578308105, "learning_rate": 2e-05, "loss": 0.06384309, "step": 4883 }, { "epoch": 9.768, "grad_norm": 1.2630629539489746, "learning_rate": 2e-05, "loss": 0.03864199, "step": 4884 }, { "epoch": 9.77, "grad_norm": 1.4038701057434082, "learning_rate": 2e-05, "loss": 0.02960966, "step": 4885 }, { "epoch": 9.772, "grad_norm": 1.4654169082641602, "learning_rate": 2e-05, "loss": 0.04740506, "step": 4886 }, { "epoch": 9.774000000000001, "grad_norm": 0.9857847690582275, "learning_rate": 2e-05, "loss": 0.03219384, "step": 4887 }, { "epoch": 9.776, "grad_norm": 1.209273099899292, "learning_rate": 2e-05, "loss": 0.03910058, "step": 4888 }, { "epoch": 9.778, "grad_norm": 2.3397216796875, "learning_rate": 2e-05, "loss": 0.06353013, "step": 4889 }, { "epoch": 9.78, "grad_norm": 1.6057580709457397, "learning_rate": 2e-05, "loss": 0.0506817, "step": 4890 }, { "epoch": 9.782, "grad_norm": 1.2448252439498901, "learning_rate": 2e-05, "loss": 0.03712595, "step": 4891 }, { "epoch": 9.784, "grad_norm": 1.4985586404800415, "learning_rate": 2e-05, "loss": 0.04543132, "step": 4892 }, { "epoch": 9.786, "grad_norm": 2.1852614879608154, "learning_rate": 2e-05, "loss": 0.03600576, "step": 4893 }, { "epoch": 9.788, "grad_norm": 1.814179539680481, "learning_rate": 2e-05, "loss": 0.07543579, "step": 4894 }, { "epoch": 9.79, "grad_norm": 1.219164490699768, "learning_rate": 2e-05, "loss": 0.03292475, "step": 4895 }, { "epoch": 9.792, "grad_norm": 1.3909167051315308, "learning_rate": 2e-05, "loss": 0.02347614, "step": 4896 }, { "epoch": 9.794, "grad_norm": 2.1311941146850586, "learning_rate": 2e-05, "loss": 0.05728153, "step": 4897 }, { "epoch": 9.796, "grad_norm": 0.7100264430046082, "learning_rate": 2e-05, "loss": 0.02516245, "step": 4898 }, { "epoch": 9.798, "grad_norm": 2.8121373653411865, "learning_rate": 2e-05, "loss": 0.05207011, "step": 4899 }, { "epoch": 9.8, "grad_norm": 1.7172414064407349, "learning_rate": 2e-05, "loss": 0.03684345, "step": 4900 }, { "epoch": 9.802, "grad_norm": 1.0382096767425537, "learning_rate": 2e-05, "loss": 0.05103581, "step": 4901 }, { "epoch": 9.804, "grad_norm": 1.0930532217025757, "learning_rate": 2e-05, "loss": 0.04050636, "step": 4902 }, { "epoch": 9.806000000000001, "grad_norm": 1.7401964664459229, "learning_rate": 2e-05, "loss": 0.052274, "step": 4903 }, { "epoch": 9.808, "grad_norm": 2.4981439113616943, "learning_rate": 2e-05, "loss": 0.08489673, "step": 4904 }, { "epoch": 9.81, "grad_norm": 1.183165431022644, "learning_rate": 2e-05, "loss": 0.03373475, "step": 4905 }, { "epoch": 9.812, "grad_norm": 1.1066116094589233, "learning_rate": 2e-05, "loss": 0.0363532, "step": 4906 }, { "epoch": 9.814, "grad_norm": 1.2488024234771729, "learning_rate": 2e-05, "loss": 0.04837116, "step": 4907 }, { "epoch": 9.816, "grad_norm": 1.4416756629943848, "learning_rate": 2e-05, "loss": 0.04575769, "step": 4908 }, { "epoch": 9.818, "grad_norm": 1.133487343788147, "learning_rate": 2e-05, "loss": 0.04744884, "step": 4909 }, { "epoch": 9.82, "grad_norm": 1.1407426595687866, "learning_rate": 2e-05, "loss": 0.03075367, "step": 4910 }, { "epoch": 9.822, "grad_norm": 1.781827449798584, "learning_rate": 2e-05, "loss": 0.04467312, "step": 4911 }, { "epoch": 9.824, "grad_norm": 1.2072088718414307, "learning_rate": 2e-05, "loss": 0.04478139, "step": 4912 }, { "epoch": 9.826, "grad_norm": 1.2510048151016235, "learning_rate": 2e-05, "loss": 0.04496565, "step": 4913 }, { "epoch": 9.828, "grad_norm": 2.2760701179504395, "learning_rate": 2e-05, "loss": 0.04890842, "step": 4914 }, { "epoch": 9.83, "grad_norm": 1.4407870769500732, "learning_rate": 2e-05, "loss": 0.05081564, "step": 4915 }, { "epoch": 9.832, "grad_norm": 1.5468295812606812, "learning_rate": 2e-05, "loss": 0.03260962, "step": 4916 }, { "epoch": 9.834, "grad_norm": 1.3650826215744019, "learning_rate": 2e-05, "loss": 0.03465262, "step": 4917 }, { "epoch": 9.836, "grad_norm": 1.1097781658172607, "learning_rate": 2e-05, "loss": 0.0332303, "step": 4918 }, { "epoch": 9.838, "grad_norm": 1.083861231803894, "learning_rate": 2e-05, "loss": 0.04787143, "step": 4919 }, { "epoch": 9.84, "grad_norm": 1.8175002336502075, "learning_rate": 2e-05, "loss": 0.06026558, "step": 4920 }, { "epoch": 9.842, "grad_norm": 0.9349530339241028, "learning_rate": 2e-05, "loss": 0.03050534, "step": 4921 }, { "epoch": 9.844, "grad_norm": 1.1388529539108276, "learning_rate": 2e-05, "loss": 0.048526, "step": 4922 }, { "epoch": 9.846, "grad_norm": 1.9644010066986084, "learning_rate": 2e-05, "loss": 0.05371387, "step": 4923 }, { "epoch": 9.848, "grad_norm": 1.6769602298736572, "learning_rate": 2e-05, "loss": 0.04062205, "step": 4924 }, { "epoch": 9.85, "grad_norm": 1.8487584590911865, "learning_rate": 2e-05, "loss": 0.0460755, "step": 4925 }, { "epoch": 9.852, "grad_norm": 0.8803853392601013, "learning_rate": 2e-05, "loss": 0.02864346, "step": 4926 }, { "epoch": 9.854, "grad_norm": 1.780892014503479, "learning_rate": 2e-05, "loss": 0.04363332, "step": 4927 }, { "epoch": 9.856, "grad_norm": 1.1148005723953247, "learning_rate": 2e-05, "loss": 0.04193269, "step": 4928 }, { "epoch": 9.858, "grad_norm": 2.4475464820861816, "learning_rate": 2e-05, "loss": 0.05434407, "step": 4929 }, { "epoch": 9.86, "grad_norm": 1.3744219541549683, "learning_rate": 2e-05, "loss": 0.05382394, "step": 4930 }, { "epoch": 9.862, "grad_norm": 1.0499470233917236, "learning_rate": 2e-05, "loss": 0.03570357, "step": 4931 }, { "epoch": 9.864, "grad_norm": 1.005441665649414, "learning_rate": 2e-05, "loss": 0.03323954, "step": 4932 }, { "epoch": 9.866, "grad_norm": 1.7330776453018188, "learning_rate": 2e-05, "loss": 0.05397016, "step": 4933 }, { "epoch": 9.868, "grad_norm": 2.577805280685425, "learning_rate": 2e-05, "loss": 0.06905188, "step": 4934 }, { "epoch": 9.87, "grad_norm": 0.9506399035453796, "learning_rate": 2e-05, "loss": 0.03506295, "step": 4935 }, { "epoch": 9.872, "grad_norm": 0.7719003558158875, "learning_rate": 2e-05, "loss": 0.02560582, "step": 4936 }, { "epoch": 9.874, "grad_norm": 1.596714735031128, "learning_rate": 2e-05, "loss": 0.04535085, "step": 4937 }, { "epoch": 9.876, "grad_norm": 1.2300113439559937, "learning_rate": 2e-05, "loss": 0.04877425, "step": 4938 }, { "epoch": 9.878, "grad_norm": 1.531198263168335, "learning_rate": 2e-05, "loss": 0.0485093, "step": 4939 }, { "epoch": 9.88, "grad_norm": 1.357596755027771, "learning_rate": 2e-05, "loss": 0.0346534, "step": 4940 }, { "epoch": 9.882, "grad_norm": 1.8261871337890625, "learning_rate": 2e-05, "loss": 0.06768322, "step": 4941 }, { "epoch": 9.884, "grad_norm": 1.8077200651168823, "learning_rate": 2e-05, "loss": 0.04153629, "step": 4942 }, { "epoch": 9.886, "grad_norm": 1.8714913129806519, "learning_rate": 2e-05, "loss": 0.05186781, "step": 4943 }, { "epoch": 9.888, "grad_norm": 1.3624212741851807, "learning_rate": 2e-05, "loss": 0.04509423, "step": 4944 }, { "epoch": 9.89, "grad_norm": 1.4254581928253174, "learning_rate": 2e-05, "loss": 0.03368966, "step": 4945 }, { "epoch": 9.892, "grad_norm": 1.085328221321106, "learning_rate": 2e-05, "loss": 0.03534285, "step": 4946 }, { "epoch": 9.894, "grad_norm": 1.1313482522964478, "learning_rate": 2e-05, "loss": 0.03224604, "step": 4947 }, { "epoch": 9.896, "grad_norm": 1.5917694568634033, "learning_rate": 2e-05, "loss": 0.04878377, "step": 4948 }, { "epoch": 9.898, "grad_norm": 2.9790420532226562, "learning_rate": 2e-05, "loss": 0.06303143, "step": 4949 }, { "epoch": 9.9, "grad_norm": 2.5057618618011475, "learning_rate": 2e-05, "loss": 0.06061789, "step": 4950 }, { "epoch": 9.902, "grad_norm": 1.0685169696807861, "learning_rate": 2e-05, "loss": 0.03603726, "step": 4951 }, { "epoch": 9.904, "grad_norm": 2.2576003074645996, "learning_rate": 2e-05, "loss": 0.04536259, "step": 4952 }, { "epoch": 9.906, "grad_norm": 1.9615432024002075, "learning_rate": 2e-05, "loss": 0.07404278, "step": 4953 }, { "epoch": 9.908, "grad_norm": 1.4725103378295898, "learning_rate": 2e-05, "loss": 0.04490086, "step": 4954 }, { "epoch": 9.91, "grad_norm": 1.0687810182571411, "learning_rate": 2e-05, "loss": 0.04056025, "step": 4955 }, { "epoch": 9.912, "grad_norm": 1.1338154077529907, "learning_rate": 2e-05, "loss": 0.03530625, "step": 4956 }, { "epoch": 9.914, "grad_norm": 1.2431254386901855, "learning_rate": 2e-05, "loss": 0.04536084, "step": 4957 }, { "epoch": 9.916, "grad_norm": 4.506831169128418, "learning_rate": 2e-05, "loss": 0.04210579, "step": 4958 }, { "epoch": 9.918, "grad_norm": 0.8978538513183594, "learning_rate": 2e-05, "loss": 0.03029729, "step": 4959 }, { "epoch": 9.92, "grad_norm": 1.2089909315109253, "learning_rate": 2e-05, "loss": 0.04474664, "step": 4960 }, { "epoch": 9.922, "grad_norm": 1.001044750213623, "learning_rate": 2e-05, "loss": 0.03365429, "step": 4961 }, { "epoch": 9.924, "grad_norm": 1.8939543962478638, "learning_rate": 2e-05, "loss": 0.04662756, "step": 4962 }, { "epoch": 9.926, "grad_norm": 1.8522080183029175, "learning_rate": 2e-05, "loss": 0.06616221, "step": 4963 }, { "epoch": 9.928, "grad_norm": 1.3510562181472778, "learning_rate": 2e-05, "loss": 0.03453474, "step": 4964 }, { "epoch": 9.93, "grad_norm": 1.2540520429611206, "learning_rate": 2e-05, "loss": 0.03054098, "step": 4965 }, { "epoch": 9.932, "grad_norm": 1.2255598306655884, "learning_rate": 2e-05, "loss": 0.03275552, "step": 4966 }, { "epoch": 9.934, "grad_norm": 2.2541887760162354, "learning_rate": 2e-05, "loss": 0.0503389, "step": 4967 }, { "epoch": 9.936, "grad_norm": 1.484110713005066, "learning_rate": 2e-05, "loss": 0.06019883, "step": 4968 }, { "epoch": 9.938, "grad_norm": 1.217021107673645, "learning_rate": 2e-05, "loss": 0.04239278, "step": 4969 }, { "epoch": 9.94, "grad_norm": 1.6136702299118042, "learning_rate": 2e-05, "loss": 0.05467443, "step": 4970 }, { "epoch": 9.942, "grad_norm": 1.154208779335022, "learning_rate": 2e-05, "loss": 0.03876391, "step": 4971 }, { "epoch": 9.943999999999999, "grad_norm": 1.0662761926651, "learning_rate": 2e-05, "loss": 0.03581022, "step": 4972 }, { "epoch": 9.946, "grad_norm": 1.5197198390960693, "learning_rate": 2e-05, "loss": 0.04490343, "step": 4973 }, { "epoch": 9.948, "grad_norm": 1.7308610677719116, "learning_rate": 2e-05, "loss": 0.0444873, "step": 4974 }, { "epoch": 9.95, "grad_norm": 1.7996978759765625, "learning_rate": 2e-05, "loss": 0.06157671, "step": 4975 }, { "epoch": 9.952, "grad_norm": 2.295097827911377, "learning_rate": 2e-05, "loss": 0.05016646, "step": 4976 }, { "epoch": 9.954, "grad_norm": 1.206261396408081, "learning_rate": 2e-05, "loss": 0.0270515, "step": 4977 }, { "epoch": 9.956, "grad_norm": 1.0150620937347412, "learning_rate": 2e-05, "loss": 0.04362907, "step": 4978 }, { "epoch": 9.958, "grad_norm": 1.5656530857086182, "learning_rate": 2e-05, "loss": 0.03449667, "step": 4979 }, { "epoch": 9.96, "grad_norm": 1.2574493885040283, "learning_rate": 2e-05, "loss": 0.04523246, "step": 4980 }, { "epoch": 9.962, "grad_norm": 1.6237354278564453, "learning_rate": 2e-05, "loss": 0.04153569, "step": 4981 }, { "epoch": 9.964, "grad_norm": 1.90591299533844, "learning_rate": 2e-05, "loss": 0.02905798, "step": 4982 }, { "epoch": 9.966, "grad_norm": 2.5041632652282715, "learning_rate": 2e-05, "loss": 0.05663058, "step": 4983 }, { "epoch": 9.968, "grad_norm": 1.3987988233566284, "learning_rate": 2e-05, "loss": 0.06210096, "step": 4984 }, { "epoch": 9.97, "grad_norm": 1.7508735656738281, "learning_rate": 2e-05, "loss": 0.06636174, "step": 4985 }, { "epoch": 9.972, "grad_norm": 1.324049711227417, "learning_rate": 2e-05, "loss": 0.05072082, "step": 4986 }, { "epoch": 9.974, "grad_norm": 1.3123738765716553, "learning_rate": 2e-05, "loss": 0.04027256, "step": 4987 }, { "epoch": 9.975999999999999, "grad_norm": 2.1010961532592773, "learning_rate": 2e-05, "loss": 0.05779121, "step": 4988 }, { "epoch": 9.978, "grad_norm": 1.734845519065857, "learning_rate": 2e-05, "loss": 0.04562132, "step": 4989 }, { "epoch": 9.98, "grad_norm": 1.4502214193344116, "learning_rate": 2e-05, "loss": 0.03613491, "step": 4990 }, { "epoch": 9.982, "grad_norm": 1.9033112525939941, "learning_rate": 2e-05, "loss": 0.05921431, "step": 4991 }, { "epoch": 9.984, "grad_norm": 1.0482875108718872, "learning_rate": 2e-05, "loss": 0.02903022, "step": 4992 }, { "epoch": 9.986, "grad_norm": 1.5659935474395752, "learning_rate": 2e-05, "loss": 0.04814388, "step": 4993 }, { "epoch": 9.988, "grad_norm": 1.1235241889953613, "learning_rate": 2e-05, "loss": 0.0372797, "step": 4994 }, { "epoch": 9.99, "grad_norm": 1.5326329469680786, "learning_rate": 2e-05, "loss": 0.05798367, "step": 4995 }, { "epoch": 9.992, "grad_norm": 1.3622616529464722, "learning_rate": 2e-05, "loss": 0.05164924, "step": 4996 }, { "epoch": 9.994, "grad_norm": 2.1830084323883057, "learning_rate": 2e-05, "loss": 0.04496937, "step": 4997 }, { "epoch": 9.996, "grad_norm": 1.2075526714324951, "learning_rate": 2e-05, "loss": 0.04370778, "step": 4998 }, { "epoch": 9.998, "grad_norm": 1.1224498748779297, "learning_rate": 2e-05, "loss": 0.05310022, "step": 4999 }, { "epoch": 10.0, "grad_norm": 1.6621578931808472, "learning_rate": 2e-05, "loss": 0.06392539, "step": 5000 }, { "epoch": 10.0, "eval_performance": { "AngleClassification_1": 0.996, "AngleClassification_2": 0.996, "AngleClassification_3": 0.9500998003992016, "Equal_1": 0.992, "Equal_2": 0.9181636726546906, "Equal_3": 0.8323353293413174, "LineComparison_1": 0.998, "LineComparison_2": 0.998003992015968, "LineComparison_3": 0.9840319361277445, "Parallel_1": 0.9859719438877755, "Parallel_2": 0.9959919839679359, "Parallel_3": 0.94, "Perpendicular_1": 0.99, "Perpendicular_2": 0.938, "Perpendicular_3": 0.6452905811623246, "PointLiesOnCircle_1": 0.9959919839679359, "PointLiesOnCircle_2": 0.996, "PointLiesOnCircle_3": 0.9828, "PointLiesOnLine_1": 0.9919839679358717, "PointLiesOnLine_2": 0.9899799599198397, "PointLiesOnLine_3": 0.9500998003992016 }, "eval_runtime": 225.889, "eval_samples_per_second": 46.483, "eval_steps_per_second": 0.93, "step": 5000 }, { "epoch": 10.002, "grad_norm": 1.2116057872772217, "learning_rate": 2e-05, "loss": 0.04208425, "step": 5001 }, { "epoch": 10.004, "grad_norm": 1.2836134433746338, "learning_rate": 2e-05, "loss": 0.03450134, "step": 5002 }, { "epoch": 10.006, "grad_norm": 1.1443209648132324, "learning_rate": 2e-05, "loss": 0.04110379, "step": 5003 }, { "epoch": 10.008, "grad_norm": 1.6402474641799927, "learning_rate": 2e-05, "loss": 0.04672791, "step": 5004 }, { "epoch": 10.01, "grad_norm": 1.1609535217285156, "learning_rate": 2e-05, "loss": 0.03896538, "step": 5005 }, { "epoch": 10.012, "grad_norm": 1.3826688528060913, "learning_rate": 2e-05, "loss": 0.04530947, "step": 5006 }, { "epoch": 10.014, "grad_norm": 1.5272947549819946, "learning_rate": 2e-05, "loss": 0.0594972, "step": 5007 }, { "epoch": 10.016, "grad_norm": 1.0881803035736084, "learning_rate": 2e-05, "loss": 0.03994909, "step": 5008 }, { "epoch": 10.018, "grad_norm": 1.3823893070220947, "learning_rate": 2e-05, "loss": 0.03971476, "step": 5009 }, { "epoch": 10.02, "grad_norm": 1.4883553981781006, "learning_rate": 2e-05, "loss": 0.03705897, "step": 5010 }, { "epoch": 10.022, "grad_norm": 1.7884477376937866, "learning_rate": 2e-05, "loss": 0.06661702, "step": 5011 }, { "epoch": 10.024, "grad_norm": 5.235015392303467, "learning_rate": 2e-05, "loss": 0.06009185, "step": 5012 }, { "epoch": 10.026, "grad_norm": 1.71100914478302, "learning_rate": 2e-05, "loss": 0.06621759, "step": 5013 }, { "epoch": 10.028, "grad_norm": 1.2119653224945068, "learning_rate": 2e-05, "loss": 0.04409834, "step": 5014 }, { "epoch": 10.03, "grad_norm": 1.618872046470642, "learning_rate": 2e-05, "loss": 0.04181359, "step": 5015 }, { "epoch": 10.032, "grad_norm": 1.607222557067871, "learning_rate": 2e-05, "loss": 0.04726337, "step": 5016 }, { "epoch": 10.034, "grad_norm": 2.203791618347168, "learning_rate": 2e-05, "loss": 0.05487993, "step": 5017 }, { "epoch": 10.036, "grad_norm": 1.2739883661270142, "learning_rate": 2e-05, "loss": 0.04979678, "step": 5018 }, { "epoch": 10.038, "grad_norm": 1.437139868736267, "learning_rate": 2e-05, "loss": 0.05425661, "step": 5019 }, { "epoch": 10.04, "grad_norm": 1.1217912435531616, "learning_rate": 2e-05, "loss": 0.04765711, "step": 5020 }, { "epoch": 10.042, "grad_norm": 1.056843876838684, "learning_rate": 2e-05, "loss": 0.03718838, "step": 5021 }, { "epoch": 10.044, "grad_norm": 1.0205343961715698, "learning_rate": 2e-05, "loss": 0.03808871, "step": 5022 }, { "epoch": 10.046, "grad_norm": 1.2816520929336548, "learning_rate": 2e-05, "loss": 0.04973458, "step": 5023 }, { "epoch": 10.048, "grad_norm": 1.1275383234024048, "learning_rate": 2e-05, "loss": 0.03064222, "step": 5024 }, { "epoch": 10.05, "grad_norm": 3.2018489837646484, "learning_rate": 2e-05, "loss": 0.06866911, "step": 5025 }, { "epoch": 10.052, "grad_norm": 1.0388258695602417, "learning_rate": 2e-05, "loss": 0.04599523, "step": 5026 }, { "epoch": 10.054, "grad_norm": 1.1918741464614868, "learning_rate": 2e-05, "loss": 0.0580851, "step": 5027 }, { "epoch": 10.056, "grad_norm": 0.8901442289352417, "learning_rate": 2e-05, "loss": 0.03091174, "step": 5028 }, { "epoch": 10.058, "grad_norm": 1.1134976148605347, "learning_rate": 2e-05, "loss": 0.03725684, "step": 5029 }, { "epoch": 10.06, "grad_norm": 1.7022333145141602, "learning_rate": 2e-05, "loss": 0.05678235, "step": 5030 }, { "epoch": 10.062, "grad_norm": 1.8270437717437744, "learning_rate": 2e-05, "loss": 0.05731597, "step": 5031 }, { "epoch": 10.064, "grad_norm": 7.51585578918457, "learning_rate": 2e-05, "loss": 0.04290658, "step": 5032 }, { "epoch": 10.066, "grad_norm": 1.43076491355896, "learning_rate": 2e-05, "loss": 0.04153984, "step": 5033 }, { "epoch": 10.068, "grad_norm": 1.0161833763122559, "learning_rate": 2e-05, "loss": 0.04405259, "step": 5034 }, { "epoch": 10.07, "grad_norm": 1.9041961431503296, "learning_rate": 2e-05, "loss": 0.04986267, "step": 5035 }, { "epoch": 10.072, "grad_norm": 1.352732539176941, "learning_rate": 2e-05, "loss": 0.03716427, "step": 5036 }, { "epoch": 10.074, "grad_norm": 1.2103217840194702, "learning_rate": 2e-05, "loss": 0.04140213, "step": 5037 }, { "epoch": 10.076, "grad_norm": 1.425744891166687, "learning_rate": 2e-05, "loss": 0.04375656, "step": 5038 }, { "epoch": 10.078, "grad_norm": 1.3066316843032837, "learning_rate": 2e-05, "loss": 0.03641799, "step": 5039 }, { "epoch": 10.08, "grad_norm": 1.422602653503418, "learning_rate": 2e-05, "loss": 0.0417249, "step": 5040 }, { "epoch": 10.082, "grad_norm": 1.1820791959762573, "learning_rate": 2e-05, "loss": 0.03585736, "step": 5041 }, { "epoch": 10.084, "grad_norm": 1.4058547019958496, "learning_rate": 2e-05, "loss": 0.04448502, "step": 5042 }, { "epoch": 10.086, "grad_norm": 1.495457410812378, "learning_rate": 2e-05, "loss": 0.03721453, "step": 5043 }, { "epoch": 10.088, "grad_norm": 1.3821312189102173, "learning_rate": 2e-05, "loss": 0.05367724, "step": 5044 }, { "epoch": 10.09, "grad_norm": 1.3406728506088257, "learning_rate": 2e-05, "loss": 0.04626499, "step": 5045 }, { "epoch": 10.092, "grad_norm": 1.4274239540100098, "learning_rate": 2e-05, "loss": 0.05460577, "step": 5046 }, { "epoch": 10.094, "grad_norm": 1.452859878540039, "learning_rate": 2e-05, "loss": 0.04055486, "step": 5047 }, { "epoch": 10.096, "grad_norm": 1.9316481351852417, "learning_rate": 2e-05, "loss": 0.04859093, "step": 5048 }, { "epoch": 10.098, "grad_norm": 1.0683211088180542, "learning_rate": 2e-05, "loss": 0.04545689, "step": 5049 }, { "epoch": 10.1, "grad_norm": 1.3271669149398804, "learning_rate": 2e-05, "loss": 0.03542973, "step": 5050 }, { "epoch": 10.102, "grad_norm": 1.3472424745559692, "learning_rate": 2e-05, "loss": 0.04254956, "step": 5051 }, { "epoch": 10.104, "grad_norm": 1.326146125793457, "learning_rate": 2e-05, "loss": 0.04960856, "step": 5052 }, { "epoch": 10.106, "grad_norm": 1.507055640220642, "learning_rate": 2e-05, "loss": 0.04574534, "step": 5053 }, { "epoch": 10.108, "grad_norm": 1.9861655235290527, "learning_rate": 2e-05, "loss": 0.04311334, "step": 5054 }, { "epoch": 10.11, "grad_norm": 3.763047933578491, "learning_rate": 2e-05, "loss": 0.06304701, "step": 5055 }, { "epoch": 10.112, "grad_norm": 2.199321746826172, "learning_rate": 2e-05, "loss": 0.05586852, "step": 5056 }, { "epoch": 10.114, "grad_norm": 1.9915794134140015, "learning_rate": 2e-05, "loss": 0.04724769, "step": 5057 }, { "epoch": 10.116, "grad_norm": 1.4775644540786743, "learning_rate": 2e-05, "loss": 0.04547869, "step": 5058 }, { "epoch": 10.118, "grad_norm": 1.8133411407470703, "learning_rate": 2e-05, "loss": 0.0423055, "step": 5059 }, { "epoch": 10.12, "grad_norm": 1.4613059759140015, "learning_rate": 2e-05, "loss": 0.05925657, "step": 5060 }, { "epoch": 10.122, "grad_norm": 0.941746711730957, "learning_rate": 2e-05, "loss": 0.03216568, "step": 5061 }, { "epoch": 10.124, "grad_norm": 1.6490039825439453, "learning_rate": 2e-05, "loss": 0.05666177, "step": 5062 }, { "epoch": 10.126, "grad_norm": 1.3844231367111206, "learning_rate": 2e-05, "loss": 0.02810806, "step": 5063 }, { "epoch": 10.128, "grad_norm": 1.3309664726257324, "learning_rate": 2e-05, "loss": 0.06071011, "step": 5064 }, { "epoch": 10.13, "grad_norm": 1.6288623809814453, "learning_rate": 2e-05, "loss": 0.05567588, "step": 5065 }, { "epoch": 10.132, "grad_norm": 2.0868473052978516, "learning_rate": 2e-05, "loss": 0.0569134, "step": 5066 }, { "epoch": 10.134, "grad_norm": 1.3809058666229248, "learning_rate": 2e-05, "loss": 0.03231725, "step": 5067 }, { "epoch": 10.136, "grad_norm": 1.5914289951324463, "learning_rate": 2e-05, "loss": 0.0451416, "step": 5068 }, { "epoch": 10.138, "grad_norm": 1.60956871509552, "learning_rate": 2e-05, "loss": 0.05465853, "step": 5069 }, { "epoch": 10.14, "grad_norm": 1.5891399383544922, "learning_rate": 2e-05, "loss": 0.04181509, "step": 5070 }, { "epoch": 10.142, "grad_norm": 1.2969509363174438, "learning_rate": 2e-05, "loss": 0.04687794, "step": 5071 }, { "epoch": 10.144, "grad_norm": 1.0293656587600708, "learning_rate": 2e-05, "loss": 0.03168117, "step": 5072 }, { "epoch": 10.146, "grad_norm": 1.391754150390625, "learning_rate": 2e-05, "loss": 0.05118123, "step": 5073 }, { "epoch": 10.148, "grad_norm": 1.8658547401428223, "learning_rate": 2e-05, "loss": 0.05027353, "step": 5074 }, { "epoch": 10.15, "grad_norm": 1.3523226976394653, "learning_rate": 2e-05, "loss": 0.05539804, "step": 5075 }, { "epoch": 10.152, "grad_norm": 1.5844132900238037, "learning_rate": 2e-05, "loss": 0.05847012, "step": 5076 }, { "epoch": 10.154, "grad_norm": 1.7214871644973755, "learning_rate": 2e-05, "loss": 0.04929259, "step": 5077 }, { "epoch": 10.156, "grad_norm": 2.030660629272461, "learning_rate": 2e-05, "loss": 0.04488573, "step": 5078 }, { "epoch": 10.158, "grad_norm": 0.9780464172363281, "learning_rate": 2e-05, "loss": 0.03005782, "step": 5079 }, { "epoch": 10.16, "grad_norm": 1.1975051164627075, "learning_rate": 2e-05, "loss": 0.04913092, "step": 5080 }, { "epoch": 10.162, "grad_norm": 1.3435134887695312, "learning_rate": 2e-05, "loss": 0.04160519, "step": 5081 }, { "epoch": 10.164, "grad_norm": 0.9526854157447815, "learning_rate": 2e-05, "loss": 0.04140642, "step": 5082 }, { "epoch": 10.166, "grad_norm": 0.822665274143219, "learning_rate": 2e-05, "loss": 0.0301406, "step": 5083 }, { "epoch": 10.168, "grad_norm": 1.4586740732192993, "learning_rate": 2e-05, "loss": 0.05270635, "step": 5084 }, { "epoch": 10.17, "grad_norm": 1.8526438474655151, "learning_rate": 2e-05, "loss": 0.04045253, "step": 5085 }, { "epoch": 10.172, "grad_norm": 1.2397280931472778, "learning_rate": 2e-05, "loss": 0.04982067, "step": 5086 }, { "epoch": 10.174, "grad_norm": 1.37135910987854, "learning_rate": 2e-05, "loss": 0.03924649, "step": 5087 }, { "epoch": 10.176, "grad_norm": 0.8350824117660522, "learning_rate": 2e-05, "loss": 0.02943425, "step": 5088 }, { "epoch": 10.178, "grad_norm": 1.2432544231414795, "learning_rate": 2e-05, "loss": 0.04460248, "step": 5089 }, { "epoch": 10.18, "grad_norm": 1.385955810546875, "learning_rate": 2e-05, "loss": 0.04620705, "step": 5090 }, { "epoch": 10.182, "grad_norm": 4.272519111633301, "learning_rate": 2e-05, "loss": 0.04540413, "step": 5091 }, { "epoch": 10.184, "grad_norm": 1.0373469591140747, "learning_rate": 2e-05, "loss": 0.04271317, "step": 5092 }, { "epoch": 10.186, "grad_norm": 1.3342186212539673, "learning_rate": 2e-05, "loss": 0.04814231, "step": 5093 }, { "epoch": 10.188, "grad_norm": 1.6670373678207397, "learning_rate": 2e-05, "loss": 0.05525617, "step": 5094 }, { "epoch": 10.19, "grad_norm": 1.830917239189148, "learning_rate": 2e-05, "loss": 0.06785688, "step": 5095 }, { "epoch": 10.192, "grad_norm": 2.6548359394073486, "learning_rate": 2e-05, "loss": 0.06821053, "step": 5096 }, { "epoch": 10.194, "grad_norm": 1.223729133605957, "learning_rate": 2e-05, "loss": 0.05272523, "step": 5097 }, { "epoch": 10.196, "grad_norm": 1.4436752796173096, "learning_rate": 2e-05, "loss": 0.04337811, "step": 5098 }, { "epoch": 10.198, "grad_norm": 1.4322463274002075, "learning_rate": 2e-05, "loss": 0.03883493, "step": 5099 }, { "epoch": 10.2, "grad_norm": 1.7061482667922974, "learning_rate": 2e-05, "loss": 0.05918795, "step": 5100 }, { "epoch": 10.202, "grad_norm": 1.2914401292800903, "learning_rate": 2e-05, "loss": 0.04848788, "step": 5101 }, { "epoch": 10.204, "grad_norm": 0.9980544447898865, "learning_rate": 2e-05, "loss": 0.03233691, "step": 5102 }, { "epoch": 10.206, "grad_norm": 1.136604905128479, "learning_rate": 2e-05, "loss": 0.04738461, "step": 5103 }, { "epoch": 10.208, "grad_norm": 1.4945024251937866, "learning_rate": 2e-05, "loss": 0.06012742, "step": 5104 }, { "epoch": 10.21, "grad_norm": 0.9514679312705994, "learning_rate": 2e-05, "loss": 0.03237236, "step": 5105 }, { "epoch": 10.212, "grad_norm": 1.2797694206237793, "learning_rate": 2e-05, "loss": 0.03938352, "step": 5106 }, { "epoch": 10.214, "grad_norm": 1.959914207458496, "learning_rate": 2e-05, "loss": 0.04701874, "step": 5107 }, { "epoch": 10.216, "grad_norm": 1.2796992063522339, "learning_rate": 2e-05, "loss": 0.05292726, "step": 5108 }, { "epoch": 10.218, "grad_norm": 0.9753836393356323, "learning_rate": 2e-05, "loss": 0.03651617, "step": 5109 }, { "epoch": 10.22, "grad_norm": 2.557206392288208, "learning_rate": 2e-05, "loss": 0.05746891, "step": 5110 }, { "epoch": 10.222, "grad_norm": 1.489743947982788, "learning_rate": 2e-05, "loss": 0.04898936, "step": 5111 }, { "epoch": 10.224, "grad_norm": 1.5982235670089722, "learning_rate": 2e-05, "loss": 0.05300324, "step": 5112 }, { "epoch": 10.226, "grad_norm": 1.2746562957763672, "learning_rate": 2e-05, "loss": 0.04974872, "step": 5113 }, { "epoch": 10.228, "grad_norm": 1.0127474069595337, "learning_rate": 2e-05, "loss": 0.03322353, "step": 5114 }, { "epoch": 10.23, "grad_norm": 1.3261486291885376, "learning_rate": 2e-05, "loss": 0.04652198, "step": 5115 }, { "epoch": 10.232, "grad_norm": 1.0635287761688232, "learning_rate": 2e-05, "loss": 0.03974827, "step": 5116 }, { "epoch": 10.234, "grad_norm": 1.4248281717300415, "learning_rate": 2e-05, "loss": 0.05829512, "step": 5117 }, { "epoch": 10.236, "grad_norm": 1.211340069770813, "learning_rate": 2e-05, "loss": 0.04442834, "step": 5118 }, { "epoch": 10.238, "grad_norm": 1.4484608173370361, "learning_rate": 2e-05, "loss": 0.06100322, "step": 5119 }, { "epoch": 10.24, "grad_norm": 1.4616215229034424, "learning_rate": 2e-05, "loss": 0.05261508, "step": 5120 }, { "epoch": 10.242, "grad_norm": 1.1967036724090576, "learning_rate": 2e-05, "loss": 0.03709029, "step": 5121 }, { "epoch": 10.244, "grad_norm": 1.0950756072998047, "learning_rate": 2e-05, "loss": 0.04377744, "step": 5122 }, { "epoch": 10.246, "grad_norm": 1.3935307264328003, "learning_rate": 2e-05, "loss": 0.0444581, "step": 5123 }, { "epoch": 10.248, "grad_norm": 1.671621322631836, "learning_rate": 2e-05, "loss": 0.0455098, "step": 5124 }, { "epoch": 10.25, "grad_norm": 1.1205406188964844, "learning_rate": 2e-05, "loss": 0.03173978, "step": 5125 }, { "epoch": 10.252, "grad_norm": 2.075258255004883, "learning_rate": 2e-05, "loss": 0.06263765, "step": 5126 }, { "epoch": 10.254, "grad_norm": 1.3751420974731445, "learning_rate": 2e-05, "loss": 0.04772716, "step": 5127 }, { "epoch": 10.256, "grad_norm": 1.3971940279006958, "learning_rate": 2e-05, "loss": 0.05719724, "step": 5128 }, { "epoch": 10.258, "grad_norm": 1.5217324495315552, "learning_rate": 2e-05, "loss": 0.04593716, "step": 5129 }, { "epoch": 10.26, "grad_norm": 1.2888736724853516, "learning_rate": 2e-05, "loss": 0.03719681, "step": 5130 }, { "epoch": 10.262, "grad_norm": 1.7119133472442627, "learning_rate": 2e-05, "loss": 0.07480539, "step": 5131 }, { "epoch": 10.264, "grad_norm": 1.5947872400283813, "learning_rate": 2e-05, "loss": 0.05440378, "step": 5132 }, { "epoch": 10.266, "grad_norm": 1.064030647277832, "learning_rate": 2e-05, "loss": 0.03951091, "step": 5133 }, { "epoch": 10.268, "grad_norm": 1.627030611038208, "learning_rate": 2e-05, "loss": 0.04296412, "step": 5134 }, { "epoch": 10.27, "grad_norm": 1.4102696180343628, "learning_rate": 2e-05, "loss": 0.05037045, "step": 5135 }, { "epoch": 10.272, "grad_norm": 1.7745003700256348, "learning_rate": 2e-05, "loss": 0.0528553, "step": 5136 }, { "epoch": 10.274000000000001, "grad_norm": 1.1512871980667114, "learning_rate": 2e-05, "loss": 0.03570754, "step": 5137 }, { "epoch": 10.276, "grad_norm": 1.0335047245025635, "learning_rate": 2e-05, "loss": 0.04114594, "step": 5138 }, { "epoch": 10.278, "grad_norm": 1.3740545511245728, "learning_rate": 2e-05, "loss": 0.04401609, "step": 5139 }, { "epoch": 10.28, "grad_norm": 1.1522160768508911, "learning_rate": 2e-05, "loss": 0.03776874, "step": 5140 }, { "epoch": 10.282, "grad_norm": 1.4475064277648926, "learning_rate": 2e-05, "loss": 0.05163924, "step": 5141 }, { "epoch": 10.284, "grad_norm": 1.1450650691986084, "learning_rate": 2e-05, "loss": 0.04147805, "step": 5142 }, { "epoch": 10.286, "grad_norm": 1.0685665607452393, "learning_rate": 2e-05, "loss": 0.03787285, "step": 5143 }, { "epoch": 10.288, "grad_norm": 1.9860632419586182, "learning_rate": 2e-05, "loss": 0.0506893, "step": 5144 }, { "epoch": 10.29, "grad_norm": 1.6984888315200806, "learning_rate": 2e-05, "loss": 0.0406758, "step": 5145 }, { "epoch": 10.292, "grad_norm": 2.2713117599487305, "learning_rate": 2e-05, "loss": 0.03191452, "step": 5146 }, { "epoch": 10.294, "grad_norm": 1.930211067199707, "learning_rate": 2e-05, "loss": 0.05683004, "step": 5147 }, { "epoch": 10.296, "grad_norm": 1.1921262741088867, "learning_rate": 2e-05, "loss": 0.02988086, "step": 5148 }, { "epoch": 10.298, "grad_norm": 1.875165581703186, "learning_rate": 2e-05, "loss": 0.06621813, "step": 5149 }, { "epoch": 10.3, "grad_norm": 1.6744751930236816, "learning_rate": 2e-05, "loss": 0.04445022, "step": 5150 }, { "epoch": 10.302, "grad_norm": 0.9719526171684265, "learning_rate": 2e-05, "loss": 0.04734886, "step": 5151 }, { "epoch": 10.304, "grad_norm": 1.6227507591247559, "learning_rate": 2e-05, "loss": 0.04560577, "step": 5152 }, { "epoch": 10.306, "grad_norm": 1.794191837310791, "learning_rate": 2e-05, "loss": 0.0505556, "step": 5153 }, { "epoch": 10.308, "grad_norm": 1.151733160018921, "learning_rate": 2e-05, "loss": 0.04367539, "step": 5154 }, { "epoch": 10.31, "grad_norm": 1.6733118295669556, "learning_rate": 2e-05, "loss": 0.04313629, "step": 5155 }, { "epoch": 10.312, "grad_norm": 1.6936205625534058, "learning_rate": 2e-05, "loss": 0.0437565, "step": 5156 }, { "epoch": 10.314, "grad_norm": 2.5626566410064697, "learning_rate": 2e-05, "loss": 0.05160926, "step": 5157 }, { "epoch": 10.316, "grad_norm": 1.853179693222046, "learning_rate": 2e-05, "loss": 0.03983745, "step": 5158 }, { "epoch": 10.318, "grad_norm": 1.5023093223571777, "learning_rate": 2e-05, "loss": 0.03771558, "step": 5159 }, { "epoch": 10.32, "grad_norm": 1.9019720554351807, "learning_rate": 2e-05, "loss": 0.06279356, "step": 5160 }, { "epoch": 10.322, "grad_norm": 2.2894985675811768, "learning_rate": 2e-05, "loss": 0.05818196, "step": 5161 }, { "epoch": 10.324, "grad_norm": 1.7504979372024536, "learning_rate": 2e-05, "loss": 0.06713301, "step": 5162 }, { "epoch": 10.326, "grad_norm": 1.562462329864502, "learning_rate": 2e-05, "loss": 0.05433023, "step": 5163 }, { "epoch": 10.328, "grad_norm": 1.9881681203842163, "learning_rate": 2e-05, "loss": 0.04447364, "step": 5164 }, { "epoch": 10.33, "grad_norm": 1.4158987998962402, "learning_rate": 2e-05, "loss": 0.03921591, "step": 5165 }, { "epoch": 10.332, "grad_norm": 2.0077438354492188, "learning_rate": 2e-05, "loss": 0.05047126, "step": 5166 }, { "epoch": 10.334, "grad_norm": 1.2142159938812256, "learning_rate": 2e-05, "loss": 0.04545286, "step": 5167 }, { "epoch": 10.336, "grad_norm": 1.5748765468597412, "learning_rate": 2e-05, "loss": 0.04870406, "step": 5168 }, { "epoch": 10.338, "grad_norm": 0.9389767050743103, "learning_rate": 2e-05, "loss": 0.03963982, "step": 5169 }, { "epoch": 10.34, "grad_norm": 1.6009360551834106, "learning_rate": 2e-05, "loss": 0.06941339, "step": 5170 }, { "epoch": 10.342, "grad_norm": 1.1973956823349, "learning_rate": 2e-05, "loss": 0.04598391, "step": 5171 }, { "epoch": 10.344, "grad_norm": 2.3799076080322266, "learning_rate": 2e-05, "loss": 0.05610751, "step": 5172 }, { "epoch": 10.346, "grad_norm": 0.7986219525337219, "learning_rate": 2e-05, "loss": 0.02260351, "step": 5173 }, { "epoch": 10.348, "grad_norm": 1.5505019426345825, "learning_rate": 2e-05, "loss": 0.06113249, "step": 5174 }, { "epoch": 10.35, "grad_norm": 0.9721316695213318, "learning_rate": 2e-05, "loss": 0.03533325, "step": 5175 }, { "epoch": 10.352, "grad_norm": 1.9684669971466064, "learning_rate": 2e-05, "loss": 0.03828597, "step": 5176 }, { "epoch": 10.354, "grad_norm": 1.580595850944519, "learning_rate": 2e-05, "loss": 0.0529974, "step": 5177 }, { "epoch": 10.356, "grad_norm": 2.088832378387451, "learning_rate": 2e-05, "loss": 0.08070586, "step": 5178 }, { "epoch": 10.358, "grad_norm": 1.4601712226867676, "learning_rate": 2e-05, "loss": 0.04337689, "step": 5179 }, { "epoch": 10.36, "grad_norm": 2.2250285148620605, "learning_rate": 2e-05, "loss": 0.05152196, "step": 5180 }, { "epoch": 10.362, "grad_norm": 2.853736162185669, "learning_rate": 2e-05, "loss": 0.03956318, "step": 5181 }, { "epoch": 10.364, "grad_norm": 1.700832724571228, "learning_rate": 2e-05, "loss": 0.06116625, "step": 5182 }, { "epoch": 10.366, "grad_norm": 1.7752611637115479, "learning_rate": 2e-05, "loss": 0.04830565, "step": 5183 }, { "epoch": 10.368, "grad_norm": 1.9338951110839844, "learning_rate": 2e-05, "loss": 0.04877048, "step": 5184 }, { "epoch": 10.37, "grad_norm": 1.2155956029891968, "learning_rate": 2e-05, "loss": 0.05004797, "step": 5185 }, { "epoch": 10.372, "grad_norm": 1.5574666261672974, "learning_rate": 2e-05, "loss": 0.04321909, "step": 5186 }, { "epoch": 10.374, "grad_norm": 1.8880034685134888, "learning_rate": 2e-05, "loss": 0.05173431, "step": 5187 }, { "epoch": 10.376, "grad_norm": 1.1397514343261719, "learning_rate": 2e-05, "loss": 0.05142686, "step": 5188 }, { "epoch": 10.378, "grad_norm": 1.3969523906707764, "learning_rate": 2e-05, "loss": 0.04138109, "step": 5189 }, { "epoch": 10.38, "grad_norm": 1.154222011566162, "learning_rate": 2e-05, "loss": 0.05076398, "step": 5190 }, { "epoch": 10.382, "grad_norm": 1.415150761604309, "learning_rate": 2e-05, "loss": 0.04445144, "step": 5191 }, { "epoch": 10.384, "grad_norm": 1.0494834184646606, "learning_rate": 2e-05, "loss": 0.0439861, "step": 5192 }, { "epoch": 10.386, "grad_norm": 0.9817904829978943, "learning_rate": 2e-05, "loss": 0.04180618, "step": 5193 }, { "epoch": 10.388, "grad_norm": 1.0904024839401245, "learning_rate": 2e-05, "loss": 0.03312543, "step": 5194 }, { "epoch": 10.39, "grad_norm": 1.0784250497817993, "learning_rate": 2e-05, "loss": 0.0532885, "step": 5195 }, { "epoch": 10.392, "grad_norm": 1.3370542526245117, "learning_rate": 2e-05, "loss": 0.0406116, "step": 5196 }, { "epoch": 10.394, "grad_norm": 1.8610306978225708, "learning_rate": 2e-05, "loss": 0.05870312, "step": 5197 }, { "epoch": 10.396, "grad_norm": 0.9820883870124817, "learning_rate": 2e-05, "loss": 0.03643541, "step": 5198 }, { "epoch": 10.398, "grad_norm": 3.199398994445801, "learning_rate": 2e-05, "loss": 0.06282803, "step": 5199 }, { "epoch": 10.4, "grad_norm": 1.6163692474365234, "learning_rate": 2e-05, "loss": 0.0523924, "step": 5200 }, { "epoch": 10.402, "grad_norm": 1.747156023979187, "learning_rate": 2e-05, "loss": 0.04142829, "step": 5201 }, { "epoch": 10.404, "grad_norm": 1.9031070470809937, "learning_rate": 2e-05, "loss": 0.04023085, "step": 5202 }, { "epoch": 10.406, "grad_norm": 1.4705113172531128, "learning_rate": 2e-05, "loss": 0.06596102, "step": 5203 }, { "epoch": 10.408, "grad_norm": 1.728999137878418, "learning_rate": 2e-05, "loss": 0.04251568, "step": 5204 }, { "epoch": 10.41, "grad_norm": 1.6319468021392822, "learning_rate": 2e-05, "loss": 0.04758106, "step": 5205 }, { "epoch": 10.412, "grad_norm": 1.1645108461380005, "learning_rate": 2e-05, "loss": 0.05253697, "step": 5206 }, { "epoch": 10.414, "grad_norm": 0.9445511102676392, "learning_rate": 2e-05, "loss": 0.02877699, "step": 5207 }, { "epoch": 10.416, "grad_norm": 1.4078420400619507, "learning_rate": 2e-05, "loss": 0.0487856, "step": 5208 }, { "epoch": 10.418, "grad_norm": 1.6007351875305176, "learning_rate": 2e-05, "loss": 0.05624373, "step": 5209 }, { "epoch": 10.42, "grad_norm": 0.9887979626655579, "learning_rate": 2e-05, "loss": 0.03121101, "step": 5210 }, { "epoch": 10.422, "grad_norm": 1.2713853120803833, "learning_rate": 2e-05, "loss": 0.03617209, "step": 5211 }, { "epoch": 10.424, "grad_norm": 1.2160078287124634, "learning_rate": 2e-05, "loss": 0.04750057, "step": 5212 }, { "epoch": 10.426, "grad_norm": 0.7348010540008545, "learning_rate": 2e-05, "loss": 0.0270355, "step": 5213 }, { "epoch": 10.428, "grad_norm": 1.683485507965088, "learning_rate": 2e-05, "loss": 0.05139549, "step": 5214 }, { "epoch": 10.43, "grad_norm": 2.029883623123169, "learning_rate": 2e-05, "loss": 0.04456969, "step": 5215 }, { "epoch": 10.432, "grad_norm": 1.4717226028442383, "learning_rate": 2e-05, "loss": 0.04839714, "step": 5216 }, { "epoch": 10.434, "grad_norm": 1.2671825885772705, "learning_rate": 2e-05, "loss": 0.04938861, "step": 5217 }, { "epoch": 10.436, "grad_norm": 1.608881950378418, "learning_rate": 2e-05, "loss": 0.04657299, "step": 5218 }, { "epoch": 10.438, "grad_norm": 1.2648624181747437, "learning_rate": 2e-05, "loss": 0.04671816, "step": 5219 }, { "epoch": 10.44, "grad_norm": 1.1300170421600342, "learning_rate": 2e-05, "loss": 0.04137243, "step": 5220 }, { "epoch": 10.442, "grad_norm": 3.460570812225342, "learning_rate": 2e-05, "loss": 0.04998839, "step": 5221 }, { "epoch": 10.444, "grad_norm": 2.094118118286133, "learning_rate": 2e-05, "loss": 0.05202382, "step": 5222 }, { "epoch": 10.446, "grad_norm": 1.0621201992034912, "learning_rate": 2e-05, "loss": 0.04093165, "step": 5223 }, { "epoch": 10.448, "grad_norm": 1.3191083669662476, "learning_rate": 2e-05, "loss": 0.05733116, "step": 5224 }, { "epoch": 10.45, "grad_norm": 1.8174514770507812, "learning_rate": 2e-05, "loss": 0.04964017, "step": 5225 }, { "epoch": 10.452, "grad_norm": 1.4243451356887817, "learning_rate": 2e-05, "loss": 0.05140733, "step": 5226 }, { "epoch": 10.454, "grad_norm": 1.5714677572250366, "learning_rate": 2e-05, "loss": 0.04012167, "step": 5227 }, { "epoch": 10.456, "grad_norm": 1.3298113346099854, "learning_rate": 2e-05, "loss": 0.04534006, "step": 5228 }, { "epoch": 10.458, "grad_norm": 1.4573249816894531, "learning_rate": 2e-05, "loss": 0.03835049, "step": 5229 }, { "epoch": 10.46, "grad_norm": 1.0268255472183228, "learning_rate": 2e-05, "loss": 0.03819286, "step": 5230 }, { "epoch": 10.462, "grad_norm": 1.2866380214691162, "learning_rate": 2e-05, "loss": 0.05413576, "step": 5231 }, { "epoch": 10.464, "grad_norm": 1.5915695428848267, "learning_rate": 2e-05, "loss": 0.0433933, "step": 5232 }, { "epoch": 10.466, "grad_norm": 1.1373809576034546, "learning_rate": 2e-05, "loss": 0.03938916, "step": 5233 }, { "epoch": 10.468, "grad_norm": 1.1512118577957153, "learning_rate": 2e-05, "loss": 0.03857891, "step": 5234 }, { "epoch": 10.47, "grad_norm": 3.0995097160339355, "learning_rate": 2e-05, "loss": 0.04354642, "step": 5235 }, { "epoch": 10.472, "grad_norm": 1.3437062501907349, "learning_rate": 2e-05, "loss": 0.05610079, "step": 5236 }, { "epoch": 10.474, "grad_norm": 1.5692955255508423, "learning_rate": 2e-05, "loss": 0.06778474, "step": 5237 }, { "epoch": 10.475999999999999, "grad_norm": 1.0118943452835083, "learning_rate": 2e-05, "loss": 0.04095022, "step": 5238 }, { "epoch": 10.478, "grad_norm": 1.2590157985687256, "learning_rate": 2e-05, "loss": 0.04719796, "step": 5239 }, { "epoch": 10.48, "grad_norm": 1.3623627424240112, "learning_rate": 2e-05, "loss": 0.05637202, "step": 5240 }, { "epoch": 10.482, "grad_norm": 1.4648089408874512, "learning_rate": 2e-05, "loss": 0.04508829, "step": 5241 }, { "epoch": 10.484, "grad_norm": 2.1232337951660156, "learning_rate": 2e-05, "loss": 0.06392019, "step": 5242 }, { "epoch": 10.486, "grad_norm": 0.9321593046188354, "learning_rate": 2e-05, "loss": 0.03444607, "step": 5243 }, { "epoch": 10.488, "grad_norm": 2.0859713554382324, "learning_rate": 2e-05, "loss": 0.05058786, "step": 5244 }, { "epoch": 10.49, "grad_norm": 2.27349853515625, "learning_rate": 2e-05, "loss": 0.0376498, "step": 5245 }, { "epoch": 10.492, "grad_norm": 1.40786612033844, "learning_rate": 2e-05, "loss": 0.0583963, "step": 5246 }, { "epoch": 10.494, "grad_norm": 0.9418710470199585, "learning_rate": 2e-05, "loss": 0.03963462, "step": 5247 }, { "epoch": 10.496, "grad_norm": 1.2764949798583984, "learning_rate": 2e-05, "loss": 0.04944011, "step": 5248 }, { "epoch": 10.498, "grad_norm": 1.3281084299087524, "learning_rate": 2e-05, "loss": 0.0566191, "step": 5249 }, { "epoch": 10.5, "grad_norm": 1.1722790002822876, "learning_rate": 2e-05, "loss": 0.04191694, "step": 5250 }, { "epoch": 10.502, "grad_norm": 2.043038845062256, "learning_rate": 2e-05, "loss": 0.05324342, "step": 5251 }, { "epoch": 10.504, "grad_norm": 1.1352224349975586, "learning_rate": 2e-05, "loss": 0.03782788, "step": 5252 }, { "epoch": 10.506, "grad_norm": 0.9655911326408386, "learning_rate": 2e-05, "loss": 0.03421069, "step": 5253 }, { "epoch": 10.508, "grad_norm": 1.2548446655273438, "learning_rate": 2e-05, "loss": 0.03868668, "step": 5254 }, { "epoch": 10.51, "grad_norm": 1.6412397623062134, "learning_rate": 2e-05, "loss": 0.04071921, "step": 5255 }, { "epoch": 10.512, "grad_norm": 1.2131768465042114, "learning_rate": 2e-05, "loss": 0.03983287, "step": 5256 }, { "epoch": 10.514, "grad_norm": 0.9349754452705383, "learning_rate": 2e-05, "loss": 0.0296357, "step": 5257 }, { "epoch": 10.516, "grad_norm": 1.1567655801773071, "learning_rate": 2e-05, "loss": 0.04256351, "step": 5258 }, { "epoch": 10.518, "grad_norm": 1.8970359563827515, "learning_rate": 2e-05, "loss": 0.05064099, "step": 5259 }, { "epoch": 10.52, "grad_norm": 1.6564329862594604, "learning_rate": 2e-05, "loss": 0.04537952, "step": 5260 }, { "epoch": 10.522, "grad_norm": 1.5826187133789062, "learning_rate": 2e-05, "loss": 0.05799884, "step": 5261 }, { "epoch": 10.524000000000001, "grad_norm": 1.548323154449463, "learning_rate": 2e-05, "loss": 0.04485494, "step": 5262 }, { "epoch": 10.526, "grad_norm": 1.3059474229812622, "learning_rate": 2e-05, "loss": 0.03964861, "step": 5263 }, { "epoch": 10.528, "grad_norm": 1.6953895092010498, "learning_rate": 2e-05, "loss": 0.05630913, "step": 5264 }, { "epoch": 10.53, "grad_norm": 1.119752049446106, "learning_rate": 2e-05, "loss": 0.04040305, "step": 5265 }, { "epoch": 10.532, "grad_norm": 1.5334519147872925, "learning_rate": 2e-05, "loss": 0.03204807, "step": 5266 }, { "epoch": 10.534, "grad_norm": 1.642197847366333, "learning_rate": 2e-05, "loss": 0.05770136, "step": 5267 }, { "epoch": 10.536, "grad_norm": 1.3823909759521484, "learning_rate": 2e-05, "loss": 0.04620174, "step": 5268 }, { "epoch": 10.538, "grad_norm": 1.1530804634094238, "learning_rate": 2e-05, "loss": 0.04625996, "step": 5269 }, { "epoch": 10.54, "grad_norm": 1.3294037580490112, "learning_rate": 2e-05, "loss": 0.03408037, "step": 5270 }, { "epoch": 10.542, "grad_norm": 1.531705617904663, "learning_rate": 2e-05, "loss": 0.04315554, "step": 5271 }, { "epoch": 10.544, "grad_norm": 1.0378473997116089, "learning_rate": 2e-05, "loss": 0.04161209, "step": 5272 }, { "epoch": 10.546, "grad_norm": 3.491225004196167, "learning_rate": 2e-05, "loss": 0.05236822, "step": 5273 }, { "epoch": 10.548, "grad_norm": 2.105463743209839, "learning_rate": 2e-05, "loss": 0.06430991, "step": 5274 }, { "epoch": 10.55, "grad_norm": 1.696533441543579, "learning_rate": 2e-05, "loss": 0.04670978, "step": 5275 }, { "epoch": 10.552, "grad_norm": 1.092452883720398, "learning_rate": 2e-05, "loss": 0.03668426, "step": 5276 }, { "epoch": 10.554, "grad_norm": 1.3232334852218628, "learning_rate": 2e-05, "loss": 0.04462042, "step": 5277 }, { "epoch": 10.556000000000001, "grad_norm": 1.3443771600723267, "learning_rate": 2e-05, "loss": 0.05517515, "step": 5278 }, { "epoch": 10.558, "grad_norm": 1.408677577972412, "learning_rate": 2e-05, "loss": 0.03840728, "step": 5279 }, { "epoch": 10.56, "grad_norm": 2.1185338497161865, "learning_rate": 2e-05, "loss": 0.05658809, "step": 5280 }, { "epoch": 10.562, "grad_norm": 1.205048680305481, "learning_rate": 2e-05, "loss": 0.04198525, "step": 5281 }, { "epoch": 10.564, "grad_norm": 2.283353328704834, "learning_rate": 2e-05, "loss": 0.04845209, "step": 5282 }, { "epoch": 10.566, "grad_norm": 1.6310850381851196, "learning_rate": 2e-05, "loss": 0.05775314, "step": 5283 }, { "epoch": 10.568, "grad_norm": 1.6950896978378296, "learning_rate": 2e-05, "loss": 0.05985184, "step": 5284 }, { "epoch": 10.57, "grad_norm": 2.2704873085021973, "learning_rate": 2e-05, "loss": 0.04732142, "step": 5285 }, { "epoch": 10.572, "grad_norm": 0.9121596217155457, "learning_rate": 2e-05, "loss": 0.02218048, "step": 5286 }, { "epoch": 10.574, "grad_norm": 1.3229914903640747, "learning_rate": 2e-05, "loss": 0.03729546, "step": 5287 }, { "epoch": 10.576, "grad_norm": 1.333988904953003, "learning_rate": 2e-05, "loss": 0.03841446, "step": 5288 }, { "epoch": 10.578, "grad_norm": 1.2878012657165527, "learning_rate": 2e-05, "loss": 0.06038348, "step": 5289 }, { "epoch": 10.58, "grad_norm": 1.76193106174469, "learning_rate": 2e-05, "loss": 0.05278437, "step": 5290 }, { "epoch": 10.582, "grad_norm": 2.2328977584838867, "learning_rate": 2e-05, "loss": 0.06430198, "step": 5291 }, { "epoch": 10.584, "grad_norm": 3.7427806854248047, "learning_rate": 2e-05, "loss": 0.04671086, "step": 5292 }, { "epoch": 10.586, "grad_norm": 1.1657195091247559, "learning_rate": 2e-05, "loss": 0.05584796, "step": 5293 }, { "epoch": 10.588, "grad_norm": 1.4417659044265747, "learning_rate": 2e-05, "loss": 0.04860631, "step": 5294 }, { "epoch": 10.59, "grad_norm": 2.0809431076049805, "learning_rate": 2e-05, "loss": 0.05853728, "step": 5295 }, { "epoch": 10.592, "grad_norm": 1.4285987615585327, "learning_rate": 2e-05, "loss": 0.04397691, "step": 5296 }, { "epoch": 10.594, "grad_norm": 3.4192137718200684, "learning_rate": 2e-05, "loss": 0.05258224, "step": 5297 }, { "epoch": 10.596, "grad_norm": 1.5991066694259644, "learning_rate": 2e-05, "loss": 0.05783376, "step": 5298 }, { "epoch": 10.598, "grad_norm": 2.19337797164917, "learning_rate": 2e-05, "loss": 0.04692521, "step": 5299 }, { "epoch": 10.6, "grad_norm": 0.9187573194503784, "learning_rate": 2e-05, "loss": 0.03587249, "step": 5300 }, { "epoch": 10.602, "grad_norm": 1.4692615270614624, "learning_rate": 2e-05, "loss": 0.04841263, "step": 5301 }, { "epoch": 10.604, "grad_norm": 1.0790823698043823, "learning_rate": 2e-05, "loss": 0.03448083, "step": 5302 }, { "epoch": 10.606, "grad_norm": 1.7811875343322754, "learning_rate": 2e-05, "loss": 0.04472546, "step": 5303 }, { "epoch": 10.608, "grad_norm": 1.7685264348983765, "learning_rate": 2e-05, "loss": 0.06594714, "step": 5304 }, { "epoch": 10.61, "grad_norm": 1.3392972946166992, "learning_rate": 2e-05, "loss": 0.0496632, "step": 5305 }, { "epoch": 10.612, "grad_norm": 1.453410029411316, "learning_rate": 2e-05, "loss": 0.05469693, "step": 5306 }, { "epoch": 10.614, "grad_norm": 1.5418132543563843, "learning_rate": 2e-05, "loss": 0.04416192, "step": 5307 }, { "epoch": 10.616, "grad_norm": 1.4389318227767944, "learning_rate": 2e-05, "loss": 0.03685165, "step": 5308 }, { "epoch": 10.618, "grad_norm": 1.4251545667648315, "learning_rate": 2e-05, "loss": 0.04970672, "step": 5309 }, { "epoch": 10.62, "grad_norm": 1.540438175201416, "learning_rate": 2e-05, "loss": 0.04326715, "step": 5310 }, { "epoch": 10.622, "grad_norm": 1.6067397594451904, "learning_rate": 2e-05, "loss": 0.04353811, "step": 5311 }, { "epoch": 10.624, "grad_norm": 1.7037297487258911, "learning_rate": 2e-05, "loss": 0.04110853, "step": 5312 }, { "epoch": 10.626, "grad_norm": 0.9400355815887451, "learning_rate": 2e-05, "loss": 0.03824214, "step": 5313 }, { "epoch": 10.628, "grad_norm": 1.2696064710617065, "learning_rate": 2e-05, "loss": 0.04225724, "step": 5314 }, { "epoch": 10.63, "grad_norm": 1.627137541770935, "learning_rate": 2e-05, "loss": 0.04458835, "step": 5315 }, { "epoch": 10.632, "grad_norm": 1.3326910734176636, "learning_rate": 2e-05, "loss": 0.04437424, "step": 5316 }, { "epoch": 10.634, "grad_norm": 1.0670576095581055, "learning_rate": 2e-05, "loss": 0.03998389, "step": 5317 }, { "epoch": 10.636, "grad_norm": 1.9328669309616089, "learning_rate": 2e-05, "loss": 0.05344262, "step": 5318 }, { "epoch": 10.638, "grad_norm": 1.3667006492614746, "learning_rate": 2e-05, "loss": 0.0504495, "step": 5319 }, { "epoch": 10.64, "grad_norm": 1.109780192375183, "learning_rate": 2e-05, "loss": 0.04091533, "step": 5320 }, { "epoch": 10.642, "grad_norm": 1.7905231714248657, "learning_rate": 2e-05, "loss": 0.06527205, "step": 5321 }, { "epoch": 10.644, "grad_norm": 1.552513837814331, "learning_rate": 2e-05, "loss": 0.03614276, "step": 5322 }, { "epoch": 10.646, "grad_norm": 1.3932260274887085, "learning_rate": 2e-05, "loss": 0.0487789, "step": 5323 }, { "epoch": 10.648, "grad_norm": 3.5807721614837646, "learning_rate": 2e-05, "loss": 0.05817016, "step": 5324 }, { "epoch": 10.65, "grad_norm": 1.8861697912216187, "learning_rate": 2e-05, "loss": 0.04461693, "step": 5325 }, { "epoch": 10.652, "grad_norm": 1.0382351875305176, "learning_rate": 2e-05, "loss": 0.03932602, "step": 5326 }, { "epoch": 10.654, "grad_norm": 1.748880386352539, "learning_rate": 2e-05, "loss": 0.03908409, "step": 5327 }, { "epoch": 10.656, "grad_norm": 1.3001683950424194, "learning_rate": 2e-05, "loss": 0.04414398, "step": 5328 }, { "epoch": 10.658, "grad_norm": 2.74198842048645, "learning_rate": 2e-05, "loss": 0.0632739, "step": 5329 }, { "epoch": 10.66, "grad_norm": 2.58345627784729, "learning_rate": 2e-05, "loss": 0.05152154, "step": 5330 }, { "epoch": 10.662, "grad_norm": 1.9050062894821167, "learning_rate": 2e-05, "loss": 0.04974983, "step": 5331 }, { "epoch": 10.664, "grad_norm": 1.484490990638733, "learning_rate": 2e-05, "loss": 0.04495884, "step": 5332 }, { "epoch": 10.666, "grad_norm": 1.979741096496582, "learning_rate": 2e-05, "loss": 0.06027087, "step": 5333 }, { "epoch": 10.668, "grad_norm": 1.2895029783248901, "learning_rate": 2e-05, "loss": 0.04649916, "step": 5334 }, { "epoch": 10.67, "grad_norm": 1.1832213401794434, "learning_rate": 2e-05, "loss": 0.03705991, "step": 5335 }, { "epoch": 10.672, "grad_norm": 1.044334053993225, "learning_rate": 2e-05, "loss": 0.03018861, "step": 5336 }, { "epoch": 10.674, "grad_norm": 1.0078890323638916, "learning_rate": 2e-05, "loss": 0.03941938, "step": 5337 }, { "epoch": 10.676, "grad_norm": 0.9854322075843811, "learning_rate": 2e-05, "loss": 0.03826457, "step": 5338 }, { "epoch": 10.678, "grad_norm": 1.7289786338806152, "learning_rate": 2e-05, "loss": 0.04666128, "step": 5339 }, { "epoch": 10.68, "grad_norm": 1.557122826576233, "learning_rate": 2e-05, "loss": 0.05393162, "step": 5340 }, { "epoch": 10.682, "grad_norm": 1.5376005172729492, "learning_rate": 2e-05, "loss": 0.04530572, "step": 5341 }, { "epoch": 10.684, "grad_norm": 1.469239354133606, "learning_rate": 2e-05, "loss": 0.04105308, "step": 5342 }, { "epoch": 10.686, "grad_norm": 1.0721392631530762, "learning_rate": 2e-05, "loss": 0.04845088, "step": 5343 }, { "epoch": 10.688, "grad_norm": 2.661815881729126, "learning_rate": 2e-05, "loss": 0.0507527, "step": 5344 }, { "epoch": 10.69, "grad_norm": 1.695129632949829, "learning_rate": 2e-05, "loss": 0.04042559, "step": 5345 }, { "epoch": 10.692, "grad_norm": 1.3462963104248047, "learning_rate": 2e-05, "loss": 0.03292784, "step": 5346 }, { "epoch": 10.693999999999999, "grad_norm": 1.1509976387023926, "learning_rate": 2e-05, "loss": 0.03733511, "step": 5347 }, { "epoch": 10.696, "grad_norm": 2.0126984119415283, "learning_rate": 2e-05, "loss": 0.06322368, "step": 5348 }, { "epoch": 10.698, "grad_norm": 1.7698595523834229, "learning_rate": 2e-05, "loss": 0.03852696, "step": 5349 }, { "epoch": 10.7, "grad_norm": 1.4363954067230225, "learning_rate": 2e-05, "loss": 0.05684633, "step": 5350 }, { "epoch": 10.702, "grad_norm": 2.493837833404541, "learning_rate": 2e-05, "loss": 0.07636867, "step": 5351 }, { "epoch": 10.704, "grad_norm": 1.069647192955017, "learning_rate": 2e-05, "loss": 0.04397136, "step": 5352 }, { "epoch": 10.706, "grad_norm": 1.1911834478378296, "learning_rate": 2e-05, "loss": 0.03940628, "step": 5353 }, { "epoch": 10.708, "grad_norm": 2.0640709400177, "learning_rate": 2e-05, "loss": 0.05594683, "step": 5354 }, { "epoch": 10.71, "grad_norm": 1.3439741134643555, "learning_rate": 2e-05, "loss": 0.04540187, "step": 5355 }, { "epoch": 10.712, "grad_norm": 1.9055041074752808, "learning_rate": 2e-05, "loss": 0.07221976, "step": 5356 }, { "epoch": 10.714, "grad_norm": 0.98797208070755, "learning_rate": 2e-05, "loss": 0.03278482, "step": 5357 }, { "epoch": 10.716, "grad_norm": 1.5830025672912598, "learning_rate": 2e-05, "loss": 0.04591039, "step": 5358 }, { "epoch": 10.718, "grad_norm": 1.6753437519073486, "learning_rate": 2e-05, "loss": 0.03800264, "step": 5359 }, { "epoch": 10.72, "grad_norm": 1.1719242334365845, "learning_rate": 2e-05, "loss": 0.03649624, "step": 5360 }, { "epoch": 10.722, "grad_norm": 2.2759554386138916, "learning_rate": 2e-05, "loss": 0.03482027, "step": 5361 }, { "epoch": 10.724, "grad_norm": 1.9583686590194702, "learning_rate": 2e-05, "loss": 0.04981028, "step": 5362 }, { "epoch": 10.725999999999999, "grad_norm": 1.8506280183792114, "learning_rate": 2e-05, "loss": 0.05849312, "step": 5363 }, { "epoch": 10.728, "grad_norm": 2.2492103576660156, "learning_rate": 2e-05, "loss": 0.05081097, "step": 5364 }, { "epoch": 10.73, "grad_norm": 1.2220958471298218, "learning_rate": 2e-05, "loss": 0.04709722, "step": 5365 }, { "epoch": 10.732, "grad_norm": 1.1159101724624634, "learning_rate": 2e-05, "loss": 0.03507699, "step": 5366 }, { "epoch": 10.734, "grad_norm": 1.2475676536560059, "learning_rate": 2e-05, "loss": 0.04950461, "step": 5367 }, { "epoch": 10.736, "grad_norm": 1.0842057466506958, "learning_rate": 2e-05, "loss": 0.03558665, "step": 5368 }, { "epoch": 10.738, "grad_norm": 2.7411975860595703, "learning_rate": 2e-05, "loss": 0.06068648, "step": 5369 }, { "epoch": 10.74, "grad_norm": 1.292697787284851, "learning_rate": 2e-05, "loss": 0.05118963, "step": 5370 }, { "epoch": 10.742, "grad_norm": 1.4311535358428955, "learning_rate": 2e-05, "loss": 0.04605145, "step": 5371 }, { "epoch": 10.744, "grad_norm": 1.017180323600769, "learning_rate": 2e-05, "loss": 0.03107076, "step": 5372 }, { "epoch": 10.746, "grad_norm": 1.7383993864059448, "learning_rate": 2e-05, "loss": 0.05666218, "step": 5373 }, { "epoch": 10.748, "grad_norm": 1.4858373403549194, "learning_rate": 2e-05, "loss": 0.0581267, "step": 5374 }, { "epoch": 10.75, "grad_norm": 1.163069248199463, "learning_rate": 2e-05, "loss": 0.0323478, "step": 5375 }, { "epoch": 10.752, "grad_norm": 1.011885404586792, "learning_rate": 2e-05, "loss": 0.04137402, "step": 5376 }, { "epoch": 10.754, "grad_norm": 1.1873332262039185, "learning_rate": 2e-05, "loss": 0.04399034, "step": 5377 }, { "epoch": 10.756, "grad_norm": 1.268231749534607, "learning_rate": 2e-05, "loss": 0.03441441, "step": 5378 }, { "epoch": 10.758, "grad_norm": 1.192875623703003, "learning_rate": 2e-05, "loss": 0.04823461, "step": 5379 }, { "epoch": 10.76, "grad_norm": 1.4787641763687134, "learning_rate": 2e-05, "loss": 0.06237837, "step": 5380 }, { "epoch": 10.762, "grad_norm": 1.1821213960647583, "learning_rate": 2e-05, "loss": 0.04152807, "step": 5381 }, { "epoch": 10.764, "grad_norm": 1.5032027959823608, "learning_rate": 2e-05, "loss": 0.0385498, "step": 5382 }, { "epoch": 10.766, "grad_norm": 1.004870891571045, "learning_rate": 2e-05, "loss": 0.03873428, "step": 5383 }, { "epoch": 10.768, "grad_norm": 1.1021831035614014, "learning_rate": 2e-05, "loss": 0.0336751, "step": 5384 }, { "epoch": 10.77, "grad_norm": 1.8793764114379883, "learning_rate": 2e-05, "loss": 0.04783071, "step": 5385 }, { "epoch": 10.772, "grad_norm": 1.724388599395752, "learning_rate": 2e-05, "loss": 0.04026482, "step": 5386 }, { "epoch": 10.774000000000001, "grad_norm": 2.2936954498291016, "learning_rate": 2e-05, "loss": 0.06052912, "step": 5387 }, { "epoch": 10.776, "grad_norm": 1.5764778852462769, "learning_rate": 2e-05, "loss": 0.05990978, "step": 5388 }, { "epoch": 10.778, "grad_norm": 1.3720786571502686, "learning_rate": 2e-05, "loss": 0.05243789, "step": 5389 }, { "epoch": 10.78, "grad_norm": 1.622320532798767, "learning_rate": 2e-05, "loss": 0.04683674, "step": 5390 }, { "epoch": 10.782, "grad_norm": 1.5055615901947021, "learning_rate": 2e-05, "loss": 0.04304944, "step": 5391 }, { "epoch": 10.784, "grad_norm": 2.1682701110839844, "learning_rate": 2e-05, "loss": 0.04306971, "step": 5392 }, { "epoch": 10.786, "grad_norm": 3.5964715480804443, "learning_rate": 2e-05, "loss": 0.07316145, "step": 5393 }, { "epoch": 10.788, "grad_norm": 1.3190217018127441, "learning_rate": 2e-05, "loss": 0.03355244, "step": 5394 }, { "epoch": 10.79, "grad_norm": 1.0943877696990967, "learning_rate": 2e-05, "loss": 0.03827272, "step": 5395 }, { "epoch": 10.792, "grad_norm": 1.4639499187469482, "learning_rate": 2e-05, "loss": 0.03687127, "step": 5396 }, { "epoch": 10.794, "grad_norm": 1.4996309280395508, "learning_rate": 2e-05, "loss": 0.03592747, "step": 5397 }, { "epoch": 10.796, "grad_norm": 1.646813154220581, "learning_rate": 2e-05, "loss": 0.04305978, "step": 5398 }, { "epoch": 10.798, "grad_norm": 1.54507315158844, "learning_rate": 2e-05, "loss": 0.03623936, "step": 5399 }, { "epoch": 10.8, "grad_norm": 0.8352827429771423, "learning_rate": 2e-05, "loss": 0.03700903, "step": 5400 }, { "epoch": 10.802, "grad_norm": 1.849571704864502, "learning_rate": 2e-05, "loss": 0.03478384, "step": 5401 }, { "epoch": 10.804, "grad_norm": 1.0971314907073975, "learning_rate": 2e-05, "loss": 0.04389728, "step": 5402 }, { "epoch": 10.806000000000001, "grad_norm": 1.6528582572937012, "learning_rate": 2e-05, "loss": 0.04366919, "step": 5403 }, { "epoch": 10.808, "grad_norm": 1.0153130292892456, "learning_rate": 2e-05, "loss": 0.02811515, "step": 5404 }, { "epoch": 10.81, "grad_norm": 1.5921767950057983, "learning_rate": 2e-05, "loss": 0.04679709, "step": 5405 }, { "epoch": 10.812, "grad_norm": 1.4636269807815552, "learning_rate": 2e-05, "loss": 0.03969422, "step": 5406 }, { "epoch": 10.814, "grad_norm": 1.1644132137298584, "learning_rate": 2e-05, "loss": 0.03722133, "step": 5407 }, { "epoch": 10.816, "grad_norm": 1.545884370803833, "learning_rate": 2e-05, "loss": 0.04234442, "step": 5408 }, { "epoch": 10.818, "grad_norm": 1.323274850845337, "learning_rate": 2e-05, "loss": 0.0486748, "step": 5409 }, { "epoch": 10.82, "grad_norm": 1.6836318969726562, "learning_rate": 2e-05, "loss": 0.06580575, "step": 5410 }, { "epoch": 10.822, "grad_norm": 1.7801461219787598, "learning_rate": 2e-05, "loss": 0.04742541, "step": 5411 }, { "epoch": 10.824, "grad_norm": 0.9295609593391418, "learning_rate": 2e-05, "loss": 0.03751448, "step": 5412 }, { "epoch": 10.826, "grad_norm": 1.0475472211837769, "learning_rate": 2e-05, "loss": 0.04267604, "step": 5413 }, { "epoch": 10.828, "grad_norm": 1.7485098838806152, "learning_rate": 2e-05, "loss": 0.06050641, "step": 5414 }, { "epoch": 10.83, "grad_norm": 1.3877590894699097, "learning_rate": 2e-05, "loss": 0.03360231, "step": 5415 }, { "epoch": 10.832, "grad_norm": 2.89982533454895, "learning_rate": 2e-05, "loss": 0.04542317, "step": 5416 }, { "epoch": 10.834, "grad_norm": 2.000558376312256, "learning_rate": 2e-05, "loss": 0.05908286, "step": 5417 }, { "epoch": 10.836, "grad_norm": 1.636086106300354, "learning_rate": 2e-05, "loss": 0.05142832, "step": 5418 }, { "epoch": 10.838, "grad_norm": 2.168187379837036, "learning_rate": 2e-05, "loss": 0.05235282, "step": 5419 }, { "epoch": 10.84, "grad_norm": 3.266932249069214, "learning_rate": 2e-05, "loss": 0.04076022, "step": 5420 }, { "epoch": 10.842, "grad_norm": 2.273758888244629, "learning_rate": 2e-05, "loss": 0.07735814, "step": 5421 }, { "epoch": 10.844, "grad_norm": 1.3135398626327515, "learning_rate": 2e-05, "loss": 0.03994468, "step": 5422 }, { "epoch": 10.846, "grad_norm": 1.5068111419677734, "learning_rate": 2e-05, "loss": 0.04119268, "step": 5423 }, { "epoch": 10.848, "grad_norm": 1.700152039527893, "learning_rate": 2e-05, "loss": 0.04611378, "step": 5424 }, { "epoch": 10.85, "grad_norm": 1.623658537864685, "learning_rate": 2e-05, "loss": 0.04831513, "step": 5425 }, { "epoch": 10.852, "grad_norm": 1.3592655658721924, "learning_rate": 2e-05, "loss": 0.04930335, "step": 5426 }, { "epoch": 10.854, "grad_norm": 1.4724581241607666, "learning_rate": 2e-05, "loss": 0.04035, "step": 5427 }, { "epoch": 10.856, "grad_norm": 1.9414317607879639, "learning_rate": 2e-05, "loss": 0.03376986, "step": 5428 }, { "epoch": 10.858, "grad_norm": 2.865262031555176, "learning_rate": 2e-05, "loss": 0.03764952, "step": 5429 }, { "epoch": 10.86, "grad_norm": 1.0723745822906494, "learning_rate": 2e-05, "loss": 0.03601522, "step": 5430 }, { "epoch": 10.862, "grad_norm": 1.4561656713485718, "learning_rate": 2e-05, "loss": 0.04668137, "step": 5431 }, { "epoch": 10.864, "grad_norm": 1.5199332237243652, "learning_rate": 2e-05, "loss": 0.0522556, "step": 5432 }, { "epoch": 10.866, "grad_norm": 4.452966690063477, "learning_rate": 2e-05, "loss": 0.04853038, "step": 5433 }, { "epoch": 10.868, "grad_norm": 1.0656723976135254, "learning_rate": 2e-05, "loss": 0.0335426, "step": 5434 }, { "epoch": 10.87, "grad_norm": 1.1890794038772583, "learning_rate": 2e-05, "loss": 0.05470164, "step": 5435 }, { "epoch": 10.872, "grad_norm": 1.3932714462280273, "learning_rate": 2e-05, "loss": 0.03396948, "step": 5436 }, { "epoch": 10.874, "grad_norm": 2.452359199523926, "learning_rate": 2e-05, "loss": 0.06212484, "step": 5437 }, { "epoch": 10.876, "grad_norm": 1.1089293956756592, "learning_rate": 2e-05, "loss": 0.04726985, "step": 5438 }, { "epoch": 10.878, "grad_norm": 1.3542646169662476, "learning_rate": 2e-05, "loss": 0.06407127, "step": 5439 }, { "epoch": 10.88, "grad_norm": 2.5120787620544434, "learning_rate": 2e-05, "loss": 0.05100137, "step": 5440 }, { "epoch": 10.882, "grad_norm": 1.2841778993606567, "learning_rate": 2e-05, "loss": 0.04290632, "step": 5441 }, { "epoch": 10.884, "grad_norm": 1.3860931396484375, "learning_rate": 2e-05, "loss": 0.03255866, "step": 5442 }, { "epoch": 10.886, "grad_norm": 1.5695399045944214, "learning_rate": 2e-05, "loss": 0.06652699, "step": 5443 }, { "epoch": 10.888, "grad_norm": 1.373000144958496, "learning_rate": 2e-05, "loss": 0.04752633, "step": 5444 }, { "epoch": 10.89, "grad_norm": 1.3264533281326294, "learning_rate": 2e-05, "loss": 0.03849954, "step": 5445 }, { "epoch": 10.892, "grad_norm": 1.5587037801742554, "learning_rate": 2e-05, "loss": 0.04893732, "step": 5446 }, { "epoch": 10.894, "grad_norm": 2.936749219894409, "learning_rate": 2e-05, "loss": 0.07763853, "step": 5447 }, { "epoch": 10.896, "grad_norm": 1.4607787132263184, "learning_rate": 2e-05, "loss": 0.0395316, "step": 5448 }, { "epoch": 10.898, "grad_norm": 1.377250075340271, "learning_rate": 2e-05, "loss": 0.03756543, "step": 5449 }, { "epoch": 10.9, "grad_norm": 2.3027827739715576, "learning_rate": 2e-05, "loss": 0.05122847, "step": 5450 }, { "epoch": 10.902, "grad_norm": 1.065912127494812, "learning_rate": 2e-05, "loss": 0.04124514, "step": 5451 }, { "epoch": 10.904, "grad_norm": 1.8254315853118896, "learning_rate": 2e-05, "loss": 0.05355402, "step": 5452 }, { "epoch": 10.906, "grad_norm": 2.0440783500671387, "learning_rate": 2e-05, "loss": 0.04196255, "step": 5453 }, { "epoch": 10.908, "grad_norm": 1.8828097581863403, "learning_rate": 2e-05, "loss": 0.04547044, "step": 5454 }, { "epoch": 10.91, "grad_norm": 1.5222113132476807, "learning_rate": 2e-05, "loss": 0.04373585, "step": 5455 }, { "epoch": 10.912, "grad_norm": 1.1752500534057617, "learning_rate": 2e-05, "loss": 0.03388649, "step": 5456 }, { "epoch": 10.914, "grad_norm": 1.1844501495361328, "learning_rate": 2e-05, "loss": 0.035423, "step": 5457 }, { "epoch": 10.916, "grad_norm": 1.7836790084838867, "learning_rate": 2e-05, "loss": 0.06354485, "step": 5458 }, { "epoch": 10.918, "grad_norm": 1.1966222524642944, "learning_rate": 2e-05, "loss": 0.05429721, "step": 5459 }, { "epoch": 10.92, "grad_norm": 1.4878946542739868, "learning_rate": 2e-05, "loss": 0.05581759, "step": 5460 }, { "epoch": 10.922, "grad_norm": 1.8876607418060303, "learning_rate": 2e-05, "loss": 0.04348254, "step": 5461 }, { "epoch": 10.924, "grad_norm": 1.0203397274017334, "learning_rate": 2e-05, "loss": 0.03569401, "step": 5462 }, { "epoch": 10.926, "grad_norm": 1.5369797945022583, "learning_rate": 2e-05, "loss": 0.05227324, "step": 5463 }, { "epoch": 10.928, "grad_norm": 2.961735725402832, "learning_rate": 2e-05, "loss": 0.04804858, "step": 5464 }, { "epoch": 10.93, "grad_norm": 3.0615086555480957, "learning_rate": 2e-05, "loss": 0.05689555, "step": 5465 }, { "epoch": 10.932, "grad_norm": 1.463494062423706, "learning_rate": 2e-05, "loss": 0.03982669, "step": 5466 }, { "epoch": 10.934, "grad_norm": 1.8317439556121826, "learning_rate": 2e-05, "loss": 0.06854389, "step": 5467 }, { "epoch": 10.936, "grad_norm": 1.9723395109176636, "learning_rate": 2e-05, "loss": 0.03869996, "step": 5468 }, { "epoch": 10.938, "grad_norm": 1.3843753337860107, "learning_rate": 2e-05, "loss": 0.03941353, "step": 5469 }, { "epoch": 10.94, "grad_norm": 1.3019388914108276, "learning_rate": 2e-05, "loss": 0.03932222, "step": 5470 }, { "epoch": 10.942, "grad_norm": 2.316476345062256, "learning_rate": 2e-05, "loss": 0.06251483, "step": 5471 }, { "epoch": 10.943999999999999, "grad_norm": 1.169537901878357, "learning_rate": 2e-05, "loss": 0.04718368, "step": 5472 }, { "epoch": 10.946, "grad_norm": 1.271376371383667, "learning_rate": 2e-05, "loss": 0.04028846, "step": 5473 }, { "epoch": 10.948, "grad_norm": 1.7682650089263916, "learning_rate": 2e-05, "loss": 0.0501989, "step": 5474 }, { "epoch": 10.95, "grad_norm": 1.5534263849258423, "learning_rate": 2e-05, "loss": 0.04301309, "step": 5475 }, { "epoch": 10.952, "grad_norm": 1.8759846687316895, "learning_rate": 2e-05, "loss": 0.05785131, "step": 5476 }, { "epoch": 10.954, "grad_norm": 1.1917895078659058, "learning_rate": 2e-05, "loss": 0.04767988, "step": 5477 }, { "epoch": 10.956, "grad_norm": 0.9923660159111023, "learning_rate": 2e-05, "loss": 0.03223813, "step": 5478 }, { "epoch": 10.958, "grad_norm": 1.7956323623657227, "learning_rate": 2e-05, "loss": 0.04907088, "step": 5479 }, { "epoch": 10.96, "grad_norm": 1.3423640727996826, "learning_rate": 2e-05, "loss": 0.04251694, "step": 5480 }, { "epoch": 10.962, "grad_norm": 1.2761433124542236, "learning_rate": 2e-05, "loss": 0.04906294, "step": 5481 }, { "epoch": 10.964, "grad_norm": 1.468414068222046, "learning_rate": 2e-05, "loss": 0.0428313, "step": 5482 }, { "epoch": 10.966, "grad_norm": 1.171011209487915, "learning_rate": 2e-05, "loss": 0.04272577, "step": 5483 }, { "epoch": 10.968, "grad_norm": 2.568819761276245, "learning_rate": 2e-05, "loss": 0.058103, "step": 5484 }, { "epoch": 10.97, "grad_norm": 1.6843867301940918, "learning_rate": 2e-05, "loss": 0.04480528, "step": 5485 }, { "epoch": 10.972, "grad_norm": 1.4692931175231934, "learning_rate": 2e-05, "loss": 0.05085419, "step": 5486 }, { "epoch": 10.974, "grad_norm": 3.0729751586914062, "learning_rate": 2e-05, "loss": 0.05789775, "step": 5487 }, { "epoch": 10.975999999999999, "grad_norm": 1.638168454170227, "learning_rate": 2e-05, "loss": 0.05257408, "step": 5488 }, { "epoch": 10.978, "grad_norm": 1.0241093635559082, "learning_rate": 2e-05, "loss": 0.04093259, "step": 5489 }, { "epoch": 10.98, "grad_norm": 1.4708915948867798, "learning_rate": 2e-05, "loss": 0.06025437, "step": 5490 }, { "epoch": 10.982, "grad_norm": 1.5074944496154785, "learning_rate": 2e-05, "loss": 0.05554744, "step": 5491 }, { "epoch": 10.984, "grad_norm": 0.7679497003555298, "learning_rate": 2e-05, "loss": 0.02836152, "step": 5492 }, { "epoch": 10.986, "grad_norm": 1.4160032272338867, "learning_rate": 2e-05, "loss": 0.05011186, "step": 5493 }, { "epoch": 10.988, "grad_norm": 1.655074119567871, "learning_rate": 2e-05, "loss": 0.05221368, "step": 5494 }, { "epoch": 10.99, "grad_norm": 1.0784094333648682, "learning_rate": 2e-05, "loss": 0.04471947, "step": 5495 }, { "epoch": 10.992, "grad_norm": 1.6569883823394775, "learning_rate": 2e-05, "loss": 0.05950356, "step": 5496 }, { "epoch": 10.994, "grad_norm": 1.2838274240493774, "learning_rate": 2e-05, "loss": 0.05127814, "step": 5497 }, { "epoch": 10.996, "grad_norm": 1.4936411380767822, "learning_rate": 2e-05, "loss": 0.05115497, "step": 5498 }, { "epoch": 10.998, "grad_norm": 1.2831586599349976, "learning_rate": 2e-05, "loss": 0.05277807, "step": 5499 }, { "epoch": 11.0, "grad_norm": 1.3925591707229614, "learning_rate": 2e-05, "loss": 0.0485413, "step": 5500 }, { "epoch": 11.0, "eval_performance": { "AngleClassification_1": 0.998, "AngleClassification_2": 0.998, "AngleClassification_3": 0.9660678642714571, "Equal_1": 0.986, "Equal_2": 0.9500998003992016, "Equal_3": 0.844311377245509, "LineComparison_1": 0.998, "LineComparison_2": 0.998003992015968, "LineComparison_3": 0.9900199600798403, "Parallel_1": 0.9799599198396793, "Parallel_2": 0.9939879759519038, "Parallel_3": 0.942, "Perpendicular_1": 0.994, "Perpendicular_2": 0.962, "Perpendicular_3": 0.7044088176352705, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.9912000000000001, "PointLiesOnCircle_3": 0.9863999999999999, "PointLiesOnLine_1": 0.9899799599198397, "PointLiesOnLine_2": 0.9899799599198397, "PointLiesOnLine_3": 0.9640718562874252 }, "eval_runtime": 225.9274, "eval_samples_per_second": 46.475, "eval_steps_per_second": 0.93, "step": 5500 }, { "epoch": 11.002, "grad_norm": 1.5129892826080322, "learning_rate": 2e-05, "loss": 0.0369215, "step": 5501 }, { "epoch": 11.004, "grad_norm": 1.124927282333374, "learning_rate": 2e-05, "loss": 0.04083242, "step": 5502 }, { "epoch": 11.006, "grad_norm": 1.1797329187393188, "learning_rate": 2e-05, "loss": 0.03633253, "step": 5503 }, { "epoch": 11.008, "grad_norm": 1.1094249486923218, "learning_rate": 2e-05, "loss": 0.04322121, "step": 5504 }, { "epoch": 11.01, "grad_norm": 1.8619836568832397, "learning_rate": 2e-05, "loss": 0.04291176, "step": 5505 }, { "epoch": 11.012, "grad_norm": 1.3340626955032349, "learning_rate": 2e-05, "loss": 0.05174651, "step": 5506 }, { "epoch": 11.014, "grad_norm": 1.4056316614151, "learning_rate": 2e-05, "loss": 0.04947824, "step": 5507 }, { "epoch": 11.016, "grad_norm": 1.4125919342041016, "learning_rate": 2e-05, "loss": 0.06489463, "step": 5508 }, { "epoch": 11.018, "grad_norm": 1.839034080505371, "learning_rate": 2e-05, "loss": 0.03509891, "step": 5509 }, { "epoch": 11.02, "grad_norm": 1.1220529079437256, "learning_rate": 2e-05, "loss": 0.03772662, "step": 5510 }, { "epoch": 11.022, "grad_norm": 1.6442408561706543, "learning_rate": 2e-05, "loss": 0.04390422, "step": 5511 }, { "epoch": 11.024, "grad_norm": 1.8351116180419922, "learning_rate": 2e-05, "loss": 0.04628797, "step": 5512 }, { "epoch": 11.026, "grad_norm": 1.601952075958252, "learning_rate": 2e-05, "loss": 0.05928398, "step": 5513 }, { "epoch": 11.028, "grad_norm": 2.127190589904785, "learning_rate": 2e-05, "loss": 0.04890013, "step": 5514 }, { "epoch": 11.03, "grad_norm": 1.522328495979309, "learning_rate": 2e-05, "loss": 0.04225086, "step": 5515 }, { "epoch": 11.032, "grad_norm": 3.6366055011749268, "learning_rate": 2e-05, "loss": 0.05055761, "step": 5516 }, { "epoch": 11.034, "grad_norm": 1.4562708139419556, "learning_rate": 2e-05, "loss": 0.04158041, "step": 5517 }, { "epoch": 11.036, "grad_norm": 1.181654691696167, "learning_rate": 2e-05, "loss": 0.04863023, "step": 5518 }, { "epoch": 11.038, "grad_norm": 1.1800870895385742, "learning_rate": 2e-05, "loss": 0.03472148, "step": 5519 }, { "epoch": 11.04, "grad_norm": 1.0220799446105957, "learning_rate": 2e-05, "loss": 0.03425215, "step": 5520 }, { "epoch": 11.042, "grad_norm": 1.6748689413070679, "learning_rate": 2e-05, "loss": 0.07067639, "step": 5521 }, { "epoch": 11.044, "grad_norm": 0.9950650930404663, "learning_rate": 2e-05, "loss": 0.03885378, "step": 5522 }, { "epoch": 11.046, "grad_norm": 1.2238004207611084, "learning_rate": 2e-05, "loss": 0.04370748, "step": 5523 }, { "epoch": 11.048, "grad_norm": 1.1113420724868774, "learning_rate": 2e-05, "loss": 0.04559463, "step": 5524 }, { "epoch": 11.05, "grad_norm": 1.1755677461624146, "learning_rate": 2e-05, "loss": 0.0371765, "step": 5525 }, { "epoch": 11.052, "grad_norm": 1.283955454826355, "learning_rate": 2e-05, "loss": 0.0462045, "step": 5526 }, { "epoch": 11.054, "grad_norm": 1.543997049331665, "learning_rate": 2e-05, "loss": 0.0458624, "step": 5527 }, { "epoch": 11.056, "grad_norm": 1.6570807695388794, "learning_rate": 2e-05, "loss": 0.03820822, "step": 5528 }, { "epoch": 11.058, "grad_norm": 1.3682162761688232, "learning_rate": 2e-05, "loss": 0.04793447, "step": 5529 }, { "epoch": 11.06, "grad_norm": 1.5750741958618164, "learning_rate": 2e-05, "loss": 0.04384677, "step": 5530 }, { "epoch": 11.062, "grad_norm": 1.293764352798462, "learning_rate": 2e-05, "loss": 0.05241433, "step": 5531 }, { "epoch": 11.064, "grad_norm": 1.5444961786270142, "learning_rate": 2e-05, "loss": 0.0531601, "step": 5532 }, { "epoch": 11.066, "grad_norm": 1.2636035680770874, "learning_rate": 2e-05, "loss": 0.05505327, "step": 5533 }, { "epoch": 11.068, "grad_norm": 0.9716835021972656, "learning_rate": 2e-05, "loss": 0.03556025, "step": 5534 }, { "epoch": 11.07, "grad_norm": 1.1872612237930298, "learning_rate": 2e-05, "loss": 0.03647027, "step": 5535 }, { "epoch": 11.072, "grad_norm": 3.027980089187622, "learning_rate": 2e-05, "loss": 0.03803849, "step": 5536 }, { "epoch": 11.074, "grad_norm": 2.029426097869873, "learning_rate": 2e-05, "loss": 0.04913022, "step": 5537 }, { "epoch": 11.076, "grad_norm": 1.4110368490219116, "learning_rate": 2e-05, "loss": 0.06917428, "step": 5538 }, { "epoch": 11.078, "grad_norm": 1.4160209894180298, "learning_rate": 2e-05, "loss": 0.05024675, "step": 5539 }, { "epoch": 11.08, "grad_norm": 1.265589714050293, "learning_rate": 2e-05, "loss": 0.04419937, "step": 5540 }, { "epoch": 11.082, "grad_norm": 1.3014708757400513, "learning_rate": 2e-05, "loss": 0.05151144, "step": 5541 }, { "epoch": 11.084, "grad_norm": 2.838567018508911, "learning_rate": 2e-05, "loss": 0.05248385, "step": 5542 }, { "epoch": 11.086, "grad_norm": 2.306208610534668, "learning_rate": 2e-05, "loss": 0.04678107, "step": 5543 }, { "epoch": 11.088, "grad_norm": 1.1169698238372803, "learning_rate": 2e-05, "loss": 0.03919671, "step": 5544 }, { "epoch": 11.09, "grad_norm": 1.4747928380966187, "learning_rate": 2e-05, "loss": 0.05265068, "step": 5545 }, { "epoch": 11.092, "grad_norm": 1.2494477033615112, "learning_rate": 2e-05, "loss": 0.04260018, "step": 5546 }, { "epoch": 11.094, "grad_norm": 1.1028614044189453, "learning_rate": 2e-05, "loss": 0.05244143, "step": 5547 }, { "epoch": 11.096, "grad_norm": 0.9844163060188293, "learning_rate": 2e-05, "loss": 0.03798879, "step": 5548 }, { "epoch": 11.098, "grad_norm": 2.351842164993286, "learning_rate": 2e-05, "loss": 0.05634853, "step": 5549 }, { "epoch": 11.1, "grad_norm": 1.7286821603775024, "learning_rate": 2e-05, "loss": 0.05138499, "step": 5550 }, { "epoch": 11.102, "grad_norm": 1.3676533699035645, "learning_rate": 2e-05, "loss": 0.04455975, "step": 5551 }, { "epoch": 11.104, "grad_norm": 1.5137712955474854, "learning_rate": 2e-05, "loss": 0.03361043, "step": 5552 }, { "epoch": 11.106, "grad_norm": 1.4154843091964722, "learning_rate": 2e-05, "loss": 0.051332, "step": 5553 }, { "epoch": 11.108, "grad_norm": 1.6000605821609497, "learning_rate": 2e-05, "loss": 0.03720913, "step": 5554 }, { "epoch": 11.11, "grad_norm": 1.5306816101074219, "learning_rate": 2e-05, "loss": 0.05529812, "step": 5555 }, { "epoch": 11.112, "grad_norm": 1.1378834247589111, "learning_rate": 2e-05, "loss": 0.05321346, "step": 5556 }, { "epoch": 11.114, "grad_norm": 1.012001395225525, "learning_rate": 2e-05, "loss": 0.03534603, "step": 5557 }, { "epoch": 11.116, "grad_norm": 1.6542078256607056, "learning_rate": 2e-05, "loss": 0.05903975, "step": 5558 }, { "epoch": 11.118, "grad_norm": 1.360988974571228, "learning_rate": 2e-05, "loss": 0.04353476, "step": 5559 }, { "epoch": 11.12, "grad_norm": 1.4309282302856445, "learning_rate": 2e-05, "loss": 0.04631358, "step": 5560 }, { "epoch": 11.122, "grad_norm": 1.4173966646194458, "learning_rate": 2e-05, "loss": 0.05445068, "step": 5561 }, { "epoch": 11.124, "grad_norm": 1.5672515630722046, "learning_rate": 2e-05, "loss": 0.05220301, "step": 5562 }, { "epoch": 11.126, "grad_norm": 1.405856966972351, "learning_rate": 2e-05, "loss": 0.0406395, "step": 5563 }, { "epoch": 11.128, "grad_norm": 0.878871500492096, "learning_rate": 2e-05, "loss": 0.02857321, "step": 5564 }, { "epoch": 11.13, "grad_norm": 0.9947690963745117, "learning_rate": 2e-05, "loss": 0.03733744, "step": 5565 }, { "epoch": 11.132, "grad_norm": 1.1135151386260986, "learning_rate": 2e-05, "loss": 0.03903735, "step": 5566 }, { "epoch": 11.134, "grad_norm": 0.9332975149154663, "learning_rate": 2e-05, "loss": 0.03472457, "step": 5567 }, { "epoch": 11.136, "grad_norm": 1.396668791770935, "learning_rate": 2e-05, "loss": 0.05403772, "step": 5568 }, { "epoch": 11.138, "grad_norm": 1.088739037513733, "learning_rate": 2e-05, "loss": 0.04285156, "step": 5569 }, { "epoch": 11.14, "grad_norm": 1.3420660495758057, "learning_rate": 2e-05, "loss": 0.04896339, "step": 5570 }, { "epoch": 11.142, "grad_norm": 1.033286452293396, "learning_rate": 2e-05, "loss": 0.03341709, "step": 5571 }, { "epoch": 11.144, "grad_norm": 1.432984709739685, "learning_rate": 2e-05, "loss": 0.03892455, "step": 5572 }, { "epoch": 11.146, "grad_norm": 1.4586683511734009, "learning_rate": 2e-05, "loss": 0.04999299, "step": 5573 }, { "epoch": 11.148, "grad_norm": 2.0095505714416504, "learning_rate": 2e-05, "loss": 0.0587076, "step": 5574 }, { "epoch": 11.15, "grad_norm": 4.684654712677002, "learning_rate": 2e-05, "loss": 0.04124208, "step": 5575 }, { "epoch": 11.152, "grad_norm": 1.1088297367095947, "learning_rate": 2e-05, "loss": 0.04705612, "step": 5576 }, { "epoch": 11.154, "grad_norm": 1.2322545051574707, "learning_rate": 2e-05, "loss": 0.03180978, "step": 5577 }, { "epoch": 11.156, "grad_norm": 0.9616858959197998, "learning_rate": 2e-05, "loss": 0.03170741, "step": 5578 }, { "epoch": 11.158, "grad_norm": 1.8117586374282837, "learning_rate": 2e-05, "loss": 0.05111115, "step": 5579 }, { "epoch": 11.16, "grad_norm": 1.542199969291687, "learning_rate": 2e-05, "loss": 0.03360421, "step": 5580 }, { "epoch": 11.162, "grad_norm": 0.9329782128334045, "learning_rate": 2e-05, "loss": 0.03266809, "step": 5581 }, { "epoch": 11.164, "grad_norm": 1.5932724475860596, "learning_rate": 2e-05, "loss": 0.04716074, "step": 5582 }, { "epoch": 11.166, "grad_norm": 1.8134902715682983, "learning_rate": 2e-05, "loss": 0.0634952, "step": 5583 }, { "epoch": 11.168, "grad_norm": 2.087653875350952, "learning_rate": 2e-05, "loss": 0.05776756, "step": 5584 }, { "epoch": 11.17, "grad_norm": 1.066960334777832, "learning_rate": 2e-05, "loss": 0.04066526, "step": 5585 }, { "epoch": 11.172, "grad_norm": 2.1875832080841064, "learning_rate": 2e-05, "loss": 0.0559142, "step": 5586 }, { "epoch": 11.174, "grad_norm": 1.7089463472366333, "learning_rate": 2e-05, "loss": 0.0528534, "step": 5587 }, { "epoch": 11.176, "grad_norm": 0.9425379633903503, "learning_rate": 2e-05, "loss": 0.03477708, "step": 5588 }, { "epoch": 11.178, "grad_norm": 2.212904453277588, "learning_rate": 2e-05, "loss": 0.07304186, "step": 5589 }, { "epoch": 11.18, "grad_norm": 1.4487138986587524, "learning_rate": 2e-05, "loss": 0.04018127, "step": 5590 }, { "epoch": 11.182, "grad_norm": 1.331348180770874, "learning_rate": 2e-05, "loss": 0.04178457, "step": 5591 }, { "epoch": 11.184, "grad_norm": 1.067719578742981, "learning_rate": 2e-05, "loss": 0.04530909, "step": 5592 }, { "epoch": 11.186, "grad_norm": 1.5780181884765625, "learning_rate": 2e-05, "loss": 0.060428, "step": 5593 }, { "epoch": 11.188, "grad_norm": 1.1918976306915283, "learning_rate": 2e-05, "loss": 0.04107691, "step": 5594 }, { "epoch": 11.19, "grad_norm": 0.9529772400856018, "learning_rate": 2e-05, "loss": 0.04317258, "step": 5595 }, { "epoch": 11.192, "grad_norm": 1.5676028728485107, "learning_rate": 2e-05, "loss": 0.05463398, "step": 5596 }, { "epoch": 11.194, "grad_norm": 1.5169607400894165, "learning_rate": 2e-05, "loss": 0.04615597, "step": 5597 }, { "epoch": 11.196, "grad_norm": 1.1337791681289673, "learning_rate": 2e-05, "loss": 0.03676087, "step": 5598 }, { "epoch": 11.198, "grad_norm": 1.5967473983764648, "learning_rate": 2e-05, "loss": 0.05457857, "step": 5599 }, { "epoch": 11.2, "grad_norm": 1.1773473024368286, "learning_rate": 2e-05, "loss": 0.04189096, "step": 5600 }, { "epoch": 11.202, "grad_norm": 1.108976125717163, "learning_rate": 2e-05, "loss": 0.03327487, "step": 5601 }, { "epoch": 11.204, "grad_norm": 1.1577305793762207, "learning_rate": 2e-05, "loss": 0.03228642, "step": 5602 }, { "epoch": 11.206, "grad_norm": 1.5257580280303955, "learning_rate": 2e-05, "loss": 0.04674962, "step": 5603 }, { "epoch": 11.208, "grad_norm": 1.5392863750457764, "learning_rate": 2e-05, "loss": 0.04172727, "step": 5604 }, { "epoch": 11.21, "grad_norm": 1.066713571548462, "learning_rate": 2e-05, "loss": 0.03864186, "step": 5605 }, { "epoch": 11.212, "grad_norm": 1.4239394664764404, "learning_rate": 2e-05, "loss": 0.04064202, "step": 5606 }, { "epoch": 11.214, "grad_norm": 1.4958460330963135, "learning_rate": 2e-05, "loss": 0.05499461, "step": 5607 }, { "epoch": 11.216, "grad_norm": 1.1650075912475586, "learning_rate": 2e-05, "loss": 0.04046335, "step": 5608 }, { "epoch": 11.218, "grad_norm": 1.3070014715194702, "learning_rate": 2e-05, "loss": 0.05967714, "step": 5609 }, { "epoch": 11.22, "grad_norm": 1.2771934270858765, "learning_rate": 2e-05, "loss": 0.04526206, "step": 5610 }, { "epoch": 11.222, "grad_norm": 1.0360138416290283, "learning_rate": 2e-05, "loss": 0.04494242, "step": 5611 }, { "epoch": 11.224, "grad_norm": 1.1136980056762695, "learning_rate": 2e-05, "loss": 0.04748058, "step": 5612 }, { "epoch": 11.226, "grad_norm": 1.279535174369812, "learning_rate": 2e-05, "loss": 0.05536846, "step": 5613 }, { "epoch": 11.228, "grad_norm": 1.7254714965820312, "learning_rate": 2e-05, "loss": 0.06319129, "step": 5614 }, { "epoch": 11.23, "grad_norm": 1.015959620475769, "learning_rate": 2e-05, "loss": 0.02996288, "step": 5615 }, { "epoch": 11.232, "grad_norm": 0.9612359404563904, "learning_rate": 2e-05, "loss": 0.03539902, "step": 5616 }, { "epoch": 11.234, "grad_norm": 1.5286574363708496, "learning_rate": 2e-05, "loss": 0.07139138, "step": 5617 }, { "epoch": 11.236, "grad_norm": 1.3936707973480225, "learning_rate": 2e-05, "loss": 0.0443332, "step": 5618 }, { "epoch": 11.238, "grad_norm": 1.503346562385559, "learning_rate": 2e-05, "loss": 0.04167702, "step": 5619 }, { "epoch": 11.24, "grad_norm": 1.0194368362426758, "learning_rate": 2e-05, "loss": 0.03536838, "step": 5620 }, { "epoch": 11.242, "grad_norm": 1.7644449472427368, "learning_rate": 2e-05, "loss": 0.05340856, "step": 5621 }, { "epoch": 11.244, "grad_norm": 0.9795234203338623, "learning_rate": 2e-05, "loss": 0.0327415, "step": 5622 }, { "epoch": 11.246, "grad_norm": 1.0717252492904663, "learning_rate": 2e-05, "loss": 0.0384132, "step": 5623 }, { "epoch": 11.248, "grad_norm": 1.0700078010559082, "learning_rate": 2e-05, "loss": 0.0378229, "step": 5624 }, { "epoch": 11.25, "grad_norm": 1.805286169052124, "learning_rate": 2e-05, "loss": 0.04440291, "step": 5625 }, { "epoch": 11.252, "grad_norm": 1.2775704860687256, "learning_rate": 2e-05, "loss": 0.04251435, "step": 5626 }, { "epoch": 11.254, "grad_norm": 1.4797736406326294, "learning_rate": 2e-05, "loss": 0.03614307, "step": 5627 }, { "epoch": 11.256, "grad_norm": 1.693877935409546, "learning_rate": 2e-05, "loss": 0.05873074, "step": 5628 }, { "epoch": 11.258, "grad_norm": 1.150343418121338, "learning_rate": 2e-05, "loss": 0.02827701, "step": 5629 }, { "epoch": 11.26, "grad_norm": 1.6019048690795898, "learning_rate": 2e-05, "loss": 0.05172317, "step": 5630 }, { "epoch": 11.262, "grad_norm": 1.7073606252670288, "learning_rate": 2e-05, "loss": 0.05386466, "step": 5631 }, { "epoch": 11.264, "grad_norm": 1.7777050733566284, "learning_rate": 2e-05, "loss": 0.0413624, "step": 5632 }, { "epoch": 11.266, "grad_norm": 1.3919041156768799, "learning_rate": 2e-05, "loss": 0.05393788, "step": 5633 }, { "epoch": 11.268, "grad_norm": 1.0232552289962769, "learning_rate": 2e-05, "loss": 0.03678655, "step": 5634 }, { "epoch": 11.27, "grad_norm": 1.0152132511138916, "learning_rate": 2e-05, "loss": 0.03241464, "step": 5635 }, { "epoch": 11.272, "grad_norm": 1.4391683340072632, "learning_rate": 2e-05, "loss": 0.03821418, "step": 5636 }, { "epoch": 11.274000000000001, "grad_norm": 1.8209373950958252, "learning_rate": 2e-05, "loss": 0.03553118, "step": 5637 }, { "epoch": 11.276, "grad_norm": 2.0165960788726807, "learning_rate": 2e-05, "loss": 0.06877641, "step": 5638 }, { "epoch": 11.278, "grad_norm": 2.9020230770111084, "learning_rate": 2e-05, "loss": 0.04713805, "step": 5639 }, { "epoch": 11.28, "grad_norm": 2.445540428161621, "learning_rate": 2e-05, "loss": 0.05795724, "step": 5640 }, { "epoch": 11.282, "grad_norm": 1.4181286096572876, "learning_rate": 2e-05, "loss": 0.06456973, "step": 5641 }, { "epoch": 11.284, "grad_norm": 1.8244922161102295, "learning_rate": 2e-05, "loss": 0.07060762, "step": 5642 }, { "epoch": 11.286, "grad_norm": 1.4301705360412598, "learning_rate": 2e-05, "loss": 0.04882135, "step": 5643 }, { "epoch": 11.288, "grad_norm": 3.9115777015686035, "learning_rate": 2e-05, "loss": 0.06303208, "step": 5644 }, { "epoch": 11.29, "grad_norm": 0.9888519644737244, "learning_rate": 2e-05, "loss": 0.03519781, "step": 5645 }, { "epoch": 11.292, "grad_norm": 1.2361949682235718, "learning_rate": 2e-05, "loss": 0.04311293, "step": 5646 }, { "epoch": 11.294, "grad_norm": 1.072943925857544, "learning_rate": 2e-05, "loss": 0.04199264, "step": 5647 }, { "epoch": 11.296, "grad_norm": 1.0524487495422363, "learning_rate": 2e-05, "loss": 0.02321496, "step": 5648 }, { "epoch": 11.298, "grad_norm": 1.0004122257232666, "learning_rate": 2e-05, "loss": 0.03980121, "step": 5649 }, { "epoch": 11.3, "grad_norm": 2.1076841354370117, "learning_rate": 2e-05, "loss": 0.03681464, "step": 5650 }, { "epoch": 11.302, "grad_norm": 1.1192392110824585, "learning_rate": 2e-05, "loss": 0.03386169, "step": 5651 }, { "epoch": 11.304, "grad_norm": 1.5688713788986206, "learning_rate": 2e-05, "loss": 0.0357337, "step": 5652 }, { "epoch": 11.306, "grad_norm": 1.1565669775009155, "learning_rate": 2e-05, "loss": 0.04379257, "step": 5653 }, { "epoch": 11.308, "grad_norm": 1.0556933879852295, "learning_rate": 2e-05, "loss": 0.0430598, "step": 5654 }, { "epoch": 11.31, "grad_norm": 2.0405824184417725, "learning_rate": 2e-05, "loss": 0.05104763, "step": 5655 }, { "epoch": 11.312, "grad_norm": 1.4970942735671997, "learning_rate": 2e-05, "loss": 0.04528127, "step": 5656 }, { "epoch": 11.314, "grad_norm": 1.692602515220642, "learning_rate": 2e-05, "loss": 0.04161179, "step": 5657 }, { "epoch": 11.316, "grad_norm": 1.2028204202651978, "learning_rate": 2e-05, "loss": 0.03310231, "step": 5658 }, { "epoch": 11.318, "grad_norm": 1.0924016237258911, "learning_rate": 2e-05, "loss": 0.03825938, "step": 5659 }, { "epoch": 11.32, "grad_norm": 0.9232378602027893, "learning_rate": 2e-05, "loss": 0.03457297, "step": 5660 }, { "epoch": 11.322, "grad_norm": 1.5291032791137695, "learning_rate": 2e-05, "loss": 0.04106208, "step": 5661 }, { "epoch": 11.324, "grad_norm": 1.853575587272644, "learning_rate": 2e-05, "loss": 0.05589207, "step": 5662 }, { "epoch": 11.326, "grad_norm": 1.6939690113067627, "learning_rate": 2e-05, "loss": 0.04182136, "step": 5663 }, { "epoch": 11.328, "grad_norm": 1.9784040451049805, "learning_rate": 2e-05, "loss": 0.0381948, "step": 5664 }, { "epoch": 11.33, "grad_norm": 2.3816099166870117, "learning_rate": 2e-05, "loss": 0.04496423, "step": 5665 }, { "epoch": 11.332, "grad_norm": 2.796982526779175, "learning_rate": 2e-05, "loss": 0.06737269, "step": 5666 }, { "epoch": 11.334, "grad_norm": 1.904605507850647, "learning_rate": 2e-05, "loss": 0.04018627, "step": 5667 }, { "epoch": 11.336, "grad_norm": 0.991129994392395, "learning_rate": 2e-05, "loss": 0.03639981, "step": 5668 }, { "epoch": 11.338, "grad_norm": 1.183982014656067, "learning_rate": 2e-05, "loss": 0.04273771, "step": 5669 }, { "epoch": 11.34, "grad_norm": 2.5313045978546143, "learning_rate": 2e-05, "loss": 0.05737273, "step": 5670 }, { "epoch": 11.342, "grad_norm": 1.4500876665115356, "learning_rate": 2e-05, "loss": 0.04879963, "step": 5671 }, { "epoch": 11.344, "grad_norm": 1.2542344331741333, "learning_rate": 2e-05, "loss": 0.03932276, "step": 5672 }, { "epoch": 11.346, "grad_norm": 1.4330394268035889, "learning_rate": 2e-05, "loss": 0.05832843, "step": 5673 }, { "epoch": 11.348, "grad_norm": 1.6690090894699097, "learning_rate": 2e-05, "loss": 0.05061534, "step": 5674 }, { "epoch": 11.35, "grad_norm": 1.520676851272583, "learning_rate": 2e-05, "loss": 0.03686773, "step": 5675 }, { "epoch": 11.352, "grad_norm": 1.6882392168045044, "learning_rate": 2e-05, "loss": 0.05052148, "step": 5676 }, { "epoch": 11.354, "grad_norm": 2.2133548259735107, "learning_rate": 2e-05, "loss": 0.06013411, "step": 5677 }, { "epoch": 11.356, "grad_norm": 1.1263599395751953, "learning_rate": 2e-05, "loss": 0.03182821, "step": 5678 }, { "epoch": 11.358, "grad_norm": 1.657073736190796, "learning_rate": 2e-05, "loss": 0.0534301, "step": 5679 }, { "epoch": 11.36, "grad_norm": 2.299384832382202, "learning_rate": 2e-05, "loss": 0.06933039, "step": 5680 }, { "epoch": 11.362, "grad_norm": 1.6115472316741943, "learning_rate": 2e-05, "loss": 0.05426785, "step": 5681 }, { "epoch": 11.364, "grad_norm": 1.6506305932998657, "learning_rate": 2e-05, "loss": 0.0389406, "step": 5682 }, { "epoch": 11.366, "grad_norm": 1.9598665237426758, "learning_rate": 2e-05, "loss": 0.03853215, "step": 5683 }, { "epoch": 11.368, "grad_norm": 1.5241433382034302, "learning_rate": 2e-05, "loss": 0.05049495, "step": 5684 }, { "epoch": 11.37, "grad_norm": 1.6850847005844116, "learning_rate": 2e-05, "loss": 0.03677547, "step": 5685 }, { "epoch": 11.372, "grad_norm": 2.303267478942871, "learning_rate": 2e-05, "loss": 0.04571587, "step": 5686 }, { "epoch": 11.374, "grad_norm": 2.5179669857025146, "learning_rate": 2e-05, "loss": 0.05148506, "step": 5687 }, { "epoch": 11.376, "grad_norm": 1.8053176403045654, "learning_rate": 2e-05, "loss": 0.06309921, "step": 5688 }, { "epoch": 11.378, "grad_norm": 1.789489984512329, "learning_rate": 2e-05, "loss": 0.0457884, "step": 5689 }, { "epoch": 11.38, "grad_norm": 4.4538254737854, "learning_rate": 2e-05, "loss": 0.07272643, "step": 5690 }, { "epoch": 11.382, "grad_norm": 1.1391524076461792, "learning_rate": 2e-05, "loss": 0.04372241, "step": 5691 }, { "epoch": 11.384, "grad_norm": 1.3350780010223389, "learning_rate": 2e-05, "loss": 0.03800062, "step": 5692 }, { "epoch": 11.386, "grad_norm": 1.3134864568710327, "learning_rate": 2e-05, "loss": 0.04704265, "step": 5693 }, { "epoch": 11.388, "grad_norm": 1.2544198036193848, "learning_rate": 2e-05, "loss": 0.035706, "step": 5694 }, { "epoch": 11.39, "grad_norm": 1.2645329236984253, "learning_rate": 2e-05, "loss": 0.03529374, "step": 5695 }, { "epoch": 11.392, "grad_norm": 1.412612795829773, "learning_rate": 2e-05, "loss": 0.04078054, "step": 5696 }, { "epoch": 11.394, "grad_norm": 1.3931176662445068, "learning_rate": 2e-05, "loss": 0.05750053, "step": 5697 }, { "epoch": 11.396, "grad_norm": 1.3391056060791016, "learning_rate": 2e-05, "loss": 0.03605771, "step": 5698 }, { "epoch": 11.398, "grad_norm": 1.4808599948883057, "learning_rate": 2e-05, "loss": 0.036298, "step": 5699 }, { "epoch": 11.4, "grad_norm": 2.877488374710083, "learning_rate": 2e-05, "loss": 0.06309499, "step": 5700 }, { "epoch": 11.402, "grad_norm": 0.9654765129089355, "learning_rate": 2e-05, "loss": 0.0321345, "step": 5701 }, { "epoch": 11.404, "grad_norm": 1.464184045791626, "learning_rate": 2e-05, "loss": 0.05256267, "step": 5702 }, { "epoch": 11.406, "grad_norm": 0.9864972829818726, "learning_rate": 2e-05, "loss": 0.02655507, "step": 5703 }, { "epoch": 11.408, "grad_norm": 1.1033917665481567, "learning_rate": 2e-05, "loss": 0.04629254, "step": 5704 }, { "epoch": 11.41, "grad_norm": 1.5177485942840576, "learning_rate": 2e-05, "loss": 0.04845208, "step": 5705 }, { "epoch": 11.412, "grad_norm": 1.945288896560669, "learning_rate": 2e-05, "loss": 0.04639967, "step": 5706 }, { "epoch": 11.414, "grad_norm": 1.5955779552459717, "learning_rate": 2e-05, "loss": 0.05710289, "step": 5707 }, { "epoch": 11.416, "grad_norm": 1.453980803489685, "learning_rate": 2e-05, "loss": 0.06085563, "step": 5708 }, { "epoch": 11.418, "grad_norm": 1.9679827690124512, "learning_rate": 2e-05, "loss": 0.05871365, "step": 5709 }, { "epoch": 11.42, "grad_norm": 1.231433391571045, "learning_rate": 2e-05, "loss": 0.0593383, "step": 5710 }, { "epoch": 11.422, "grad_norm": 1.1161562204360962, "learning_rate": 2e-05, "loss": 0.03744623, "step": 5711 }, { "epoch": 11.424, "grad_norm": 1.1079719066619873, "learning_rate": 2e-05, "loss": 0.03283858, "step": 5712 }, { "epoch": 11.426, "grad_norm": 1.652329921722412, "learning_rate": 2e-05, "loss": 0.03690097, "step": 5713 }, { "epoch": 11.428, "grad_norm": 1.5008844137191772, "learning_rate": 2e-05, "loss": 0.04253498, "step": 5714 }, { "epoch": 11.43, "grad_norm": 1.0645948648452759, "learning_rate": 2e-05, "loss": 0.03556978, "step": 5715 }, { "epoch": 11.432, "grad_norm": 1.1024553775787354, "learning_rate": 2e-05, "loss": 0.04269931, "step": 5716 }, { "epoch": 11.434, "grad_norm": 1.032974362373352, "learning_rate": 2e-05, "loss": 0.0450116, "step": 5717 }, { "epoch": 11.436, "grad_norm": 1.2358860969543457, "learning_rate": 2e-05, "loss": 0.05481789, "step": 5718 }, { "epoch": 11.438, "grad_norm": 2.1452412605285645, "learning_rate": 2e-05, "loss": 0.05746187, "step": 5719 }, { "epoch": 11.44, "grad_norm": 1.122179627418518, "learning_rate": 2e-05, "loss": 0.03623831, "step": 5720 }, { "epoch": 11.442, "grad_norm": 1.6602543592453003, "learning_rate": 2e-05, "loss": 0.04046793, "step": 5721 }, { "epoch": 11.444, "grad_norm": 0.9081873297691345, "learning_rate": 2e-05, "loss": 0.02949431, "step": 5722 }, { "epoch": 11.446, "grad_norm": 1.0518242120742798, "learning_rate": 2e-05, "loss": 0.03602842, "step": 5723 }, { "epoch": 11.448, "grad_norm": 1.5249712467193604, "learning_rate": 2e-05, "loss": 0.05497037, "step": 5724 }, { "epoch": 11.45, "grad_norm": 1.1421295404434204, "learning_rate": 2e-05, "loss": 0.04662335, "step": 5725 }, { "epoch": 11.452, "grad_norm": 2.2270679473876953, "learning_rate": 2e-05, "loss": 0.0389347, "step": 5726 }, { "epoch": 11.454, "grad_norm": 1.0608464479446411, "learning_rate": 2e-05, "loss": 0.04459299, "step": 5727 }, { "epoch": 11.456, "grad_norm": 1.0366029739379883, "learning_rate": 2e-05, "loss": 0.03870797, "step": 5728 }, { "epoch": 11.458, "grad_norm": 1.2152073383331299, "learning_rate": 2e-05, "loss": 0.03679901, "step": 5729 }, { "epoch": 11.46, "grad_norm": 1.0074315071105957, "learning_rate": 2e-05, "loss": 0.04235061, "step": 5730 }, { "epoch": 11.462, "grad_norm": 1.4751588106155396, "learning_rate": 2e-05, "loss": 0.02947707, "step": 5731 }, { "epoch": 11.464, "grad_norm": 0.9968901872634888, "learning_rate": 2e-05, "loss": 0.03306929, "step": 5732 }, { "epoch": 11.466, "grad_norm": 1.5751895904541016, "learning_rate": 2e-05, "loss": 0.06498703, "step": 5733 }, { "epoch": 11.468, "grad_norm": 1.337311863899231, "learning_rate": 2e-05, "loss": 0.04465157, "step": 5734 }, { "epoch": 11.47, "grad_norm": 1.1869555711746216, "learning_rate": 2e-05, "loss": 0.02902341, "step": 5735 }, { "epoch": 11.472, "grad_norm": 1.2653625011444092, "learning_rate": 2e-05, "loss": 0.04157927, "step": 5736 }, { "epoch": 11.474, "grad_norm": 2.0331921577453613, "learning_rate": 2e-05, "loss": 0.05621823, "step": 5737 }, { "epoch": 11.475999999999999, "grad_norm": 1.4380760192871094, "learning_rate": 2e-05, "loss": 0.05303129, "step": 5738 }, { "epoch": 11.478, "grad_norm": 1.4929213523864746, "learning_rate": 2e-05, "loss": 0.04175949, "step": 5739 }, { "epoch": 11.48, "grad_norm": 1.4044290781021118, "learning_rate": 2e-05, "loss": 0.04583057, "step": 5740 }, { "epoch": 11.482, "grad_norm": 2.3977174758911133, "learning_rate": 2e-05, "loss": 0.04454009, "step": 5741 }, { "epoch": 11.484, "grad_norm": 1.1002179384231567, "learning_rate": 2e-05, "loss": 0.04475324, "step": 5742 }, { "epoch": 11.486, "grad_norm": 1.3931427001953125, "learning_rate": 2e-05, "loss": 0.04015528, "step": 5743 }, { "epoch": 11.488, "grad_norm": 1.227184772491455, "learning_rate": 2e-05, "loss": 0.02831712, "step": 5744 }, { "epoch": 11.49, "grad_norm": 4.314063549041748, "learning_rate": 2e-05, "loss": 0.04934733, "step": 5745 }, { "epoch": 11.492, "grad_norm": 1.2216967344284058, "learning_rate": 2e-05, "loss": 0.0328696, "step": 5746 }, { "epoch": 11.494, "grad_norm": 1.5306403636932373, "learning_rate": 2e-05, "loss": 0.05731472, "step": 5747 }, { "epoch": 11.496, "grad_norm": 1.4774214029312134, "learning_rate": 2e-05, "loss": 0.04682739, "step": 5748 }, { "epoch": 11.498, "grad_norm": 1.284678339958191, "learning_rate": 2e-05, "loss": 0.04000887, "step": 5749 }, { "epoch": 11.5, "grad_norm": 1.1839613914489746, "learning_rate": 2e-05, "loss": 0.04536203, "step": 5750 }, { "epoch": 11.502, "grad_norm": 1.275244116783142, "learning_rate": 2e-05, "loss": 0.05153922, "step": 5751 }, { "epoch": 11.504, "grad_norm": 1.6596157550811768, "learning_rate": 2e-05, "loss": 0.05041556, "step": 5752 }, { "epoch": 11.506, "grad_norm": 1.5351767539978027, "learning_rate": 2e-05, "loss": 0.05845892, "step": 5753 }, { "epoch": 11.508, "grad_norm": 1.9493751525878906, "learning_rate": 2e-05, "loss": 0.03669079, "step": 5754 }, { "epoch": 11.51, "grad_norm": 1.6401820182800293, "learning_rate": 2e-05, "loss": 0.04640107, "step": 5755 }, { "epoch": 11.512, "grad_norm": 2.0774924755096436, "learning_rate": 2e-05, "loss": 0.04217081, "step": 5756 }, { "epoch": 11.514, "grad_norm": 1.5364662408828735, "learning_rate": 2e-05, "loss": 0.03601849, "step": 5757 }, { "epoch": 11.516, "grad_norm": 2.922870397567749, "learning_rate": 2e-05, "loss": 0.05351526, "step": 5758 }, { "epoch": 11.518, "grad_norm": 1.0814040899276733, "learning_rate": 2e-05, "loss": 0.04114934, "step": 5759 }, { "epoch": 11.52, "grad_norm": 1.1550902128219604, "learning_rate": 2e-05, "loss": 0.03060487, "step": 5760 }, { "epoch": 11.522, "grad_norm": 1.7431031465530396, "learning_rate": 2e-05, "loss": 0.05294985, "step": 5761 }, { "epoch": 11.524000000000001, "grad_norm": 1.0879149436950684, "learning_rate": 2e-05, "loss": 0.0359129, "step": 5762 }, { "epoch": 11.526, "grad_norm": 1.6202856302261353, "learning_rate": 2e-05, "loss": 0.03743608, "step": 5763 }, { "epoch": 11.528, "grad_norm": 1.4314377307891846, "learning_rate": 2e-05, "loss": 0.05245137, "step": 5764 }, { "epoch": 11.53, "grad_norm": 1.4001820087432861, "learning_rate": 2e-05, "loss": 0.05609337, "step": 5765 }, { "epoch": 11.532, "grad_norm": 1.2218937873840332, "learning_rate": 2e-05, "loss": 0.03376921, "step": 5766 }, { "epoch": 11.534, "grad_norm": 1.2859230041503906, "learning_rate": 2e-05, "loss": 0.03596242, "step": 5767 }, { "epoch": 11.536, "grad_norm": 1.3536012172698975, "learning_rate": 2e-05, "loss": 0.0435769, "step": 5768 }, { "epoch": 11.538, "grad_norm": 1.808517336845398, "learning_rate": 2e-05, "loss": 0.0388736, "step": 5769 }, { "epoch": 11.54, "grad_norm": 2.08162260055542, "learning_rate": 2e-05, "loss": 0.05287518, "step": 5770 }, { "epoch": 11.542, "grad_norm": 1.4972813129425049, "learning_rate": 2e-05, "loss": 0.03660382, "step": 5771 }, { "epoch": 11.544, "grad_norm": 1.770861029624939, "learning_rate": 2e-05, "loss": 0.06265189, "step": 5772 }, { "epoch": 11.546, "grad_norm": 1.6629408597946167, "learning_rate": 2e-05, "loss": 0.05481086, "step": 5773 }, { "epoch": 11.548, "grad_norm": 1.014580249786377, "learning_rate": 2e-05, "loss": 0.03411947, "step": 5774 }, { "epoch": 11.55, "grad_norm": 1.5988222360610962, "learning_rate": 2e-05, "loss": 0.05093693, "step": 5775 }, { "epoch": 11.552, "grad_norm": 1.1317811012268066, "learning_rate": 2e-05, "loss": 0.02709729, "step": 5776 }, { "epoch": 11.554, "grad_norm": 1.4191161394119263, "learning_rate": 2e-05, "loss": 0.04657879, "step": 5777 }, { "epoch": 11.556000000000001, "grad_norm": 1.8382493257522583, "learning_rate": 2e-05, "loss": 0.04825822, "step": 5778 }, { "epoch": 11.558, "grad_norm": 1.5734562873840332, "learning_rate": 2e-05, "loss": 0.04330944, "step": 5779 }, { "epoch": 11.56, "grad_norm": 1.4938522577285767, "learning_rate": 2e-05, "loss": 0.04613466, "step": 5780 }, { "epoch": 11.562, "grad_norm": 1.6568859815597534, "learning_rate": 2e-05, "loss": 0.04533406, "step": 5781 }, { "epoch": 11.564, "grad_norm": 1.03166663646698, "learning_rate": 2e-05, "loss": 0.03298513, "step": 5782 }, { "epoch": 11.566, "grad_norm": 2.412790298461914, "learning_rate": 2e-05, "loss": 0.059669, "step": 5783 }, { "epoch": 11.568, "grad_norm": 0.7268760800361633, "learning_rate": 2e-05, "loss": 0.01746318, "step": 5784 }, { "epoch": 11.57, "grad_norm": 3.00822377204895, "learning_rate": 2e-05, "loss": 0.07187882, "step": 5785 }, { "epoch": 11.572, "grad_norm": 1.1581590175628662, "learning_rate": 2e-05, "loss": 0.04789983, "step": 5786 }, { "epoch": 11.574, "grad_norm": 1.113059639930725, "learning_rate": 2e-05, "loss": 0.03713815, "step": 5787 }, { "epoch": 11.576, "grad_norm": 1.0034935474395752, "learning_rate": 2e-05, "loss": 0.03949534, "step": 5788 }, { "epoch": 11.578, "grad_norm": 1.580288290977478, "learning_rate": 2e-05, "loss": 0.03918436, "step": 5789 }, { "epoch": 11.58, "grad_norm": 1.8059066534042358, "learning_rate": 2e-05, "loss": 0.05439914, "step": 5790 }, { "epoch": 11.582, "grad_norm": 1.0139095783233643, "learning_rate": 2e-05, "loss": 0.03699078, "step": 5791 }, { "epoch": 11.584, "grad_norm": 3.5101406574249268, "learning_rate": 2e-05, "loss": 0.05208754, "step": 5792 }, { "epoch": 11.586, "grad_norm": 1.7632066011428833, "learning_rate": 2e-05, "loss": 0.04731216, "step": 5793 }, { "epoch": 11.588, "grad_norm": 1.9159696102142334, "learning_rate": 2e-05, "loss": 0.05665173, "step": 5794 }, { "epoch": 11.59, "grad_norm": 1.7056618928909302, "learning_rate": 2e-05, "loss": 0.05668868, "step": 5795 }, { "epoch": 11.592, "grad_norm": 2.4506165981292725, "learning_rate": 2e-05, "loss": 0.04585192, "step": 5796 }, { "epoch": 11.594, "grad_norm": 1.3681178092956543, "learning_rate": 2e-05, "loss": 0.04944826, "step": 5797 }, { "epoch": 11.596, "grad_norm": 1.9475969076156616, "learning_rate": 2e-05, "loss": 0.04270927, "step": 5798 }, { "epoch": 11.598, "grad_norm": 1.4567699432373047, "learning_rate": 2e-05, "loss": 0.03855619, "step": 5799 }, { "epoch": 11.6, "grad_norm": 1.5527197122573853, "learning_rate": 2e-05, "loss": 0.0483334, "step": 5800 }, { "epoch": 11.602, "grad_norm": 0.9703032970428467, "learning_rate": 2e-05, "loss": 0.03626152, "step": 5801 }, { "epoch": 11.604, "grad_norm": 1.1331628561019897, "learning_rate": 2e-05, "loss": 0.04250154, "step": 5802 }, { "epoch": 11.606, "grad_norm": 1.695320963859558, "learning_rate": 2e-05, "loss": 0.05694439, "step": 5803 }, { "epoch": 11.608, "grad_norm": 1.215254783630371, "learning_rate": 2e-05, "loss": 0.04018516, "step": 5804 }, { "epoch": 11.61, "grad_norm": 0.9276542067527771, "learning_rate": 2e-05, "loss": 0.02274639, "step": 5805 }, { "epoch": 11.612, "grad_norm": 0.9328853487968445, "learning_rate": 2e-05, "loss": 0.03013455, "step": 5806 }, { "epoch": 11.614, "grad_norm": 1.3658262491226196, "learning_rate": 2e-05, "loss": 0.0457868, "step": 5807 }, { "epoch": 11.616, "grad_norm": 1.0299392938613892, "learning_rate": 2e-05, "loss": 0.02719694, "step": 5808 }, { "epoch": 11.618, "grad_norm": 0.9136078357696533, "learning_rate": 2e-05, "loss": 0.0303246, "step": 5809 }, { "epoch": 11.62, "grad_norm": 1.0807710886001587, "learning_rate": 2e-05, "loss": 0.03513505, "step": 5810 }, { "epoch": 11.622, "grad_norm": 1.0366498231887817, "learning_rate": 2e-05, "loss": 0.03853009, "step": 5811 }, { "epoch": 11.624, "grad_norm": 1.1445306539535522, "learning_rate": 2e-05, "loss": 0.03547894, "step": 5812 }, { "epoch": 11.626, "grad_norm": 4.025401592254639, "learning_rate": 2e-05, "loss": 0.06274126, "step": 5813 }, { "epoch": 11.628, "grad_norm": 1.3070099353790283, "learning_rate": 2e-05, "loss": 0.039181, "step": 5814 }, { "epoch": 11.63, "grad_norm": 1.5482231378555298, "learning_rate": 2e-05, "loss": 0.07239227, "step": 5815 }, { "epoch": 11.632, "grad_norm": 2.02384877204895, "learning_rate": 2e-05, "loss": 0.04188669, "step": 5816 }, { "epoch": 11.634, "grad_norm": 1.19016695022583, "learning_rate": 2e-05, "loss": 0.04678681, "step": 5817 }, { "epoch": 11.636, "grad_norm": 1.4057552814483643, "learning_rate": 2e-05, "loss": 0.03698438, "step": 5818 }, { "epoch": 11.638, "grad_norm": 1.5046032667160034, "learning_rate": 2e-05, "loss": 0.04641951, "step": 5819 }, { "epoch": 11.64, "grad_norm": 1.4290348291397095, "learning_rate": 2e-05, "loss": 0.04036142, "step": 5820 }, { "epoch": 11.642, "grad_norm": 1.2966749668121338, "learning_rate": 2e-05, "loss": 0.03925401, "step": 5821 }, { "epoch": 11.644, "grad_norm": 1.6623355150222778, "learning_rate": 2e-05, "loss": 0.0745986, "step": 5822 }, { "epoch": 11.646, "grad_norm": 1.6564867496490479, "learning_rate": 2e-05, "loss": 0.05369641, "step": 5823 }, { "epoch": 11.648, "grad_norm": 1.0130841732025146, "learning_rate": 2e-05, "loss": 0.03603834, "step": 5824 }, { "epoch": 11.65, "grad_norm": 1.6903640031814575, "learning_rate": 2e-05, "loss": 0.05582652, "step": 5825 }, { "epoch": 11.652, "grad_norm": 2.4483182430267334, "learning_rate": 2e-05, "loss": 0.04876352, "step": 5826 }, { "epoch": 11.654, "grad_norm": 1.2262402772903442, "learning_rate": 2e-05, "loss": 0.04363182, "step": 5827 }, { "epoch": 11.656, "grad_norm": 2.6704461574554443, "learning_rate": 2e-05, "loss": 0.0692665, "step": 5828 }, { "epoch": 11.658, "grad_norm": 1.1476116180419922, "learning_rate": 2e-05, "loss": 0.05447911, "step": 5829 }, { "epoch": 11.66, "grad_norm": 1.1374801397323608, "learning_rate": 2e-05, "loss": 0.03698364, "step": 5830 }, { "epoch": 11.662, "grad_norm": 1.74833345413208, "learning_rate": 2e-05, "loss": 0.0340851, "step": 5831 }, { "epoch": 11.664, "grad_norm": 3.1562201976776123, "learning_rate": 2e-05, "loss": 0.04089772, "step": 5832 }, { "epoch": 11.666, "grad_norm": 1.4454296827316284, "learning_rate": 2e-05, "loss": 0.03925513, "step": 5833 }, { "epoch": 11.668, "grad_norm": 1.460183024406433, "learning_rate": 2e-05, "loss": 0.03432395, "step": 5834 }, { "epoch": 11.67, "grad_norm": 1.1005781888961792, "learning_rate": 2e-05, "loss": 0.03778052, "step": 5835 }, { "epoch": 11.672, "grad_norm": 1.2950646877288818, "learning_rate": 2e-05, "loss": 0.05419799, "step": 5836 }, { "epoch": 11.674, "grad_norm": 1.330123782157898, "learning_rate": 2e-05, "loss": 0.04579584, "step": 5837 }, { "epoch": 11.676, "grad_norm": 2.1729001998901367, "learning_rate": 2e-05, "loss": 0.04692475, "step": 5838 }, { "epoch": 11.678, "grad_norm": 1.612794041633606, "learning_rate": 2e-05, "loss": 0.04984231, "step": 5839 }, { "epoch": 11.68, "grad_norm": 1.5346806049346924, "learning_rate": 2e-05, "loss": 0.0444356, "step": 5840 }, { "epoch": 11.682, "grad_norm": 2.6650583744049072, "learning_rate": 2e-05, "loss": 0.03806249, "step": 5841 }, { "epoch": 11.684, "grad_norm": 2.433091402053833, "learning_rate": 2e-05, "loss": 0.04636319, "step": 5842 }, { "epoch": 11.686, "grad_norm": 1.2058982849121094, "learning_rate": 2e-05, "loss": 0.03539244, "step": 5843 }, { "epoch": 11.688, "grad_norm": 1.7369778156280518, "learning_rate": 2e-05, "loss": 0.02115437, "step": 5844 }, { "epoch": 11.69, "grad_norm": 1.296627163887024, "learning_rate": 2e-05, "loss": 0.04892417, "step": 5845 }, { "epoch": 11.692, "grad_norm": 5.1285505294799805, "learning_rate": 2e-05, "loss": 0.04282135, "step": 5846 }, { "epoch": 11.693999999999999, "grad_norm": 1.1598219871520996, "learning_rate": 2e-05, "loss": 0.04708207, "step": 5847 }, { "epoch": 11.696, "grad_norm": 1.3933168649673462, "learning_rate": 2e-05, "loss": 0.04899856, "step": 5848 }, { "epoch": 11.698, "grad_norm": 1.0900496244430542, "learning_rate": 2e-05, "loss": 0.04548903, "step": 5849 }, { "epoch": 11.7, "grad_norm": 1.5884132385253906, "learning_rate": 2e-05, "loss": 0.05401516, "step": 5850 }, { "epoch": 11.702, "grad_norm": 1.4112673997879028, "learning_rate": 2e-05, "loss": 0.04854234, "step": 5851 }, { "epoch": 11.704, "grad_norm": 1.7265757322311401, "learning_rate": 2e-05, "loss": 0.04547007, "step": 5852 }, { "epoch": 11.706, "grad_norm": 1.800355315208435, "learning_rate": 2e-05, "loss": 0.0444559, "step": 5853 }, { "epoch": 11.708, "grad_norm": 1.2425519227981567, "learning_rate": 2e-05, "loss": 0.0489036, "step": 5854 }, { "epoch": 11.71, "grad_norm": 1.3983635902404785, "learning_rate": 2e-05, "loss": 0.06094494, "step": 5855 }, { "epoch": 11.712, "grad_norm": 1.776833415031433, "learning_rate": 2e-05, "loss": 0.04494461, "step": 5856 }, { "epoch": 11.714, "grad_norm": 1.8587884902954102, "learning_rate": 2e-05, "loss": 0.05583946, "step": 5857 }, { "epoch": 11.716, "grad_norm": 0.9269976615905762, "learning_rate": 2e-05, "loss": 0.03599362, "step": 5858 }, { "epoch": 11.718, "grad_norm": 3.314462661743164, "learning_rate": 2e-05, "loss": 0.05175107, "step": 5859 }, { "epoch": 11.72, "grad_norm": 2.074948787689209, "learning_rate": 2e-05, "loss": 0.0477162, "step": 5860 }, { "epoch": 11.722, "grad_norm": 1.2079555988311768, "learning_rate": 2e-05, "loss": 0.04706313, "step": 5861 }, { "epoch": 11.724, "grad_norm": 1.2197182178497314, "learning_rate": 2e-05, "loss": 0.04215481, "step": 5862 }, { "epoch": 11.725999999999999, "grad_norm": 1.459205150604248, "learning_rate": 2e-05, "loss": 0.04611627, "step": 5863 }, { "epoch": 11.728, "grad_norm": 1.3245116472244263, "learning_rate": 2e-05, "loss": 0.03651111, "step": 5864 }, { "epoch": 11.73, "grad_norm": 1.2864437103271484, "learning_rate": 2e-05, "loss": 0.03707273, "step": 5865 }, { "epoch": 11.732, "grad_norm": 1.7712574005126953, "learning_rate": 2e-05, "loss": 0.03862024, "step": 5866 }, { "epoch": 11.734, "grad_norm": 1.3328123092651367, "learning_rate": 2e-05, "loss": 0.04902117, "step": 5867 }, { "epoch": 11.736, "grad_norm": 0.9071385860443115, "learning_rate": 2e-05, "loss": 0.03179049, "step": 5868 }, { "epoch": 11.738, "grad_norm": 1.687678337097168, "learning_rate": 2e-05, "loss": 0.05468391, "step": 5869 }, { "epoch": 11.74, "grad_norm": 1.3182744979858398, "learning_rate": 2e-05, "loss": 0.0389279, "step": 5870 }, { "epoch": 11.742, "grad_norm": 2.01157808303833, "learning_rate": 2e-05, "loss": 0.04066446, "step": 5871 }, { "epoch": 11.744, "grad_norm": 2.4911129474639893, "learning_rate": 2e-05, "loss": 0.05356855, "step": 5872 }, { "epoch": 11.746, "grad_norm": 1.2817695140838623, "learning_rate": 2e-05, "loss": 0.04035872, "step": 5873 }, { "epoch": 11.748, "grad_norm": 1.4072495698928833, "learning_rate": 2e-05, "loss": 0.0498979, "step": 5874 }, { "epoch": 11.75, "grad_norm": 1.038226842880249, "learning_rate": 2e-05, "loss": 0.03607921, "step": 5875 }, { "epoch": 11.752, "grad_norm": 1.4267641305923462, "learning_rate": 2e-05, "loss": 0.05678145, "step": 5876 }, { "epoch": 11.754, "grad_norm": 1.14197838306427, "learning_rate": 2e-05, "loss": 0.03851016, "step": 5877 }, { "epoch": 11.756, "grad_norm": 1.4494892358779907, "learning_rate": 2e-05, "loss": 0.05326318, "step": 5878 }, { "epoch": 11.758, "grad_norm": 1.3799394369125366, "learning_rate": 2e-05, "loss": 0.0385946, "step": 5879 }, { "epoch": 11.76, "grad_norm": 1.6518877744674683, "learning_rate": 2e-05, "loss": 0.05615582, "step": 5880 }, { "epoch": 11.762, "grad_norm": 1.195953369140625, "learning_rate": 2e-05, "loss": 0.04200421, "step": 5881 }, { "epoch": 11.764, "grad_norm": 1.305022954940796, "learning_rate": 2e-05, "loss": 0.04386101, "step": 5882 }, { "epoch": 11.766, "grad_norm": 1.1504292488098145, "learning_rate": 2e-05, "loss": 0.04042993, "step": 5883 }, { "epoch": 11.768, "grad_norm": 1.6102385520935059, "learning_rate": 2e-05, "loss": 0.06152549, "step": 5884 }, { "epoch": 11.77, "grad_norm": 1.7718031406402588, "learning_rate": 2e-05, "loss": 0.05244738, "step": 5885 }, { "epoch": 11.772, "grad_norm": 3.241689682006836, "learning_rate": 2e-05, "loss": 0.04334035, "step": 5886 }, { "epoch": 11.774000000000001, "grad_norm": 1.0956321954727173, "learning_rate": 2e-05, "loss": 0.04233655, "step": 5887 }, { "epoch": 11.776, "grad_norm": 1.1027512550354004, "learning_rate": 2e-05, "loss": 0.03636168, "step": 5888 }, { "epoch": 11.778, "grad_norm": 1.2787376642227173, "learning_rate": 2e-05, "loss": 0.03690924, "step": 5889 }, { "epoch": 11.78, "grad_norm": 1.6747596263885498, "learning_rate": 2e-05, "loss": 0.04811033, "step": 5890 }, { "epoch": 11.782, "grad_norm": 1.2329976558685303, "learning_rate": 2e-05, "loss": 0.03226365, "step": 5891 }, { "epoch": 11.784, "grad_norm": 1.083417534828186, "learning_rate": 2e-05, "loss": 0.04078288, "step": 5892 }, { "epoch": 11.786, "grad_norm": 1.7159775495529175, "learning_rate": 2e-05, "loss": 0.04270864, "step": 5893 }, { "epoch": 11.788, "grad_norm": 1.3681230545043945, "learning_rate": 2e-05, "loss": 0.05161403, "step": 5894 }, { "epoch": 11.79, "grad_norm": 1.1216397285461426, "learning_rate": 2e-05, "loss": 0.03790008, "step": 5895 }, { "epoch": 11.792, "grad_norm": 1.7793452739715576, "learning_rate": 2e-05, "loss": 0.0408999, "step": 5896 }, { "epoch": 11.794, "grad_norm": 1.4301302433013916, "learning_rate": 2e-05, "loss": 0.04504237, "step": 5897 }, { "epoch": 11.796, "grad_norm": 1.3446604013442993, "learning_rate": 2e-05, "loss": 0.0413551, "step": 5898 }, { "epoch": 11.798, "grad_norm": 2.334562301635742, "learning_rate": 2e-05, "loss": 0.06192225, "step": 5899 }, { "epoch": 11.8, "grad_norm": 1.7023171186447144, "learning_rate": 2e-05, "loss": 0.04500677, "step": 5900 }, { "epoch": 11.802, "grad_norm": 1.248199224472046, "learning_rate": 2e-05, "loss": 0.04344091, "step": 5901 }, { "epoch": 11.804, "grad_norm": 1.3822078704833984, "learning_rate": 2e-05, "loss": 0.04688212, "step": 5902 }, { "epoch": 11.806000000000001, "grad_norm": 0.9345322251319885, "learning_rate": 2e-05, "loss": 0.03401906, "step": 5903 }, { "epoch": 11.808, "grad_norm": 1.076482892036438, "learning_rate": 2e-05, "loss": 0.04483665, "step": 5904 }, { "epoch": 11.81, "grad_norm": 1.7193052768707275, "learning_rate": 2e-05, "loss": 0.03884068, "step": 5905 }, { "epoch": 11.812, "grad_norm": 0.9782913327217102, "learning_rate": 2e-05, "loss": 0.03313865, "step": 5906 }, { "epoch": 11.814, "grad_norm": 1.5988812446594238, "learning_rate": 2e-05, "loss": 0.05233993, "step": 5907 }, { "epoch": 11.816, "grad_norm": 1.1546082496643066, "learning_rate": 2e-05, "loss": 0.05231573, "step": 5908 }, { "epoch": 11.818, "grad_norm": 1.18368661403656, "learning_rate": 2e-05, "loss": 0.04046147, "step": 5909 }, { "epoch": 11.82, "grad_norm": 1.4646607637405396, "learning_rate": 2e-05, "loss": 0.03100903, "step": 5910 }, { "epoch": 11.822, "grad_norm": 3.1986961364746094, "learning_rate": 2e-05, "loss": 0.04680929, "step": 5911 }, { "epoch": 11.824, "grad_norm": 1.5939075946807861, "learning_rate": 2e-05, "loss": 0.04059362, "step": 5912 }, { "epoch": 11.826, "grad_norm": 1.1175503730773926, "learning_rate": 2e-05, "loss": 0.03984897, "step": 5913 }, { "epoch": 11.828, "grad_norm": 1.1095728874206543, "learning_rate": 2e-05, "loss": 0.03536475, "step": 5914 }, { "epoch": 11.83, "grad_norm": 1.659307599067688, "learning_rate": 2e-05, "loss": 0.05149513, "step": 5915 }, { "epoch": 11.832, "grad_norm": 0.8394457697868347, "learning_rate": 2e-05, "loss": 0.02840887, "step": 5916 }, { "epoch": 11.834, "grad_norm": 1.069577932357788, "learning_rate": 2e-05, "loss": 0.0445364, "step": 5917 }, { "epoch": 11.836, "grad_norm": 3.412733793258667, "learning_rate": 2e-05, "loss": 0.05429213, "step": 5918 }, { "epoch": 11.838, "grad_norm": 2.011697292327881, "learning_rate": 2e-05, "loss": 0.05257804, "step": 5919 }, { "epoch": 11.84, "grad_norm": 1.5065538883209229, "learning_rate": 2e-05, "loss": 0.05617066, "step": 5920 }, { "epoch": 11.842, "grad_norm": 2.288857936859131, "learning_rate": 2e-05, "loss": 0.04707617, "step": 5921 }, { "epoch": 11.844, "grad_norm": 1.6899369955062866, "learning_rate": 2e-05, "loss": 0.04805134, "step": 5922 }, { "epoch": 11.846, "grad_norm": 0.944807231426239, "learning_rate": 2e-05, "loss": 0.04408221, "step": 5923 }, { "epoch": 11.848, "grad_norm": 1.4940017461776733, "learning_rate": 2e-05, "loss": 0.0487989, "step": 5924 }, { "epoch": 11.85, "grad_norm": 1.6419658660888672, "learning_rate": 2e-05, "loss": 0.04975691, "step": 5925 }, { "epoch": 11.852, "grad_norm": 1.4386626482009888, "learning_rate": 2e-05, "loss": 0.03675223, "step": 5926 }, { "epoch": 11.854, "grad_norm": 1.765627384185791, "learning_rate": 2e-05, "loss": 0.0496269, "step": 5927 }, { "epoch": 11.856, "grad_norm": 1.2555255889892578, "learning_rate": 2e-05, "loss": 0.04655837, "step": 5928 }, { "epoch": 11.858, "grad_norm": 2.4727370738983154, "learning_rate": 2e-05, "loss": 0.05486517, "step": 5929 }, { "epoch": 11.86, "grad_norm": 1.261451005935669, "learning_rate": 2e-05, "loss": 0.04114101, "step": 5930 }, { "epoch": 11.862, "grad_norm": 2.8810231685638428, "learning_rate": 2e-05, "loss": 0.04777797, "step": 5931 }, { "epoch": 11.864, "grad_norm": 2.3288686275482178, "learning_rate": 2e-05, "loss": 0.07667743, "step": 5932 }, { "epoch": 11.866, "grad_norm": 0.927460789680481, "learning_rate": 2e-05, "loss": 0.0349483, "step": 5933 }, { "epoch": 11.868, "grad_norm": 2.6389753818511963, "learning_rate": 2e-05, "loss": 0.0387733, "step": 5934 }, { "epoch": 11.87, "grad_norm": 1.9262769222259521, "learning_rate": 2e-05, "loss": 0.03882652, "step": 5935 }, { "epoch": 11.872, "grad_norm": 1.4889483451843262, "learning_rate": 2e-05, "loss": 0.04895875, "step": 5936 }, { "epoch": 11.874, "grad_norm": 1.3835666179656982, "learning_rate": 2e-05, "loss": 0.03973237, "step": 5937 }, { "epoch": 11.876, "grad_norm": 1.1526397466659546, "learning_rate": 2e-05, "loss": 0.03734651, "step": 5938 }, { "epoch": 11.878, "grad_norm": 1.4369529485702515, "learning_rate": 2e-05, "loss": 0.05701316, "step": 5939 }, { "epoch": 11.88, "grad_norm": 1.1306132078170776, "learning_rate": 2e-05, "loss": 0.03524964, "step": 5940 }, { "epoch": 11.882, "grad_norm": 1.1779828071594238, "learning_rate": 2e-05, "loss": 0.04080834, "step": 5941 }, { "epoch": 11.884, "grad_norm": 1.2661964893341064, "learning_rate": 2e-05, "loss": 0.05066068, "step": 5942 }, { "epoch": 11.886, "grad_norm": 1.1694908142089844, "learning_rate": 2e-05, "loss": 0.04653336, "step": 5943 }, { "epoch": 11.888, "grad_norm": 1.3221540451049805, "learning_rate": 2e-05, "loss": 0.04501926, "step": 5944 }, { "epoch": 11.89, "grad_norm": 1.6717414855957031, "learning_rate": 2e-05, "loss": 0.05765069, "step": 5945 }, { "epoch": 11.892, "grad_norm": 0.9409058690071106, "learning_rate": 2e-05, "loss": 0.03837239, "step": 5946 }, { "epoch": 11.894, "grad_norm": 1.0491093397140503, "learning_rate": 2e-05, "loss": 0.03510956, "step": 5947 }, { "epoch": 11.896, "grad_norm": 2.4397635459899902, "learning_rate": 2e-05, "loss": 0.05249226, "step": 5948 }, { "epoch": 11.898, "grad_norm": 1.4486075639724731, "learning_rate": 2e-05, "loss": 0.037448, "step": 5949 }, { "epoch": 11.9, "grad_norm": 1.1721850633621216, "learning_rate": 2e-05, "loss": 0.047097, "step": 5950 }, { "epoch": 11.902, "grad_norm": 1.055625557899475, "learning_rate": 2e-05, "loss": 0.04133175, "step": 5951 }, { "epoch": 11.904, "grad_norm": 1.2297016382217407, "learning_rate": 2e-05, "loss": 0.05778246, "step": 5952 }, { "epoch": 11.906, "grad_norm": 1.5838181972503662, "learning_rate": 2e-05, "loss": 0.04979094, "step": 5953 }, { "epoch": 11.908, "grad_norm": 1.2497130632400513, "learning_rate": 2e-05, "loss": 0.03473622, "step": 5954 }, { "epoch": 11.91, "grad_norm": 1.406507968902588, "learning_rate": 2e-05, "loss": 0.03643037, "step": 5955 }, { "epoch": 11.912, "grad_norm": 1.3133835792541504, "learning_rate": 2e-05, "loss": 0.0457564, "step": 5956 }, { "epoch": 11.914, "grad_norm": 1.4426908493041992, "learning_rate": 2e-05, "loss": 0.04277427, "step": 5957 }, { "epoch": 11.916, "grad_norm": 1.277441143989563, "learning_rate": 2e-05, "loss": 0.04180881, "step": 5958 }, { "epoch": 11.918, "grad_norm": 1.6538094282150269, "learning_rate": 2e-05, "loss": 0.04057349, "step": 5959 }, { "epoch": 11.92, "grad_norm": 1.3794416189193726, "learning_rate": 2e-05, "loss": 0.04414704, "step": 5960 }, { "epoch": 11.922, "grad_norm": 1.548293113708496, "learning_rate": 2e-05, "loss": 0.04472745, "step": 5961 }, { "epoch": 11.924, "grad_norm": 1.510328769683838, "learning_rate": 2e-05, "loss": 0.05884958, "step": 5962 }, { "epoch": 11.926, "grad_norm": 1.078235387802124, "learning_rate": 2e-05, "loss": 0.05215491, "step": 5963 }, { "epoch": 11.928, "grad_norm": 1.3813450336456299, "learning_rate": 2e-05, "loss": 0.03494862, "step": 5964 }, { "epoch": 11.93, "grad_norm": 1.258335828781128, "learning_rate": 2e-05, "loss": 0.04460224, "step": 5965 }, { "epoch": 11.932, "grad_norm": 1.7897981405258179, "learning_rate": 2e-05, "loss": 0.04800592, "step": 5966 }, { "epoch": 11.934, "grad_norm": 2.017218589782715, "learning_rate": 2e-05, "loss": 0.04859088, "step": 5967 }, { "epoch": 11.936, "grad_norm": 1.3227336406707764, "learning_rate": 2e-05, "loss": 0.04311728, "step": 5968 }, { "epoch": 11.938, "grad_norm": 1.1408333778381348, "learning_rate": 2e-05, "loss": 0.04529904, "step": 5969 }, { "epoch": 11.94, "grad_norm": 1.353716492652893, "learning_rate": 2e-05, "loss": 0.05408144, "step": 5970 }, { "epoch": 11.942, "grad_norm": 1.2190511226654053, "learning_rate": 2e-05, "loss": 0.03755567, "step": 5971 }, { "epoch": 11.943999999999999, "grad_norm": 1.8575340509414673, "learning_rate": 2e-05, "loss": 0.05075154, "step": 5972 }, { "epoch": 11.946, "grad_norm": 1.8959110975265503, "learning_rate": 2e-05, "loss": 0.05841636, "step": 5973 }, { "epoch": 11.948, "grad_norm": 1.2255719900131226, "learning_rate": 2e-05, "loss": 0.05134135, "step": 5974 }, { "epoch": 11.95, "grad_norm": 1.5601109266281128, "learning_rate": 2e-05, "loss": 0.04558882, "step": 5975 }, { "epoch": 11.952, "grad_norm": 1.4387339353561401, "learning_rate": 2e-05, "loss": 0.04524351, "step": 5976 }, { "epoch": 11.954, "grad_norm": 1.1376765966415405, "learning_rate": 2e-05, "loss": 0.04328175, "step": 5977 }, { "epoch": 11.956, "grad_norm": 1.1916512250900269, "learning_rate": 2e-05, "loss": 0.03925745, "step": 5978 }, { "epoch": 11.958, "grad_norm": 1.4428365230560303, "learning_rate": 2e-05, "loss": 0.0398249, "step": 5979 }, { "epoch": 11.96, "grad_norm": 1.3560397624969482, "learning_rate": 2e-05, "loss": 0.0461193, "step": 5980 }, { "epoch": 11.962, "grad_norm": 1.9162678718566895, "learning_rate": 2e-05, "loss": 0.0548541, "step": 5981 }, { "epoch": 11.964, "grad_norm": 3.61601185798645, "learning_rate": 2e-05, "loss": 0.05243468, "step": 5982 }, { "epoch": 11.966, "grad_norm": 1.3847134113311768, "learning_rate": 2e-05, "loss": 0.03628412, "step": 5983 }, { "epoch": 11.968, "grad_norm": 1.372618317604065, "learning_rate": 2e-05, "loss": 0.038436, "step": 5984 }, { "epoch": 11.97, "grad_norm": 1.0420581102371216, "learning_rate": 2e-05, "loss": 0.0371501, "step": 5985 }, { "epoch": 11.972, "grad_norm": 1.5138499736785889, "learning_rate": 2e-05, "loss": 0.03636958, "step": 5986 }, { "epoch": 11.974, "grad_norm": 1.0536664724349976, "learning_rate": 2e-05, "loss": 0.039578, "step": 5987 }, { "epoch": 11.975999999999999, "grad_norm": 1.248975157737732, "learning_rate": 2e-05, "loss": 0.04887934, "step": 5988 }, { "epoch": 11.978, "grad_norm": 0.9375743865966797, "learning_rate": 2e-05, "loss": 0.0324719, "step": 5989 }, { "epoch": 11.98, "grad_norm": 1.6238600015640259, "learning_rate": 2e-05, "loss": 0.06100935, "step": 5990 }, { "epoch": 11.982, "grad_norm": 0.9849083423614502, "learning_rate": 2e-05, "loss": 0.03539266, "step": 5991 }, { "epoch": 11.984, "grad_norm": 1.7816842794418335, "learning_rate": 2e-05, "loss": 0.04868245, "step": 5992 }, { "epoch": 11.986, "grad_norm": 1.495138168334961, "learning_rate": 2e-05, "loss": 0.04544922, "step": 5993 }, { "epoch": 11.988, "grad_norm": 1.3168374300003052, "learning_rate": 2e-05, "loss": 0.04699538, "step": 5994 }, { "epoch": 11.99, "grad_norm": 3.24379825592041, "learning_rate": 2e-05, "loss": 0.05233608, "step": 5995 }, { "epoch": 11.992, "grad_norm": 2.7409520149230957, "learning_rate": 2e-05, "loss": 0.0347956, "step": 5996 }, { "epoch": 11.994, "grad_norm": 1.1119478940963745, "learning_rate": 2e-05, "loss": 0.03495222, "step": 5997 }, { "epoch": 11.996, "grad_norm": 0.9931169152259827, "learning_rate": 2e-05, "loss": 0.03586205, "step": 5998 }, { "epoch": 11.998, "grad_norm": 1.299414873123169, "learning_rate": 2e-05, "loss": 0.04897714, "step": 5999 }, { "epoch": 12.0, "grad_norm": 1.1672351360321045, "learning_rate": 2e-05, "loss": 0.03338836, "step": 6000 }, { "epoch": 12.0, "eval_performance": { "AngleClassification_1": 0.996, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9680638722554891, "Equal_1": 0.988, "Equal_2": 0.9560878243512974, "Equal_3": 0.8483033932135728, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.9860279441117764, "Parallel_1": 0.9819639278557114, "Parallel_2": 0.9959919839679359, "Parallel_3": 0.97, "Perpendicular_1": 0.996, "Perpendicular_2": 0.972, "Perpendicular_3": 0.6933867735470942, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.9996666666666667, "PointLiesOnCircle_3": 0.9852000000000001, "PointLiesOnLine_1": 0.9899799599198397, "PointLiesOnLine_2": 0.9959919839679359, "PointLiesOnLine_3": 0.9600798403193613 }, "eval_runtime": 225.6484, "eval_samples_per_second": 46.533, "eval_steps_per_second": 0.931, "step": 6000 }, { "epoch": 12.002, "grad_norm": 2.0453622341156006, "learning_rate": 2e-05, "loss": 0.04507887, "step": 6001 }, { "epoch": 12.004, "grad_norm": 0.8297189474105835, "learning_rate": 2e-05, "loss": 0.03419372, "step": 6002 }, { "epoch": 12.006, "grad_norm": 1.8643795251846313, "learning_rate": 2e-05, "loss": 0.05062669, "step": 6003 }, { "epoch": 12.008, "grad_norm": 1.3948396444320679, "learning_rate": 2e-05, "loss": 0.04684274, "step": 6004 }, { "epoch": 12.01, "grad_norm": 1.309333324432373, "learning_rate": 2e-05, "loss": 0.04394363, "step": 6005 }, { "epoch": 12.012, "grad_norm": 1.6416828632354736, "learning_rate": 2e-05, "loss": 0.04113109, "step": 6006 }, { "epoch": 12.014, "grad_norm": 0.9614999890327454, "learning_rate": 2e-05, "loss": 0.03297891, "step": 6007 }, { "epoch": 12.016, "grad_norm": 1.7547329664230347, "learning_rate": 2e-05, "loss": 0.06269316, "step": 6008 }, { "epoch": 12.018, "grad_norm": 1.0744808912277222, "learning_rate": 2e-05, "loss": 0.04511803, "step": 6009 }, { "epoch": 12.02, "grad_norm": 2.983142852783203, "learning_rate": 2e-05, "loss": 0.05942716, "step": 6010 }, { "epoch": 12.022, "grad_norm": 1.2435990571975708, "learning_rate": 2e-05, "loss": 0.03727053, "step": 6011 }, { "epoch": 12.024, "grad_norm": 1.369827389717102, "learning_rate": 2e-05, "loss": 0.03771032, "step": 6012 }, { "epoch": 12.026, "grad_norm": 1.1270586252212524, "learning_rate": 2e-05, "loss": 0.04644886, "step": 6013 }, { "epoch": 12.028, "grad_norm": 1.5865355730056763, "learning_rate": 2e-05, "loss": 0.04245877, "step": 6014 }, { "epoch": 12.03, "grad_norm": 2.0549163818359375, "learning_rate": 2e-05, "loss": 0.05613353, "step": 6015 }, { "epoch": 12.032, "grad_norm": 1.069564700126648, "learning_rate": 2e-05, "loss": 0.03502301, "step": 6016 }, { "epoch": 12.034, "grad_norm": 1.3907562494277954, "learning_rate": 2e-05, "loss": 0.03715889, "step": 6017 }, { "epoch": 12.036, "grad_norm": 1.2994953393936157, "learning_rate": 2e-05, "loss": 0.03785795, "step": 6018 }, { "epoch": 12.038, "grad_norm": 1.5148688554763794, "learning_rate": 2e-05, "loss": 0.04343065, "step": 6019 }, { "epoch": 12.04, "grad_norm": 2.5704307556152344, "learning_rate": 2e-05, "loss": 0.03235781, "step": 6020 }, { "epoch": 12.042, "grad_norm": 2.1970255374908447, "learning_rate": 2e-05, "loss": 0.05431473, "step": 6021 }, { "epoch": 12.044, "grad_norm": 1.2460700273513794, "learning_rate": 2e-05, "loss": 0.04172162, "step": 6022 }, { "epoch": 12.046, "grad_norm": 1.2687596082687378, "learning_rate": 2e-05, "loss": 0.04956955, "step": 6023 }, { "epoch": 12.048, "grad_norm": 1.360154390335083, "learning_rate": 2e-05, "loss": 0.04256953, "step": 6024 }, { "epoch": 12.05, "grad_norm": 1.322157382965088, "learning_rate": 2e-05, "loss": 0.05848314, "step": 6025 }, { "epoch": 12.052, "grad_norm": 2.0436723232269287, "learning_rate": 2e-05, "loss": 0.04991043, "step": 6026 }, { "epoch": 12.054, "grad_norm": 2.6245839595794678, "learning_rate": 2e-05, "loss": 0.0496827, "step": 6027 }, { "epoch": 12.056, "grad_norm": 1.3887856006622314, "learning_rate": 2e-05, "loss": 0.03522222, "step": 6028 }, { "epoch": 12.058, "grad_norm": 0.8878000378608704, "learning_rate": 2e-05, "loss": 0.02167447, "step": 6029 }, { "epoch": 12.06, "grad_norm": 1.1820523738861084, "learning_rate": 2e-05, "loss": 0.0406768, "step": 6030 }, { "epoch": 12.062, "grad_norm": 1.1053889989852905, "learning_rate": 2e-05, "loss": 0.03722425, "step": 6031 }, { "epoch": 12.064, "grad_norm": 1.1686859130859375, "learning_rate": 2e-05, "loss": 0.03907032, "step": 6032 }, { "epoch": 12.066, "grad_norm": 1.60000479221344, "learning_rate": 2e-05, "loss": 0.04424149, "step": 6033 }, { "epoch": 12.068, "grad_norm": 1.2649625539779663, "learning_rate": 2e-05, "loss": 0.03162839, "step": 6034 }, { "epoch": 12.07, "grad_norm": 1.7437782287597656, "learning_rate": 2e-05, "loss": 0.03457415, "step": 6035 }, { "epoch": 12.072, "grad_norm": 1.6281063556671143, "learning_rate": 2e-05, "loss": 0.05079435, "step": 6036 }, { "epoch": 12.074, "grad_norm": 1.362357497215271, "learning_rate": 2e-05, "loss": 0.04787667, "step": 6037 }, { "epoch": 12.076, "grad_norm": 2.384413242340088, "learning_rate": 2e-05, "loss": 0.05078615, "step": 6038 }, { "epoch": 12.078, "grad_norm": 1.2616544961929321, "learning_rate": 2e-05, "loss": 0.03411825, "step": 6039 }, { "epoch": 12.08, "grad_norm": 0.858539879322052, "learning_rate": 2e-05, "loss": 0.03019105, "step": 6040 }, { "epoch": 12.082, "grad_norm": 1.0017988681793213, "learning_rate": 2e-05, "loss": 0.03147456, "step": 6041 }, { "epoch": 12.084, "grad_norm": 1.0954729318618774, "learning_rate": 2e-05, "loss": 0.040604, "step": 6042 }, { "epoch": 12.086, "grad_norm": 3.614245653152466, "learning_rate": 2e-05, "loss": 0.05858262, "step": 6043 }, { "epoch": 12.088, "grad_norm": 1.083434820175171, "learning_rate": 2e-05, "loss": 0.04104295, "step": 6044 }, { "epoch": 12.09, "grad_norm": 1.8069895505905151, "learning_rate": 2e-05, "loss": 0.04294836, "step": 6045 }, { "epoch": 12.092, "grad_norm": 1.68820059299469, "learning_rate": 2e-05, "loss": 0.04799364, "step": 6046 }, { "epoch": 12.094, "grad_norm": 1.4438908100128174, "learning_rate": 2e-05, "loss": 0.05935677, "step": 6047 }, { "epoch": 12.096, "grad_norm": 1.881434440612793, "learning_rate": 2e-05, "loss": 0.03925735, "step": 6048 }, { "epoch": 12.098, "grad_norm": 1.227565050125122, "learning_rate": 2e-05, "loss": 0.04155404, "step": 6049 }, { "epoch": 12.1, "grad_norm": 2.288317918777466, "learning_rate": 2e-05, "loss": 0.05610761, "step": 6050 }, { "epoch": 12.102, "grad_norm": 1.0111768245697021, "learning_rate": 2e-05, "loss": 0.04479214, "step": 6051 }, { "epoch": 12.104, "grad_norm": 1.3721191883087158, "learning_rate": 2e-05, "loss": 0.04916908, "step": 6052 }, { "epoch": 12.106, "grad_norm": 2.9596445560455322, "learning_rate": 2e-05, "loss": 0.05736539, "step": 6053 }, { "epoch": 12.108, "grad_norm": 0.9496150016784668, "learning_rate": 2e-05, "loss": 0.03842902, "step": 6054 }, { "epoch": 12.11, "grad_norm": 0.9739997982978821, "learning_rate": 2e-05, "loss": 0.0420731, "step": 6055 }, { "epoch": 12.112, "grad_norm": 1.439457654953003, "learning_rate": 2e-05, "loss": 0.05006482, "step": 6056 }, { "epoch": 12.114, "grad_norm": 1.2109313011169434, "learning_rate": 2e-05, "loss": 0.03639213, "step": 6057 }, { "epoch": 12.116, "grad_norm": 1.3525007963180542, "learning_rate": 2e-05, "loss": 0.03997933, "step": 6058 }, { "epoch": 12.118, "grad_norm": 1.0247251987457275, "learning_rate": 2e-05, "loss": 0.03942738, "step": 6059 }, { "epoch": 12.12, "grad_norm": 1.8313347101211548, "learning_rate": 2e-05, "loss": 0.04688803, "step": 6060 }, { "epoch": 12.122, "grad_norm": 1.1015832424163818, "learning_rate": 2e-05, "loss": 0.0323557, "step": 6061 }, { "epoch": 12.124, "grad_norm": 1.340634822845459, "learning_rate": 2e-05, "loss": 0.05443436, "step": 6062 }, { "epoch": 12.126, "grad_norm": 1.7199428081512451, "learning_rate": 2e-05, "loss": 0.04185236, "step": 6063 }, { "epoch": 12.128, "grad_norm": 2.7263031005859375, "learning_rate": 2e-05, "loss": 0.03960133, "step": 6064 }, { "epoch": 12.13, "grad_norm": 1.9453492164611816, "learning_rate": 2e-05, "loss": 0.07152042, "step": 6065 }, { "epoch": 12.132, "grad_norm": 0.9302512407302856, "learning_rate": 2e-05, "loss": 0.03548937, "step": 6066 }, { "epoch": 12.134, "grad_norm": 1.4986331462860107, "learning_rate": 2e-05, "loss": 0.05728586, "step": 6067 }, { "epoch": 12.136, "grad_norm": 1.277394413948059, "learning_rate": 2e-05, "loss": 0.03712662, "step": 6068 }, { "epoch": 12.138, "grad_norm": 1.2260833978652954, "learning_rate": 2e-05, "loss": 0.04641536, "step": 6069 }, { "epoch": 12.14, "grad_norm": 2.1273744106292725, "learning_rate": 2e-05, "loss": 0.06871526, "step": 6070 }, { "epoch": 12.142, "grad_norm": 1.852168083190918, "learning_rate": 2e-05, "loss": 0.05976813, "step": 6071 }, { "epoch": 12.144, "grad_norm": 0.8380770683288574, "learning_rate": 2e-05, "loss": 0.0299613, "step": 6072 }, { "epoch": 12.146, "grad_norm": 1.66663658618927, "learning_rate": 2e-05, "loss": 0.05329958, "step": 6073 }, { "epoch": 12.148, "grad_norm": 1.6469478607177734, "learning_rate": 2e-05, "loss": 0.04227874, "step": 6074 }, { "epoch": 12.15, "grad_norm": 0.9962689280509949, "learning_rate": 2e-05, "loss": 0.03000914, "step": 6075 }, { "epoch": 12.152, "grad_norm": 1.282414197921753, "learning_rate": 2e-05, "loss": 0.06534447, "step": 6076 }, { "epoch": 12.154, "grad_norm": 1.0238263607025146, "learning_rate": 2e-05, "loss": 0.03109509, "step": 6077 }, { "epoch": 12.156, "grad_norm": 0.899166464805603, "learning_rate": 2e-05, "loss": 0.03665204, "step": 6078 }, { "epoch": 12.158, "grad_norm": 1.2855536937713623, "learning_rate": 2e-05, "loss": 0.03677532, "step": 6079 }, { "epoch": 12.16, "grad_norm": 1.6172006130218506, "learning_rate": 2e-05, "loss": 0.04748918, "step": 6080 }, { "epoch": 12.162, "grad_norm": 1.1100597381591797, "learning_rate": 2e-05, "loss": 0.04465011, "step": 6081 }, { "epoch": 12.164, "grad_norm": 1.2116100788116455, "learning_rate": 2e-05, "loss": 0.04209627, "step": 6082 }, { "epoch": 12.166, "grad_norm": 1.2083666324615479, "learning_rate": 2e-05, "loss": 0.03399335, "step": 6083 }, { "epoch": 12.168, "grad_norm": 1.3793971538543701, "learning_rate": 2e-05, "loss": 0.03864294, "step": 6084 }, { "epoch": 12.17, "grad_norm": 0.7666075825691223, "learning_rate": 2e-05, "loss": 0.02478723, "step": 6085 }, { "epoch": 12.172, "grad_norm": 1.897551417350769, "learning_rate": 2e-05, "loss": 0.06243557, "step": 6086 }, { "epoch": 12.174, "grad_norm": 1.231453776359558, "learning_rate": 2e-05, "loss": 0.05378513, "step": 6087 }, { "epoch": 12.176, "grad_norm": 1.3341623544692993, "learning_rate": 2e-05, "loss": 0.04234018, "step": 6088 }, { "epoch": 12.178, "grad_norm": 1.2109911441802979, "learning_rate": 2e-05, "loss": 0.02992423, "step": 6089 }, { "epoch": 12.18, "grad_norm": 1.7490544319152832, "learning_rate": 2e-05, "loss": 0.04035283, "step": 6090 }, { "epoch": 12.182, "grad_norm": 1.1450220346450806, "learning_rate": 2e-05, "loss": 0.04675542, "step": 6091 }, { "epoch": 12.184, "grad_norm": 1.1862246990203857, "learning_rate": 2e-05, "loss": 0.03695572, "step": 6092 }, { "epoch": 12.186, "grad_norm": 1.285001277923584, "learning_rate": 2e-05, "loss": 0.03316811, "step": 6093 }, { "epoch": 12.188, "grad_norm": 1.471510648727417, "learning_rate": 2e-05, "loss": 0.03820515, "step": 6094 }, { "epoch": 12.19, "grad_norm": 1.1149863004684448, "learning_rate": 2e-05, "loss": 0.03308522, "step": 6095 }, { "epoch": 12.192, "grad_norm": 1.3655328750610352, "learning_rate": 2e-05, "loss": 0.03735257, "step": 6096 }, { "epoch": 12.194, "grad_norm": 1.31796395778656, "learning_rate": 2e-05, "loss": 0.0412109, "step": 6097 }, { "epoch": 12.196, "grad_norm": 1.1840546131134033, "learning_rate": 2e-05, "loss": 0.03839169, "step": 6098 }, { "epoch": 12.198, "grad_norm": 4.447031021118164, "learning_rate": 2e-05, "loss": 0.04460757, "step": 6099 }, { "epoch": 12.2, "grad_norm": 1.3523399829864502, "learning_rate": 2e-05, "loss": 0.04808817, "step": 6100 }, { "epoch": 12.202, "grad_norm": 1.2691853046417236, "learning_rate": 2e-05, "loss": 0.04533077, "step": 6101 }, { "epoch": 12.204, "grad_norm": 1.6675738096237183, "learning_rate": 2e-05, "loss": 0.04889767, "step": 6102 }, { "epoch": 12.206, "grad_norm": 2.679682970046997, "learning_rate": 2e-05, "loss": 0.0485518, "step": 6103 }, { "epoch": 12.208, "grad_norm": 1.445830225944519, "learning_rate": 2e-05, "loss": 0.04278787, "step": 6104 }, { "epoch": 12.21, "grad_norm": 1.654105544090271, "learning_rate": 2e-05, "loss": 0.04053675, "step": 6105 }, { "epoch": 12.212, "grad_norm": 1.3877894878387451, "learning_rate": 2e-05, "loss": 0.04957257, "step": 6106 }, { "epoch": 12.214, "grad_norm": 2.349825382232666, "learning_rate": 2e-05, "loss": 0.04618423, "step": 6107 }, { "epoch": 12.216, "grad_norm": 1.063262701034546, "learning_rate": 2e-05, "loss": 0.04388535, "step": 6108 }, { "epoch": 12.218, "grad_norm": 1.0035594701766968, "learning_rate": 2e-05, "loss": 0.03710927, "step": 6109 }, { "epoch": 12.22, "grad_norm": 1.5889099836349487, "learning_rate": 2e-05, "loss": 0.05852876, "step": 6110 }, { "epoch": 12.222, "grad_norm": 1.0829167366027832, "learning_rate": 2e-05, "loss": 0.04417733, "step": 6111 }, { "epoch": 12.224, "grad_norm": 1.2925286293029785, "learning_rate": 2e-05, "loss": 0.05315527, "step": 6112 }, { "epoch": 12.226, "grad_norm": 1.2771823406219482, "learning_rate": 2e-05, "loss": 0.04743166, "step": 6113 }, { "epoch": 12.228, "grad_norm": 1.8031718730926514, "learning_rate": 2e-05, "loss": 0.05490491, "step": 6114 }, { "epoch": 12.23, "grad_norm": 4.757546424865723, "learning_rate": 2e-05, "loss": 0.04697193, "step": 6115 }, { "epoch": 12.232, "grad_norm": 2.589323043823242, "learning_rate": 2e-05, "loss": 0.05091906, "step": 6116 }, { "epoch": 12.234, "grad_norm": 1.147157907485962, "learning_rate": 2e-05, "loss": 0.0354594, "step": 6117 }, { "epoch": 12.236, "grad_norm": 1.1689702272415161, "learning_rate": 2e-05, "loss": 0.05225492, "step": 6118 }, { "epoch": 12.238, "grad_norm": 1.267418622970581, "learning_rate": 2e-05, "loss": 0.04183613, "step": 6119 }, { "epoch": 12.24, "grad_norm": 2.7306859493255615, "learning_rate": 2e-05, "loss": 0.04468532, "step": 6120 }, { "epoch": 12.242, "grad_norm": 2.179464340209961, "learning_rate": 2e-05, "loss": 0.05367554, "step": 6121 }, { "epoch": 12.244, "grad_norm": 1.5951850414276123, "learning_rate": 2e-05, "loss": 0.05392575, "step": 6122 }, { "epoch": 12.246, "grad_norm": 1.5338630676269531, "learning_rate": 2e-05, "loss": 0.05239145, "step": 6123 }, { "epoch": 12.248, "grad_norm": 1.4521045684814453, "learning_rate": 2e-05, "loss": 0.03477581, "step": 6124 }, { "epoch": 12.25, "grad_norm": 1.371685266494751, "learning_rate": 2e-05, "loss": 0.04997404, "step": 6125 }, { "epoch": 12.252, "grad_norm": 1.0525933504104614, "learning_rate": 2e-05, "loss": 0.04329906, "step": 6126 }, { "epoch": 12.254, "grad_norm": 1.3579634428024292, "learning_rate": 2e-05, "loss": 0.04377569, "step": 6127 }, { "epoch": 12.256, "grad_norm": 2.218615770339966, "learning_rate": 2e-05, "loss": 0.06167816, "step": 6128 }, { "epoch": 12.258, "grad_norm": 1.4037760496139526, "learning_rate": 2e-05, "loss": 0.05200637, "step": 6129 }, { "epoch": 12.26, "grad_norm": 1.197969913482666, "learning_rate": 2e-05, "loss": 0.03924961, "step": 6130 }, { "epoch": 12.262, "grad_norm": 1.3045804500579834, "learning_rate": 2e-05, "loss": 0.03886849, "step": 6131 }, { "epoch": 12.264, "grad_norm": 1.2083258628845215, "learning_rate": 2e-05, "loss": 0.03633629, "step": 6132 }, { "epoch": 12.266, "grad_norm": 1.3620266914367676, "learning_rate": 2e-05, "loss": 0.0390857, "step": 6133 }, { "epoch": 12.268, "grad_norm": 1.8047446012496948, "learning_rate": 2e-05, "loss": 0.03185136, "step": 6134 }, { "epoch": 12.27, "grad_norm": 1.9416303634643555, "learning_rate": 2e-05, "loss": 0.04050217, "step": 6135 }, { "epoch": 12.272, "grad_norm": 2.363163709640503, "learning_rate": 2e-05, "loss": 0.0334041, "step": 6136 }, { "epoch": 12.274000000000001, "grad_norm": 1.4767624139785767, "learning_rate": 2e-05, "loss": 0.04628345, "step": 6137 }, { "epoch": 12.276, "grad_norm": 1.6571887731552124, "learning_rate": 2e-05, "loss": 0.05242894, "step": 6138 }, { "epoch": 12.278, "grad_norm": 1.4333903789520264, "learning_rate": 2e-05, "loss": 0.04385048, "step": 6139 }, { "epoch": 12.28, "grad_norm": 1.884385108947754, "learning_rate": 2e-05, "loss": 0.04608832, "step": 6140 }, { "epoch": 12.282, "grad_norm": 1.454089641571045, "learning_rate": 2e-05, "loss": 0.04242826, "step": 6141 }, { "epoch": 12.284, "grad_norm": 1.570263147354126, "learning_rate": 2e-05, "loss": 0.05017062, "step": 6142 }, { "epoch": 12.286, "grad_norm": 1.0338143110275269, "learning_rate": 2e-05, "loss": 0.02951477, "step": 6143 }, { "epoch": 12.288, "grad_norm": 2.561279773712158, "learning_rate": 2e-05, "loss": 0.03623242, "step": 6144 }, { "epoch": 12.29, "grad_norm": 1.3254700899124146, "learning_rate": 2e-05, "loss": 0.03661431, "step": 6145 }, { "epoch": 12.292, "grad_norm": 1.1387964487075806, "learning_rate": 2e-05, "loss": 0.02429761, "step": 6146 }, { "epoch": 12.294, "grad_norm": 1.008934497833252, "learning_rate": 2e-05, "loss": 0.03456727, "step": 6147 }, { "epoch": 12.296, "grad_norm": 0.8425427079200745, "learning_rate": 2e-05, "loss": 0.03165461, "step": 6148 }, { "epoch": 12.298, "grad_norm": 3.0855016708374023, "learning_rate": 2e-05, "loss": 0.07248887, "step": 6149 }, { "epoch": 12.3, "grad_norm": 0.9261988997459412, "learning_rate": 2e-05, "loss": 0.0305062, "step": 6150 }, { "epoch": 12.302, "grad_norm": 3.3163230419158936, "learning_rate": 2e-05, "loss": 0.04309294, "step": 6151 }, { "epoch": 12.304, "grad_norm": 1.473903775215149, "learning_rate": 2e-05, "loss": 0.04795074, "step": 6152 }, { "epoch": 12.306, "grad_norm": 1.6115399599075317, "learning_rate": 2e-05, "loss": 0.03649607, "step": 6153 }, { "epoch": 12.308, "grad_norm": 1.3920986652374268, "learning_rate": 2e-05, "loss": 0.04761494, "step": 6154 }, { "epoch": 12.31, "grad_norm": 1.3106554746627808, "learning_rate": 2e-05, "loss": 0.04344271, "step": 6155 }, { "epoch": 12.312, "grad_norm": 1.2097766399383545, "learning_rate": 2e-05, "loss": 0.03139389, "step": 6156 }, { "epoch": 12.314, "grad_norm": 2.0887393951416016, "learning_rate": 2e-05, "loss": 0.07277094, "step": 6157 }, { "epoch": 12.316, "grad_norm": 0.9932165145874023, "learning_rate": 2e-05, "loss": 0.03150304, "step": 6158 }, { "epoch": 12.318, "grad_norm": 1.6363164186477661, "learning_rate": 2e-05, "loss": 0.03685419, "step": 6159 }, { "epoch": 12.32, "grad_norm": 0.9719448089599609, "learning_rate": 2e-05, "loss": 0.03005211, "step": 6160 }, { "epoch": 12.322, "grad_norm": 0.9449983835220337, "learning_rate": 2e-05, "loss": 0.03131802, "step": 6161 }, { "epoch": 12.324, "grad_norm": 1.4210776090621948, "learning_rate": 2e-05, "loss": 0.03870679, "step": 6162 }, { "epoch": 12.326, "grad_norm": 1.059273362159729, "learning_rate": 2e-05, "loss": 0.03339715, "step": 6163 }, { "epoch": 12.328, "grad_norm": 1.0459723472595215, "learning_rate": 2e-05, "loss": 0.04106949, "step": 6164 }, { "epoch": 12.33, "grad_norm": 1.4043748378753662, "learning_rate": 2e-05, "loss": 0.039837, "step": 6165 }, { "epoch": 12.332, "grad_norm": 1.4938982725143433, "learning_rate": 2e-05, "loss": 0.04977816, "step": 6166 }, { "epoch": 12.334, "grad_norm": 2.406876564025879, "learning_rate": 2e-05, "loss": 0.03850146, "step": 6167 }, { "epoch": 12.336, "grad_norm": 1.4718785285949707, "learning_rate": 2e-05, "loss": 0.03187497, "step": 6168 }, { "epoch": 12.338, "grad_norm": 1.2084425687789917, "learning_rate": 2e-05, "loss": 0.03397946, "step": 6169 }, { "epoch": 12.34, "grad_norm": 1.1077980995178223, "learning_rate": 2e-05, "loss": 0.03275523, "step": 6170 }, { "epoch": 12.342, "grad_norm": 3.988109588623047, "learning_rate": 2e-05, "loss": 0.06082713, "step": 6171 }, { "epoch": 12.344, "grad_norm": 0.8460855484008789, "learning_rate": 2e-05, "loss": 0.02551005, "step": 6172 }, { "epoch": 12.346, "grad_norm": 2.823441743850708, "learning_rate": 2e-05, "loss": 0.04445308, "step": 6173 }, { "epoch": 12.348, "grad_norm": 0.8289135694503784, "learning_rate": 2e-05, "loss": 0.02100476, "step": 6174 }, { "epoch": 12.35, "grad_norm": 1.150907278060913, "learning_rate": 2e-05, "loss": 0.04813028, "step": 6175 }, { "epoch": 12.352, "grad_norm": 0.9638398289680481, "learning_rate": 2e-05, "loss": 0.03116925, "step": 6176 }, { "epoch": 12.354, "grad_norm": 1.1859619617462158, "learning_rate": 2e-05, "loss": 0.03521572, "step": 6177 }, { "epoch": 12.356, "grad_norm": 3.075559377670288, "learning_rate": 2e-05, "loss": 0.04122707, "step": 6178 }, { "epoch": 12.358, "grad_norm": 1.1414211988449097, "learning_rate": 2e-05, "loss": 0.03379297, "step": 6179 }, { "epoch": 12.36, "grad_norm": 1.7353578805923462, "learning_rate": 2e-05, "loss": 0.03621262, "step": 6180 }, { "epoch": 12.362, "grad_norm": 1.3085752725601196, "learning_rate": 2e-05, "loss": 0.03368364, "step": 6181 }, { "epoch": 12.364, "grad_norm": 1.171000361442566, "learning_rate": 2e-05, "loss": 0.02848397, "step": 6182 }, { "epoch": 12.366, "grad_norm": 1.3344168663024902, "learning_rate": 2e-05, "loss": 0.03983635, "step": 6183 }, { "epoch": 12.368, "grad_norm": 2.224670886993408, "learning_rate": 2e-05, "loss": 0.04560218, "step": 6184 }, { "epoch": 12.37, "grad_norm": 4.642077922821045, "learning_rate": 2e-05, "loss": 0.02805091, "step": 6185 }, { "epoch": 12.372, "grad_norm": 1.235609531402588, "learning_rate": 2e-05, "loss": 0.03190771, "step": 6186 }, { "epoch": 12.374, "grad_norm": 1.417596697807312, "learning_rate": 2e-05, "loss": 0.03260474, "step": 6187 }, { "epoch": 12.376, "grad_norm": 1.6948504447937012, "learning_rate": 2e-05, "loss": 0.05413381, "step": 6188 }, { "epoch": 12.378, "grad_norm": 1.108695149421692, "learning_rate": 2e-05, "loss": 0.03963104, "step": 6189 }, { "epoch": 12.38, "grad_norm": 1.7198768854141235, "learning_rate": 2e-05, "loss": 0.04033287, "step": 6190 }, { "epoch": 12.382, "grad_norm": 1.8242746591567993, "learning_rate": 2e-05, "loss": 0.05406561, "step": 6191 }, { "epoch": 12.384, "grad_norm": 1.8407591581344604, "learning_rate": 2e-05, "loss": 0.04720781, "step": 6192 }, { "epoch": 12.386, "grad_norm": 1.0429465770721436, "learning_rate": 2e-05, "loss": 0.03631607, "step": 6193 }, { "epoch": 12.388, "grad_norm": 2.377769947052002, "learning_rate": 2e-05, "loss": 0.04056407, "step": 6194 }, { "epoch": 12.39, "grad_norm": 1.035057783126831, "learning_rate": 2e-05, "loss": 0.035265, "step": 6195 }, { "epoch": 12.392, "grad_norm": 2.417996406555176, "learning_rate": 2e-05, "loss": 0.04286297, "step": 6196 }, { "epoch": 12.394, "grad_norm": 1.0926456451416016, "learning_rate": 2e-05, "loss": 0.04381856, "step": 6197 }, { "epoch": 12.396, "grad_norm": 1.0534722805023193, "learning_rate": 2e-05, "loss": 0.03921672, "step": 6198 }, { "epoch": 12.398, "grad_norm": 2.308216094970703, "learning_rate": 2e-05, "loss": 0.04651362, "step": 6199 }, { "epoch": 12.4, "grad_norm": 1.5886707305908203, "learning_rate": 2e-05, "loss": 0.04697857, "step": 6200 }, { "epoch": 12.402, "grad_norm": 1.6171622276306152, "learning_rate": 2e-05, "loss": 0.04984737, "step": 6201 }, { "epoch": 12.404, "grad_norm": 0.8989342451095581, "learning_rate": 2e-05, "loss": 0.03235118, "step": 6202 }, { "epoch": 12.406, "grad_norm": 2.1757442951202393, "learning_rate": 2e-05, "loss": 0.0411982, "step": 6203 }, { "epoch": 12.408, "grad_norm": 1.5509450435638428, "learning_rate": 2e-05, "loss": 0.03985682, "step": 6204 }, { "epoch": 12.41, "grad_norm": 1.3055469989776611, "learning_rate": 2e-05, "loss": 0.03675168, "step": 6205 }, { "epoch": 12.412, "grad_norm": 0.9025886654853821, "learning_rate": 2e-05, "loss": 0.03087064, "step": 6206 }, { "epoch": 12.414, "grad_norm": 1.8405051231384277, "learning_rate": 2e-05, "loss": 0.04755569, "step": 6207 }, { "epoch": 12.416, "grad_norm": 2.5023672580718994, "learning_rate": 2e-05, "loss": 0.05869409, "step": 6208 }, { "epoch": 12.418, "grad_norm": 2.242622137069702, "learning_rate": 2e-05, "loss": 0.06333459, "step": 6209 }, { "epoch": 12.42, "grad_norm": 1.1513686180114746, "learning_rate": 2e-05, "loss": 0.0378928, "step": 6210 }, { "epoch": 12.422, "grad_norm": 1.3134626150131226, "learning_rate": 2e-05, "loss": 0.03788881, "step": 6211 }, { "epoch": 12.424, "grad_norm": 1.3897289037704468, "learning_rate": 2e-05, "loss": 0.03820506, "step": 6212 }, { "epoch": 12.426, "grad_norm": 1.3690463304519653, "learning_rate": 2e-05, "loss": 0.0315478, "step": 6213 }, { "epoch": 12.428, "grad_norm": 1.857747197151184, "learning_rate": 2e-05, "loss": 0.04571281, "step": 6214 }, { "epoch": 12.43, "grad_norm": 0.9337007999420166, "learning_rate": 2e-05, "loss": 0.03620454, "step": 6215 }, { "epoch": 12.432, "grad_norm": 1.9891836643218994, "learning_rate": 2e-05, "loss": 0.04186926, "step": 6216 }, { "epoch": 12.434, "grad_norm": 1.1559808254241943, "learning_rate": 2e-05, "loss": 0.04334553, "step": 6217 }, { "epoch": 12.436, "grad_norm": 1.4083528518676758, "learning_rate": 2e-05, "loss": 0.06200282, "step": 6218 }, { "epoch": 12.438, "grad_norm": 1.0316534042358398, "learning_rate": 2e-05, "loss": 0.03102511, "step": 6219 }, { "epoch": 12.44, "grad_norm": 1.3623266220092773, "learning_rate": 2e-05, "loss": 0.03596399, "step": 6220 }, { "epoch": 12.442, "grad_norm": 1.1606459617614746, "learning_rate": 2e-05, "loss": 0.03946595, "step": 6221 }, { "epoch": 12.444, "grad_norm": 1.418427586555481, "learning_rate": 2e-05, "loss": 0.0394343, "step": 6222 }, { "epoch": 12.446, "grad_norm": 0.856622040271759, "learning_rate": 2e-05, "loss": 0.02308959, "step": 6223 }, { "epoch": 12.448, "grad_norm": 1.5719470977783203, "learning_rate": 2e-05, "loss": 0.05477959, "step": 6224 }, { "epoch": 12.45, "grad_norm": 1.8131663799285889, "learning_rate": 2e-05, "loss": 0.06788234, "step": 6225 }, { "epoch": 12.452, "grad_norm": 1.6468989849090576, "learning_rate": 2e-05, "loss": 0.05444904, "step": 6226 }, { "epoch": 12.454, "grad_norm": 1.105216145515442, "learning_rate": 2e-05, "loss": 0.03612129, "step": 6227 }, { "epoch": 12.456, "grad_norm": 1.5104936361312866, "learning_rate": 2e-05, "loss": 0.04039486, "step": 6228 }, { "epoch": 12.458, "grad_norm": 1.7759315967559814, "learning_rate": 2e-05, "loss": 0.04631813, "step": 6229 }, { "epoch": 12.46, "grad_norm": 1.0398575067520142, "learning_rate": 2e-05, "loss": 0.03344077, "step": 6230 }, { "epoch": 12.462, "grad_norm": 1.8576164245605469, "learning_rate": 2e-05, "loss": 0.05409398, "step": 6231 }, { "epoch": 12.464, "grad_norm": 1.6747236251831055, "learning_rate": 2e-05, "loss": 0.04340385, "step": 6232 }, { "epoch": 12.466, "grad_norm": 1.3914557695388794, "learning_rate": 2e-05, "loss": 0.05315351, "step": 6233 }, { "epoch": 12.468, "grad_norm": 1.43662428855896, "learning_rate": 2e-05, "loss": 0.05425394, "step": 6234 }, { "epoch": 12.47, "grad_norm": 2.898132085800171, "learning_rate": 2e-05, "loss": 0.04620753, "step": 6235 }, { "epoch": 12.472, "grad_norm": 1.788785457611084, "learning_rate": 2e-05, "loss": 0.0689465, "step": 6236 }, { "epoch": 12.474, "grad_norm": 1.4933058023452759, "learning_rate": 2e-05, "loss": 0.04538993, "step": 6237 }, { "epoch": 12.475999999999999, "grad_norm": 1.1536328792572021, "learning_rate": 2e-05, "loss": 0.04329805, "step": 6238 }, { "epoch": 12.478, "grad_norm": 1.2392916679382324, "learning_rate": 2e-05, "loss": 0.055818, "step": 6239 }, { "epoch": 12.48, "grad_norm": 1.1834752559661865, "learning_rate": 2e-05, "loss": 0.04977222, "step": 6240 }, { "epoch": 12.482, "grad_norm": 1.8389145135879517, "learning_rate": 2e-05, "loss": 0.04229666, "step": 6241 }, { "epoch": 12.484, "grad_norm": 2.0700206756591797, "learning_rate": 2e-05, "loss": 0.04690216, "step": 6242 }, { "epoch": 12.486, "grad_norm": 1.3595695495605469, "learning_rate": 2e-05, "loss": 0.03846291, "step": 6243 }, { "epoch": 12.488, "grad_norm": 0.8740466237068176, "learning_rate": 2e-05, "loss": 0.03391623, "step": 6244 }, { "epoch": 12.49, "grad_norm": 1.2904584407806396, "learning_rate": 2e-05, "loss": 0.05347811, "step": 6245 }, { "epoch": 12.492, "grad_norm": 1.3550524711608887, "learning_rate": 2e-05, "loss": 0.04715247, "step": 6246 }, { "epoch": 12.494, "grad_norm": 1.4273346662521362, "learning_rate": 2e-05, "loss": 0.05306143, "step": 6247 }, { "epoch": 12.496, "grad_norm": 1.726851224899292, "learning_rate": 2e-05, "loss": 0.03953635, "step": 6248 }, { "epoch": 12.498, "grad_norm": 1.6622463464736938, "learning_rate": 2e-05, "loss": 0.05597632, "step": 6249 }, { "epoch": 12.5, "grad_norm": 1.1400461196899414, "learning_rate": 2e-05, "loss": 0.04303446, "step": 6250 }, { "epoch": 12.502, "grad_norm": 1.068807601928711, "learning_rate": 2e-05, "loss": 0.04233329, "step": 6251 }, { "epoch": 12.504, "grad_norm": 1.1722207069396973, "learning_rate": 2e-05, "loss": 0.04468241, "step": 6252 }, { "epoch": 12.506, "grad_norm": 1.2234675884246826, "learning_rate": 2e-05, "loss": 0.0336486, "step": 6253 }, { "epoch": 12.508, "grad_norm": 1.9324644804000854, "learning_rate": 2e-05, "loss": 0.0546665, "step": 6254 }, { "epoch": 12.51, "grad_norm": 1.039088249206543, "learning_rate": 2e-05, "loss": 0.03372445, "step": 6255 }, { "epoch": 12.512, "grad_norm": 1.204850673675537, "learning_rate": 2e-05, "loss": 0.0392486, "step": 6256 }, { "epoch": 12.514, "grad_norm": 1.0852515697479248, "learning_rate": 2e-05, "loss": 0.03467633, "step": 6257 }, { "epoch": 12.516, "grad_norm": 1.4168981313705444, "learning_rate": 2e-05, "loss": 0.04812116, "step": 6258 }, { "epoch": 12.518, "grad_norm": 1.1843552589416504, "learning_rate": 2e-05, "loss": 0.04127664, "step": 6259 }, { "epoch": 12.52, "grad_norm": 1.080523133277893, "learning_rate": 2e-05, "loss": 0.05414173, "step": 6260 }, { "epoch": 12.522, "grad_norm": 1.1029906272888184, "learning_rate": 2e-05, "loss": 0.0343609, "step": 6261 }, { "epoch": 12.524000000000001, "grad_norm": 1.5605822801589966, "learning_rate": 2e-05, "loss": 0.06201877, "step": 6262 }, { "epoch": 12.526, "grad_norm": 1.9041469097137451, "learning_rate": 2e-05, "loss": 0.05325141, "step": 6263 }, { "epoch": 12.528, "grad_norm": 1.1127643585205078, "learning_rate": 2e-05, "loss": 0.04700782, "step": 6264 }, { "epoch": 12.53, "grad_norm": 1.771794080734253, "learning_rate": 2e-05, "loss": 0.04708548, "step": 6265 }, { "epoch": 12.532, "grad_norm": 0.8835294842720032, "learning_rate": 2e-05, "loss": 0.02617223, "step": 6266 }, { "epoch": 12.534, "grad_norm": 1.2144519090652466, "learning_rate": 2e-05, "loss": 0.03177464, "step": 6267 }, { "epoch": 12.536, "grad_norm": 1.269631266593933, "learning_rate": 2e-05, "loss": 0.0429852, "step": 6268 }, { "epoch": 12.538, "grad_norm": 0.9989429116249084, "learning_rate": 2e-05, "loss": 0.03079105, "step": 6269 }, { "epoch": 12.54, "grad_norm": 1.0922902822494507, "learning_rate": 2e-05, "loss": 0.04355695, "step": 6270 }, { "epoch": 12.542, "grad_norm": 1.4542877674102783, "learning_rate": 2e-05, "loss": 0.04602045, "step": 6271 }, { "epoch": 12.544, "grad_norm": 1.1666557788848877, "learning_rate": 2e-05, "loss": 0.0424659, "step": 6272 }, { "epoch": 12.546, "grad_norm": 1.8474477529525757, "learning_rate": 2e-05, "loss": 0.05145413, "step": 6273 }, { "epoch": 12.548, "grad_norm": 1.7277367115020752, "learning_rate": 2e-05, "loss": 0.04547224, "step": 6274 }, { "epoch": 12.55, "grad_norm": 1.2968204021453857, "learning_rate": 2e-05, "loss": 0.05744804, "step": 6275 }, { "epoch": 12.552, "grad_norm": 0.885041356086731, "learning_rate": 2e-05, "loss": 0.02881624, "step": 6276 }, { "epoch": 12.554, "grad_norm": 1.2140165567398071, "learning_rate": 2e-05, "loss": 0.04063617, "step": 6277 }, { "epoch": 12.556000000000001, "grad_norm": 0.9985512495040894, "learning_rate": 2e-05, "loss": 0.02475342, "step": 6278 }, { "epoch": 12.558, "grad_norm": 1.0989340543746948, "learning_rate": 2e-05, "loss": 0.03903017, "step": 6279 }, { "epoch": 12.56, "grad_norm": 1.421140432357788, "learning_rate": 2e-05, "loss": 0.04167198, "step": 6280 }, { "epoch": 12.562, "grad_norm": 1.3441224098205566, "learning_rate": 2e-05, "loss": 0.04768921, "step": 6281 }, { "epoch": 12.564, "grad_norm": 0.8638107180595398, "learning_rate": 2e-05, "loss": 0.02912693, "step": 6282 }, { "epoch": 12.566, "grad_norm": 0.9974273443222046, "learning_rate": 2e-05, "loss": 0.03495281, "step": 6283 }, { "epoch": 12.568, "grad_norm": 1.4846467971801758, "learning_rate": 2e-05, "loss": 0.0437046, "step": 6284 }, { "epoch": 12.57, "grad_norm": 1.0479719638824463, "learning_rate": 2e-05, "loss": 0.03646088, "step": 6285 }, { "epoch": 12.572, "grad_norm": 1.4242684841156006, "learning_rate": 2e-05, "loss": 0.02921792, "step": 6286 }, { "epoch": 12.574, "grad_norm": 1.4535014629364014, "learning_rate": 2e-05, "loss": 0.03363311, "step": 6287 }, { "epoch": 12.576, "grad_norm": 2.2380754947662354, "learning_rate": 2e-05, "loss": 0.05058814, "step": 6288 }, { "epoch": 12.578, "grad_norm": 1.024611473083496, "learning_rate": 2e-05, "loss": 0.03373952, "step": 6289 }, { "epoch": 12.58, "grad_norm": 1.348824143409729, "learning_rate": 2e-05, "loss": 0.04505939, "step": 6290 }, { "epoch": 12.582, "grad_norm": 1.7254105806350708, "learning_rate": 2e-05, "loss": 0.04170604, "step": 6291 }, { "epoch": 12.584, "grad_norm": 1.2654290199279785, "learning_rate": 2e-05, "loss": 0.04060872, "step": 6292 }, { "epoch": 12.586, "grad_norm": 4.495528221130371, "learning_rate": 2e-05, "loss": 0.03194971, "step": 6293 }, { "epoch": 12.588, "grad_norm": 2.2239458560943604, "learning_rate": 2e-05, "loss": 0.058491, "step": 6294 }, { "epoch": 12.59, "grad_norm": 1.0780088901519775, "learning_rate": 2e-05, "loss": 0.03230238, "step": 6295 }, { "epoch": 12.592, "grad_norm": 1.3520599603652954, "learning_rate": 2e-05, "loss": 0.03909261, "step": 6296 }, { "epoch": 12.594, "grad_norm": 2.423722267150879, "learning_rate": 2e-05, "loss": 0.05330641, "step": 6297 }, { "epoch": 12.596, "grad_norm": 1.1696189641952515, "learning_rate": 2e-05, "loss": 0.04031657, "step": 6298 }, { "epoch": 12.598, "grad_norm": 1.2343465089797974, "learning_rate": 2e-05, "loss": 0.04270149, "step": 6299 }, { "epoch": 12.6, "grad_norm": 1.2355128526687622, "learning_rate": 2e-05, "loss": 0.04290633, "step": 6300 }, { "epoch": 12.602, "grad_norm": 2.0673325061798096, "learning_rate": 2e-05, "loss": 0.05577706, "step": 6301 }, { "epoch": 12.604, "grad_norm": 2.5515084266662598, "learning_rate": 2e-05, "loss": 0.04718814, "step": 6302 }, { "epoch": 12.606, "grad_norm": 1.3687702417373657, "learning_rate": 2e-05, "loss": 0.05464392, "step": 6303 }, { "epoch": 12.608, "grad_norm": 3.0399668216705322, "learning_rate": 2e-05, "loss": 0.03925911, "step": 6304 }, { "epoch": 12.61, "grad_norm": 1.171708106994629, "learning_rate": 2e-05, "loss": 0.0236215, "step": 6305 }, { "epoch": 12.612, "grad_norm": 1.4000498056411743, "learning_rate": 2e-05, "loss": 0.03053783, "step": 6306 }, { "epoch": 12.614, "grad_norm": 1.3665634393692017, "learning_rate": 2e-05, "loss": 0.04709284, "step": 6307 }, { "epoch": 12.616, "grad_norm": 1.5376986265182495, "learning_rate": 2e-05, "loss": 0.06400272, "step": 6308 }, { "epoch": 12.618, "grad_norm": 1.2292733192443848, "learning_rate": 2e-05, "loss": 0.04021486, "step": 6309 }, { "epoch": 12.62, "grad_norm": 1.2984143495559692, "learning_rate": 2e-05, "loss": 0.03572147, "step": 6310 }, { "epoch": 12.622, "grad_norm": 0.9473585486412048, "learning_rate": 2e-05, "loss": 0.03935558, "step": 6311 }, { "epoch": 12.624, "grad_norm": 0.7904446125030518, "learning_rate": 2e-05, "loss": 0.02854537, "step": 6312 }, { "epoch": 12.626, "grad_norm": 1.692899227142334, "learning_rate": 2e-05, "loss": 0.0574809, "step": 6313 }, { "epoch": 12.628, "grad_norm": 1.418422818183899, "learning_rate": 2e-05, "loss": 0.04338343, "step": 6314 }, { "epoch": 12.63, "grad_norm": 1.0997378826141357, "learning_rate": 2e-05, "loss": 0.04099502, "step": 6315 }, { "epoch": 12.632, "grad_norm": 0.7807484269142151, "learning_rate": 2e-05, "loss": 0.02516371, "step": 6316 }, { "epoch": 12.634, "grad_norm": 1.1499603986740112, "learning_rate": 2e-05, "loss": 0.04060725, "step": 6317 }, { "epoch": 12.636, "grad_norm": 1.376180648803711, "learning_rate": 2e-05, "loss": 0.04795082, "step": 6318 }, { "epoch": 12.638, "grad_norm": 1.7178436517715454, "learning_rate": 2e-05, "loss": 0.03784566, "step": 6319 }, { "epoch": 12.64, "grad_norm": 1.7242496013641357, "learning_rate": 2e-05, "loss": 0.06087743, "step": 6320 }, { "epoch": 12.642, "grad_norm": 1.407828688621521, "learning_rate": 2e-05, "loss": 0.04791344, "step": 6321 }, { "epoch": 12.644, "grad_norm": 1.2469614744186401, "learning_rate": 2e-05, "loss": 0.03489613, "step": 6322 }, { "epoch": 12.646, "grad_norm": 1.1262562274932861, "learning_rate": 2e-05, "loss": 0.0405737, "step": 6323 }, { "epoch": 12.648, "grad_norm": 1.2003681659698486, "learning_rate": 2e-05, "loss": 0.03134914, "step": 6324 }, { "epoch": 12.65, "grad_norm": 1.561112403869629, "learning_rate": 2e-05, "loss": 0.04147166, "step": 6325 }, { "epoch": 12.652, "grad_norm": 1.5456750392913818, "learning_rate": 2e-05, "loss": 0.06560336, "step": 6326 }, { "epoch": 12.654, "grad_norm": 1.151427984237671, "learning_rate": 2e-05, "loss": 0.0402531, "step": 6327 }, { "epoch": 12.656, "grad_norm": 1.4893380403518677, "learning_rate": 2e-05, "loss": 0.03513143, "step": 6328 }, { "epoch": 12.658, "grad_norm": 0.9739792346954346, "learning_rate": 2e-05, "loss": 0.04505148, "step": 6329 }, { "epoch": 12.66, "grad_norm": 1.6729964017868042, "learning_rate": 2e-05, "loss": 0.0554752, "step": 6330 }, { "epoch": 12.662, "grad_norm": 1.477545976638794, "learning_rate": 2e-05, "loss": 0.04508356, "step": 6331 }, { "epoch": 12.664, "grad_norm": 1.1011073589324951, "learning_rate": 2e-05, "loss": 0.04050437, "step": 6332 }, { "epoch": 12.666, "grad_norm": 1.5996047258377075, "learning_rate": 2e-05, "loss": 0.03945097, "step": 6333 }, { "epoch": 12.668, "grad_norm": 1.9121135473251343, "learning_rate": 2e-05, "loss": 0.04508352, "step": 6334 }, { "epoch": 12.67, "grad_norm": 1.1469815969467163, "learning_rate": 2e-05, "loss": 0.03730458, "step": 6335 }, { "epoch": 12.672, "grad_norm": 1.6128054857254028, "learning_rate": 2e-05, "loss": 0.03322927, "step": 6336 }, { "epoch": 12.674, "grad_norm": 1.2360023260116577, "learning_rate": 2e-05, "loss": 0.03601769, "step": 6337 }, { "epoch": 12.676, "grad_norm": 0.9723455309867859, "learning_rate": 2e-05, "loss": 0.03090682, "step": 6338 }, { "epoch": 12.678, "grad_norm": 0.7664157748222351, "learning_rate": 2e-05, "loss": 0.02331576, "step": 6339 }, { "epoch": 12.68, "grad_norm": 1.8629016876220703, "learning_rate": 2e-05, "loss": 0.05990157, "step": 6340 }, { "epoch": 12.682, "grad_norm": 1.3592828512191772, "learning_rate": 2e-05, "loss": 0.04747428, "step": 6341 }, { "epoch": 12.684, "grad_norm": 1.6482810974121094, "learning_rate": 2e-05, "loss": 0.05335284, "step": 6342 }, { "epoch": 12.686, "grad_norm": 1.381803274154663, "learning_rate": 2e-05, "loss": 0.03803715, "step": 6343 }, { "epoch": 12.688, "grad_norm": 1.4017860889434814, "learning_rate": 2e-05, "loss": 0.0441341, "step": 6344 }, { "epoch": 12.69, "grad_norm": 1.701885461807251, "learning_rate": 2e-05, "loss": 0.0518548, "step": 6345 }, { "epoch": 12.692, "grad_norm": 1.1061818599700928, "learning_rate": 2e-05, "loss": 0.03018975, "step": 6346 }, { "epoch": 12.693999999999999, "grad_norm": 1.2730849981307983, "learning_rate": 2e-05, "loss": 0.04656669, "step": 6347 }, { "epoch": 12.696, "grad_norm": 1.0069987773895264, "learning_rate": 2e-05, "loss": 0.03312222, "step": 6348 }, { "epoch": 12.698, "grad_norm": 1.473136305809021, "learning_rate": 2e-05, "loss": 0.03876702, "step": 6349 }, { "epoch": 12.7, "grad_norm": 1.193970799446106, "learning_rate": 2e-05, "loss": 0.05121401, "step": 6350 }, { "epoch": 12.702, "grad_norm": 2.4699454307556152, "learning_rate": 2e-05, "loss": 0.0593311, "step": 6351 }, { "epoch": 12.704, "grad_norm": 1.2948992252349854, "learning_rate": 2e-05, "loss": 0.04756367, "step": 6352 }, { "epoch": 12.706, "grad_norm": 1.7724204063415527, "learning_rate": 2e-05, "loss": 0.06780231, "step": 6353 }, { "epoch": 12.708, "grad_norm": 0.9041151404380798, "learning_rate": 2e-05, "loss": 0.0361415, "step": 6354 }, { "epoch": 12.71, "grad_norm": 1.1595178842544556, "learning_rate": 2e-05, "loss": 0.0363571, "step": 6355 }, { "epoch": 12.712, "grad_norm": 1.8556021451950073, "learning_rate": 2e-05, "loss": 0.06934631, "step": 6356 }, { "epoch": 12.714, "grad_norm": 2.2679331302642822, "learning_rate": 2e-05, "loss": 0.05921429, "step": 6357 }, { "epoch": 12.716, "grad_norm": 1.1647306680679321, "learning_rate": 2e-05, "loss": 0.05089372, "step": 6358 }, { "epoch": 12.718, "grad_norm": 1.2592551708221436, "learning_rate": 2e-05, "loss": 0.04893951, "step": 6359 }, { "epoch": 12.72, "grad_norm": 1.7902069091796875, "learning_rate": 2e-05, "loss": 0.04070158, "step": 6360 }, { "epoch": 12.722, "grad_norm": 1.436287760734558, "learning_rate": 2e-05, "loss": 0.04522366, "step": 6361 }, { "epoch": 12.724, "grad_norm": 1.3190879821777344, "learning_rate": 2e-05, "loss": 0.04031306, "step": 6362 }, { "epoch": 12.725999999999999, "grad_norm": 1.2581310272216797, "learning_rate": 2e-05, "loss": 0.03700478, "step": 6363 }, { "epoch": 12.728, "grad_norm": 1.1962329149246216, "learning_rate": 2e-05, "loss": 0.03546837, "step": 6364 }, { "epoch": 12.73, "grad_norm": 1.0473268032073975, "learning_rate": 2e-05, "loss": 0.0403455, "step": 6365 }, { "epoch": 12.732, "grad_norm": 1.1484397649765015, "learning_rate": 2e-05, "loss": 0.03856234, "step": 6366 }, { "epoch": 12.734, "grad_norm": 1.0049259662628174, "learning_rate": 2e-05, "loss": 0.04047795, "step": 6367 }, { "epoch": 12.736, "grad_norm": 1.2135729789733887, "learning_rate": 2e-05, "loss": 0.04761136, "step": 6368 }, { "epoch": 12.738, "grad_norm": 2.209233522415161, "learning_rate": 2e-05, "loss": 0.06106929, "step": 6369 }, { "epoch": 12.74, "grad_norm": 1.1767494678497314, "learning_rate": 2e-05, "loss": 0.03807288, "step": 6370 }, { "epoch": 12.742, "grad_norm": 2.3366644382476807, "learning_rate": 2e-05, "loss": 0.06742451, "step": 6371 }, { "epoch": 12.744, "grad_norm": 1.7737507820129395, "learning_rate": 2e-05, "loss": 0.06365604, "step": 6372 }, { "epoch": 12.746, "grad_norm": 1.7560341358184814, "learning_rate": 2e-05, "loss": 0.05094313, "step": 6373 }, { "epoch": 12.748, "grad_norm": 4.217509746551514, "learning_rate": 2e-05, "loss": 0.04649055, "step": 6374 }, { "epoch": 12.75, "grad_norm": 1.5598090887069702, "learning_rate": 2e-05, "loss": 0.04742983, "step": 6375 }, { "epoch": 12.752, "grad_norm": 1.0324444770812988, "learning_rate": 2e-05, "loss": 0.03832695, "step": 6376 }, { "epoch": 12.754, "grad_norm": 1.04880690574646, "learning_rate": 2e-05, "loss": 0.03784114, "step": 6377 }, { "epoch": 12.756, "grad_norm": 1.4626555442810059, "learning_rate": 2e-05, "loss": 0.06083284, "step": 6378 }, { "epoch": 12.758, "grad_norm": 0.9879564642906189, "learning_rate": 2e-05, "loss": 0.03788735, "step": 6379 }, { "epoch": 12.76, "grad_norm": 1.1170194149017334, "learning_rate": 2e-05, "loss": 0.03437886, "step": 6380 }, { "epoch": 12.762, "grad_norm": 2.191253662109375, "learning_rate": 2e-05, "loss": 0.06371867, "step": 6381 }, { "epoch": 12.764, "grad_norm": 1.1988576650619507, "learning_rate": 2e-05, "loss": 0.05880814, "step": 6382 }, { "epoch": 12.766, "grad_norm": 1.0940005779266357, "learning_rate": 2e-05, "loss": 0.04405262, "step": 6383 }, { "epoch": 12.768, "grad_norm": 1.9173158407211304, "learning_rate": 2e-05, "loss": 0.05288801, "step": 6384 }, { "epoch": 12.77, "grad_norm": 1.1071702241897583, "learning_rate": 2e-05, "loss": 0.03890513, "step": 6385 }, { "epoch": 12.772, "grad_norm": 1.6578243970870972, "learning_rate": 2e-05, "loss": 0.04402731, "step": 6386 }, { "epoch": 12.774000000000001, "grad_norm": 1.157480001449585, "learning_rate": 2e-05, "loss": 0.04725039, "step": 6387 }, { "epoch": 12.776, "grad_norm": 1.150146722793579, "learning_rate": 2e-05, "loss": 0.03431505, "step": 6388 }, { "epoch": 12.778, "grad_norm": 1.1529570817947388, "learning_rate": 2e-05, "loss": 0.04831611, "step": 6389 }, { "epoch": 12.78, "grad_norm": 1.2388617992401123, "learning_rate": 2e-05, "loss": 0.03395271, "step": 6390 }, { "epoch": 12.782, "grad_norm": 1.1840648651123047, "learning_rate": 2e-05, "loss": 0.04003302, "step": 6391 }, { "epoch": 12.784, "grad_norm": 1.1195842027664185, "learning_rate": 2e-05, "loss": 0.03489996, "step": 6392 }, { "epoch": 12.786, "grad_norm": 1.217199444770813, "learning_rate": 2e-05, "loss": 0.03269583, "step": 6393 }, { "epoch": 12.788, "grad_norm": 1.1000312566757202, "learning_rate": 2e-05, "loss": 0.0385128, "step": 6394 }, { "epoch": 12.79, "grad_norm": 1.5512630939483643, "learning_rate": 2e-05, "loss": 0.05418114, "step": 6395 }, { "epoch": 12.792, "grad_norm": 1.274643898010254, "learning_rate": 2e-05, "loss": 0.0409357, "step": 6396 }, { "epoch": 12.794, "grad_norm": 0.9262605309486389, "learning_rate": 2e-05, "loss": 0.03208461, "step": 6397 }, { "epoch": 12.796, "grad_norm": 1.3708499670028687, "learning_rate": 2e-05, "loss": 0.04414977, "step": 6398 }, { "epoch": 12.798, "grad_norm": 1.6720889806747437, "learning_rate": 2e-05, "loss": 0.05504428, "step": 6399 }, { "epoch": 12.8, "grad_norm": 1.1998666524887085, "learning_rate": 2e-05, "loss": 0.03634812, "step": 6400 }, { "epoch": 12.802, "grad_norm": 1.2738087177276611, "learning_rate": 2e-05, "loss": 0.0290394, "step": 6401 }, { "epoch": 12.804, "grad_norm": 2.3144185543060303, "learning_rate": 2e-05, "loss": 0.03959765, "step": 6402 }, { "epoch": 12.806000000000001, "grad_norm": 2.0867855548858643, "learning_rate": 2e-05, "loss": 0.04219016, "step": 6403 }, { "epoch": 12.808, "grad_norm": 1.0705214738845825, "learning_rate": 2e-05, "loss": 0.03663403, "step": 6404 }, { "epoch": 12.81, "grad_norm": 1.6160434484481812, "learning_rate": 2e-05, "loss": 0.04320464, "step": 6405 }, { "epoch": 12.812, "grad_norm": 1.8483175039291382, "learning_rate": 2e-05, "loss": 0.05441513, "step": 6406 }, { "epoch": 12.814, "grad_norm": 6.569179534912109, "learning_rate": 2e-05, "loss": 0.06092038, "step": 6407 }, { "epoch": 12.816, "grad_norm": 1.2271147966384888, "learning_rate": 2e-05, "loss": 0.04394755, "step": 6408 }, { "epoch": 12.818, "grad_norm": 1.1272095441818237, "learning_rate": 2e-05, "loss": 0.03704889, "step": 6409 }, { "epoch": 12.82, "grad_norm": 2.213526964187622, "learning_rate": 2e-05, "loss": 0.05000011, "step": 6410 }, { "epoch": 12.822, "grad_norm": 1.1357035636901855, "learning_rate": 2e-05, "loss": 0.04634327, "step": 6411 }, { "epoch": 12.824, "grad_norm": 1.8390262126922607, "learning_rate": 2e-05, "loss": 0.03590558, "step": 6412 }, { "epoch": 12.826, "grad_norm": 1.436282753944397, "learning_rate": 2e-05, "loss": 0.0410841, "step": 6413 }, { "epoch": 12.828, "grad_norm": 1.0409058332443237, "learning_rate": 2e-05, "loss": 0.03310699, "step": 6414 }, { "epoch": 12.83, "grad_norm": 1.3270622491836548, "learning_rate": 2e-05, "loss": 0.04053577, "step": 6415 }, { "epoch": 12.832, "grad_norm": 1.3046685457229614, "learning_rate": 2e-05, "loss": 0.06096248, "step": 6416 }, { "epoch": 12.834, "grad_norm": 3.5594890117645264, "learning_rate": 2e-05, "loss": 0.03877787, "step": 6417 }, { "epoch": 12.836, "grad_norm": 1.1558665037155151, "learning_rate": 2e-05, "loss": 0.03710722, "step": 6418 }, { "epoch": 12.838, "grad_norm": 1.673643708229065, "learning_rate": 2e-05, "loss": 0.04900486, "step": 6419 }, { "epoch": 12.84, "grad_norm": 1.143010139465332, "learning_rate": 2e-05, "loss": 0.04076853, "step": 6420 }, { "epoch": 12.842, "grad_norm": 1.5248163938522339, "learning_rate": 2e-05, "loss": 0.04699665, "step": 6421 }, { "epoch": 12.844, "grad_norm": 1.5471190214157104, "learning_rate": 2e-05, "loss": 0.03741369, "step": 6422 }, { "epoch": 12.846, "grad_norm": 1.3504881858825684, "learning_rate": 2e-05, "loss": 0.03360882, "step": 6423 }, { "epoch": 12.848, "grad_norm": 1.3424625396728516, "learning_rate": 2e-05, "loss": 0.04257516, "step": 6424 }, { "epoch": 12.85, "grad_norm": 1.788443684577942, "learning_rate": 2e-05, "loss": 0.03707795, "step": 6425 }, { "epoch": 12.852, "grad_norm": 1.6131446361541748, "learning_rate": 2e-05, "loss": 0.04443607, "step": 6426 }, { "epoch": 12.854, "grad_norm": 1.4956599473953247, "learning_rate": 2e-05, "loss": 0.05423944, "step": 6427 }, { "epoch": 12.856, "grad_norm": 1.0362269878387451, "learning_rate": 2e-05, "loss": 0.04193046, "step": 6428 }, { "epoch": 12.858, "grad_norm": 1.2342666387557983, "learning_rate": 2e-05, "loss": 0.05017107, "step": 6429 }, { "epoch": 12.86, "grad_norm": 0.8604697585105896, "learning_rate": 2e-05, "loss": 0.02440374, "step": 6430 }, { "epoch": 12.862, "grad_norm": 0.8684183955192566, "learning_rate": 2e-05, "loss": 0.03188442, "step": 6431 }, { "epoch": 12.864, "grad_norm": 1.0182112455368042, "learning_rate": 2e-05, "loss": 0.03407199, "step": 6432 }, { "epoch": 12.866, "grad_norm": 1.6675790548324585, "learning_rate": 2e-05, "loss": 0.04539261, "step": 6433 }, { "epoch": 12.868, "grad_norm": 1.18894362449646, "learning_rate": 2e-05, "loss": 0.04414905, "step": 6434 }, { "epoch": 12.87, "grad_norm": 2.7961933612823486, "learning_rate": 2e-05, "loss": 0.03586389, "step": 6435 }, { "epoch": 12.872, "grad_norm": 4.284701347351074, "learning_rate": 2e-05, "loss": 0.04585398, "step": 6436 }, { "epoch": 12.874, "grad_norm": 3.2627532482147217, "learning_rate": 2e-05, "loss": 0.04893541, "step": 6437 }, { "epoch": 12.876, "grad_norm": 1.0978165864944458, "learning_rate": 2e-05, "loss": 0.04391659, "step": 6438 }, { "epoch": 12.878, "grad_norm": 7.8645734786987305, "learning_rate": 2e-05, "loss": 0.04529089, "step": 6439 }, { "epoch": 12.88, "grad_norm": 1.2035636901855469, "learning_rate": 2e-05, "loss": 0.04635575, "step": 6440 }, { "epoch": 12.882, "grad_norm": 2.5439112186431885, "learning_rate": 2e-05, "loss": 0.0611103, "step": 6441 }, { "epoch": 12.884, "grad_norm": 1.5645581483840942, "learning_rate": 2e-05, "loss": 0.03364026, "step": 6442 }, { "epoch": 12.886, "grad_norm": 1.481075644493103, "learning_rate": 2e-05, "loss": 0.06775667, "step": 6443 }, { "epoch": 12.888, "grad_norm": 2.809765100479126, "learning_rate": 2e-05, "loss": 0.04417407, "step": 6444 }, { "epoch": 12.89, "grad_norm": 1.4419442415237427, "learning_rate": 2e-05, "loss": 0.06172138, "step": 6445 }, { "epoch": 12.892, "grad_norm": 1.2046618461608887, "learning_rate": 2e-05, "loss": 0.04705287, "step": 6446 }, { "epoch": 12.894, "grad_norm": 2.1636664867401123, "learning_rate": 2e-05, "loss": 0.04848149, "step": 6447 }, { "epoch": 12.896, "grad_norm": 1.5486644506454468, "learning_rate": 2e-05, "loss": 0.03825862, "step": 6448 }, { "epoch": 12.898, "grad_norm": 1.7767754793167114, "learning_rate": 2e-05, "loss": 0.03896247, "step": 6449 }, { "epoch": 12.9, "grad_norm": 3.553741693496704, "learning_rate": 2e-05, "loss": 0.03893029, "step": 6450 }, { "epoch": 12.902, "grad_norm": 1.3980071544647217, "learning_rate": 2e-05, "loss": 0.04370964, "step": 6451 }, { "epoch": 12.904, "grad_norm": 1.3102903366088867, "learning_rate": 2e-05, "loss": 0.03697208, "step": 6452 }, { "epoch": 12.906, "grad_norm": 1.3294453620910645, "learning_rate": 2e-05, "loss": 0.04935815, "step": 6453 }, { "epoch": 12.908, "grad_norm": 1.0956804752349854, "learning_rate": 2e-05, "loss": 0.02627955, "step": 6454 }, { "epoch": 12.91, "grad_norm": 1.353235125541687, "learning_rate": 2e-05, "loss": 0.04556011, "step": 6455 }, { "epoch": 12.912, "grad_norm": 1.159041166305542, "learning_rate": 2e-05, "loss": 0.03820071, "step": 6456 }, { "epoch": 12.914, "grad_norm": 0.9859350919723511, "learning_rate": 2e-05, "loss": 0.03681324, "step": 6457 }, { "epoch": 12.916, "grad_norm": 3.7848832607269287, "learning_rate": 2e-05, "loss": 0.03109528, "step": 6458 }, { "epoch": 12.918, "grad_norm": 1.6311275959014893, "learning_rate": 2e-05, "loss": 0.05512562, "step": 6459 }, { "epoch": 12.92, "grad_norm": 1.0418145656585693, "learning_rate": 2e-05, "loss": 0.03659768, "step": 6460 }, { "epoch": 12.922, "grad_norm": 1.9835551977157593, "learning_rate": 2e-05, "loss": 0.03820136, "step": 6461 }, { "epoch": 12.924, "grad_norm": 1.359780192375183, "learning_rate": 2e-05, "loss": 0.04610706, "step": 6462 }, { "epoch": 12.926, "grad_norm": 1.8841519355773926, "learning_rate": 2e-05, "loss": 0.04814822, "step": 6463 }, { "epoch": 12.928, "grad_norm": 1.223069429397583, "learning_rate": 2e-05, "loss": 0.03895852, "step": 6464 }, { "epoch": 12.93, "grad_norm": 1.2000818252563477, "learning_rate": 2e-05, "loss": 0.04265573, "step": 6465 }, { "epoch": 12.932, "grad_norm": 1.2161264419555664, "learning_rate": 2e-05, "loss": 0.03890467, "step": 6466 }, { "epoch": 12.934, "grad_norm": 1.1205466985702515, "learning_rate": 2e-05, "loss": 0.04377972, "step": 6467 }, { "epoch": 12.936, "grad_norm": 1.3872324228286743, "learning_rate": 2e-05, "loss": 0.05282361, "step": 6468 }, { "epoch": 12.938, "grad_norm": 0.9276785254478455, "learning_rate": 2e-05, "loss": 0.02774069, "step": 6469 }, { "epoch": 12.94, "grad_norm": 1.2955553531646729, "learning_rate": 2e-05, "loss": 0.04931837, "step": 6470 }, { "epoch": 12.942, "grad_norm": 1.0661050081253052, "learning_rate": 2e-05, "loss": 0.04816168, "step": 6471 }, { "epoch": 12.943999999999999, "grad_norm": 1.419911503791809, "learning_rate": 2e-05, "loss": 0.05201279, "step": 6472 }, { "epoch": 12.946, "grad_norm": 1.8624131679534912, "learning_rate": 2e-05, "loss": 0.05197743, "step": 6473 }, { "epoch": 12.948, "grad_norm": 1.0450013875961304, "learning_rate": 2e-05, "loss": 0.02894584, "step": 6474 }, { "epoch": 12.95, "grad_norm": 1.498249888420105, "learning_rate": 2e-05, "loss": 0.04716285, "step": 6475 }, { "epoch": 12.952, "grad_norm": 1.7548869848251343, "learning_rate": 2e-05, "loss": 0.04840318, "step": 6476 }, { "epoch": 12.954, "grad_norm": 1.5746945142745972, "learning_rate": 2e-05, "loss": 0.0485624, "step": 6477 }, { "epoch": 12.956, "grad_norm": 1.9599506855010986, "learning_rate": 2e-05, "loss": 0.05219299, "step": 6478 }, { "epoch": 12.958, "grad_norm": 1.5262001752853394, "learning_rate": 2e-05, "loss": 0.0321418, "step": 6479 }, { "epoch": 12.96, "grad_norm": 1.4649481773376465, "learning_rate": 2e-05, "loss": 0.05471179, "step": 6480 }, { "epoch": 12.962, "grad_norm": 1.1299291849136353, "learning_rate": 2e-05, "loss": 0.04470336, "step": 6481 }, { "epoch": 12.964, "grad_norm": 1.9703450202941895, "learning_rate": 2e-05, "loss": 0.05945123, "step": 6482 }, { "epoch": 12.966, "grad_norm": 1.3353742361068726, "learning_rate": 2e-05, "loss": 0.03942459, "step": 6483 }, { "epoch": 12.968, "grad_norm": 1.1149654388427734, "learning_rate": 2e-05, "loss": 0.04058683, "step": 6484 }, { "epoch": 12.97, "grad_norm": 1.6536850929260254, "learning_rate": 2e-05, "loss": 0.04904682, "step": 6485 }, { "epoch": 12.972, "grad_norm": 1.9809800386428833, "learning_rate": 2e-05, "loss": 0.04088744, "step": 6486 }, { "epoch": 12.974, "grad_norm": 1.0145504474639893, "learning_rate": 2e-05, "loss": 0.03976154, "step": 6487 }, { "epoch": 12.975999999999999, "grad_norm": 0.9738277196884155, "learning_rate": 2e-05, "loss": 0.03777561, "step": 6488 }, { "epoch": 12.978, "grad_norm": 1.1342767477035522, "learning_rate": 2e-05, "loss": 0.04310197, "step": 6489 }, { "epoch": 12.98, "grad_norm": 1.8001629114151, "learning_rate": 2e-05, "loss": 0.06380954, "step": 6490 }, { "epoch": 12.982, "grad_norm": 1.2117040157318115, "learning_rate": 2e-05, "loss": 0.03770922, "step": 6491 }, { "epoch": 12.984, "grad_norm": 1.0680229663848877, "learning_rate": 2e-05, "loss": 0.03220348, "step": 6492 }, { "epoch": 12.986, "grad_norm": 1.0495458841323853, "learning_rate": 2e-05, "loss": 0.04047805, "step": 6493 }, { "epoch": 12.988, "grad_norm": 1.6925138235092163, "learning_rate": 2e-05, "loss": 0.03979167, "step": 6494 }, { "epoch": 12.99, "grad_norm": 6.468026161193848, "learning_rate": 2e-05, "loss": 0.04304725, "step": 6495 }, { "epoch": 12.992, "grad_norm": 1.2650574445724487, "learning_rate": 2e-05, "loss": 0.0526716, "step": 6496 }, { "epoch": 12.994, "grad_norm": 1.5298429727554321, "learning_rate": 2e-05, "loss": 0.04853235, "step": 6497 }, { "epoch": 12.996, "grad_norm": 1.51418936252594, "learning_rate": 2e-05, "loss": 0.05758035, "step": 6498 }, { "epoch": 12.998, "grad_norm": 1.0514975786209106, "learning_rate": 2e-05, "loss": 0.03851912, "step": 6499 }, { "epoch": 13.0, "grad_norm": 0.9639672040939331, "learning_rate": 2e-05, "loss": 0.03053028, "step": 6500 }, { "epoch": 13.0, "eval_performance": { "AngleClassification_1": 0.998, "AngleClassification_2": 0.996, "AngleClassification_3": 0.9680638722554891, "Equal_1": 0.994, "Equal_2": 0.9640718562874252, "Equal_3": 0.8582834331337326, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.9920159680638723, "Parallel_1": 0.9899799599198397, "Parallel_2": 0.9979959919839679, "Parallel_3": 0.954, "Perpendicular_1": 0.996, "Perpendicular_2": 0.97, "Perpendicular_3": 0.7334669338677354, "PointLiesOnCircle_1": 0.9959919839679359, "PointLiesOnCircle_2": 0.992, "PointLiesOnCircle_3": 0.9887999999999999, "PointLiesOnLine_1": 0.9879759519038076, "PointLiesOnLine_2": 0.9939879759519038, "PointLiesOnLine_3": 0.9640718562874252 }, "eval_runtime": 226.4718, "eval_samples_per_second": 46.363, "eval_steps_per_second": 0.927, "step": 6500 }, { "epoch": 13.002, "grad_norm": 0.7828242778778076, "learning_rate": 2e-05, "loss": 0.03173206, "step": 6501 }, { "epoch": 13.004, "grad_norm": 1.782631278038025, "learning_rate": 2e-05, "loss": 0.04320549, "step": 6502 }, { "epoch": 13.006, "grad_norm": 1.8873721361160278, "learning_rate": 2e-05, "loss": 0.0523093, "step": 6503 }, { "epoch": 13.008, "grad_norm": 1.5443907976150513, "learning_rate": 2e-05, "loss": 0.0378192, "step": 6504 }, { "epoch": 13.01, "grad_norm": 0.9920076131820679, "learning_rate": 2e-05, "loss": 0.03662051, "step": 6505 }, { "epoch": 13.012, "grad_norm": 1.197677493095398, "learning_rate": 2e-05, "loss": 0.05089819, "step": 6506 }, { "epoch": 13.014, "grad_norm": 1.093849539756775, "learning_rate": 2e-05, "loss": 0.02536143, "step": 6507 }, { "epoch": 13.016, "grad_norm": 1.689639687538147, "learning_rate": 2e-05, "loss": 0.03575768, "step": 6508 }, { "epoch": 13.018, "grad_norm": 1.2394994497299194, "learning_rate": 2e-05, "loss": 0.03672951, "step": 6509 }, { "epoch": 13.02, "grad_norm": 1.2133610248565674, "learning_rate": 2e-05, "loss": 0.04451734, "step": 6510 }, { "epoch": 13.022, "grad_norm": 1.1811102628707886, "learning_rate": 2e-05, "loss": 0.036816, "step": 6511 }, { "epoch": 13.024, "grad_norm": 0.8901414275169373, "learning_rate": 2e-05, "loss": 0.03340251, "step": 6512 }, { "epoch": 13.026, "grad_norm": 1.354149341583252, "learning_rate": 2e-05, "loss": 0.04412079, "step": 6513 }, { "epoch": 13.028, "grad_norm": 1.2340337038040161, "learning_rate": 2e-05, "loss": 0.0358588, "step": 6514 }, { "epoch": 13.03, "grad_norm": 1.311306357383728, "learning_rate": 2e-05, "loss": 0.04572522, "step": 6515 }, { "epoch": 13.032, "grad_norm": 1.6359919309616089, "learning_rate": 2e-05, "loss": 0.04489954, "step": 6516 }, { "epoch": 13.034, "grad_norm": 1.2337026596069336, "learning_rate": 2e-05, "loss": 0.03803317, "step": 6517 }, { "epoch": 13.036, "grad_norm": 1.9316576719284058, "learning_rate": 2e-05, "loss": 0.05387948, "step": 6518 }, { "epoch": 13.038, "grad_norm": 1.3743391036987305, "learning_rate": 2e-05, "loss": 0.03881828, "step": 6519 }, { "epoch": 13.04, "grad_norm": 1.4887734651565552, "learning_rate": 2e-05, "loss": 0.05761924, "step": 6520 }, { "epoch": 13.042, "grad_norm": 1.091062068939209, "learning_rate": 2e-05, "loss": 0.0361223, "step": 6521 }, { "epoch": 13.044, "grad_norm": 1.6364877223968506, "learning_rate": 2e-05, "loss": 0.05511393, "step": 6522 }, { "epoch": 13.046, "grad_norm": 1.5620850324630737, "learning_rate": 2e-05, "loss": 0.03663145, "step": 6523 }, { "epoch": 13.048, "grad_norm": 1.0851964950561523, "learning_rate": 2e-05, "loss": 0.03636155, "step": 6524 }, { "epoch": 13.05, "grad_norm": 3.1290059089660645, "learning_rate": 2e-05, "loss": 0.05202487, "step": 6525 }, { "epoch": 13.052, "grad_norm": 2.183480739593506, "learning_rate": 2e-05, "loss": 0.03541419, "step": 6526 }, { "epoch": 13.054, "grad_norm": 1.3249014616012573, "learning_rate": 2e-05, "loss": 0.03748333, "step": 6527 }, { "epoch": 13.056, "grad_norm": 1.0118151903152466, "learning_rate": 2e-05, "loss": 0.03109203, "step": 6528 }, { "epoch": 13.058, "grad_norm": 3.3238203525543213, "learning_rate": 2e-05, "loss": 0.04952617, "step": 6529 }, { "epoch": 13.06, "grad_norm": 0.7821671366691589, "learning_rate": 2e-05, "loss": 0.02596595, "step": 6530 }, { "epoch": 13.062, "grad_norm": 0.9515002965927124, "learning_rate": 2e-05, "loss": 0.03527415, "step": 6531 }, { "epoch": 13.064, "grad_norm": 1.0977836847305298, "learning_rate": 2e-05, "loss": 0.03957658, "step": 6532 }, { "epoch": 13.066, "grad_norm": 1.0060389041900635, "learning_rate": 2e-05, "loss": 0.04286555, "step": 6533 }, { "epoch": 13.068, "grad_norm": 1.1531269550323486, "learning_rate": 2e-05, "loss": 0.04112451, "step": 6534 }, { "epoch": 13.07, "grad_norm": 1.6965833902359009, "learning_rate": 2e-05, "loss": 0.05074886, "step": 6535 }, { "epoch": 13.072, "grad_norm": 1.488512396812439, "learning_rate": 2e-05, "loss": 0.03712457, "step": 6536 }, { "epoch": 13.074, "grad_norm": 1.2076475620269775, "learning_rate": 2e-05, "loss": 0.04220496, "step": 6537 }, { "epoch": 13.076, "grad_norm": 1.1648658514022827, "learning_rate": 2e-05, "loss": 0.04553771, "step": 6538 }, { "epoch": 13.078, "grad_norm": 3.989266872406006, "learning_rate": 2e-05, "loss": 0.04580517, "step": 6539 }, { "epoch": 13.08, "grad_norm": 1.6307330131530762, "learning_rate": 2e-05, "loss": 0.04242995, "step": 6540 }, { "epoch": 13.082, "grad_norm": 1.6501014232635498, "learning_rate": 2e-05, "loss": 0.0507371, "step": 6541 }, { "epoch": 13.084, "grad_norm": 1.2813491821289062, "learning_rate": 2e-05, "loss": 0.05639303, "step": 6542 }, { "epoch": 13.086, "grad_norm": 1.0876442193984985, "learning_rate": 2e-05, "loss": 0.03926321, "step": 6543 }, { "epoch": 13.088, "grad_norm": 0.8566210269927979, "learning_rate": 2e-05, "loss": 0.02937926, "step": 6544 }, { "epoch": 13.09, "grad_norm": 2.0628321170806885, "learning_rate": 2e-05, "loss": 0.05154063, "step": 6545 }, { "epoch": 13.092, "grad_norm": 2.1200478076934814, "learning_rate": 2e-05, "loss": 0.0620321, "step": 6546 }, { "epoch": 13.094, "grad_norm": 1.0273569822311401, "learning_rate": 2e-05, "loss": 0.03169393, "step": 6547 }, { "epoch": 13.096, "grad_norm": 1.6486334800720215, "learning_rate": 2e-05, "loss": 0.06193471, "step": 6548 }, { "epoch": 13.098, "grad_norm": 1.2995295524597168, "learning_rate": 2e-05, "loss": 0.03834078, "step": 6549 }, { "epoch": 13.1, "grad_norm": 1.4458826780319214, "learning_rate": 2e-05, "loss": 0.04348934, "step": 6550 }, { "epoch": 13.102, "grad_norm": 0.9468706250190735, "learning_rate": 2e-05, "loss": 0.0265991, "step": 6551 }, { "epoch": 13.104, "grad_norm": 1.5052721500396729, "learning_rate": 2e-05, "loss": 0.03971453, "step": 6552 }, { "epoch": 13.106, "grad_norm": 1.0897274017333984, "learning_rate": 2e-05, "loss": 0.04572339, "step": 6553 }, { "epoch": 13.108, "grad_norm": 2.31817889213562, "learning_rate": 2e-05, "loss": 0.04776686, "step": 6554 }, { "epoch": 13.11, "grad_norm": 1.23155677318573, "learning_rate": 2e-05, "loss": 0.04778636, "step": 6555 }, { "epoch": 13.112, "grad_norm": 1.2805047035217285, "learning_rate": 2e-05, "loss": 0.0421508, "step": 6556 }, { "epoch": 13.114, "grad_norm": 1.1453936100006104, "learning_rate": 2e-05, "loss": 0.03191746, "step": 6557 }, { "epoch": 13.116, "grad_norm": 1.4269286394119263, "learning_rate": 2e-05, "loss": 0.06121207, "step": 6558 }, { "epoch": 13.118, "grad_norm": 0.9146667718887329, "learning_rate": 2e-05, "loss": 0.03032596, "step": 6559 }, { "epoch": 13.12, "grad_norm": 1.1641069650650024, "learning_rate": 2e-05, "loss": 0.03436857, "step": 6560 }, { "epoch": 13.122, "grad_norm": 1.1072880029678345, "learning_rate": 2e-05, "loss": 0.0352905, "step": 6561 }, { "epoch": 13.124, "grad_norm": 1.4498995542526245, "learning_rate": 2e-05, "loss": 0.04916134, "step": 6562 }, { "epoch": 13.126, "grad_norm": 1.1582210063934326, "learning_rate": 2e-05, "loss": 0.04251442, "step": 6563 }, { "epoch": 13.128, "grad_norm": 1.4876548051834106, "learning_rate": 2e-05, "loss": 0.04602171, "step": 6564 }, { "epoch": 13.13, "grad_norm": 1.677201509475708, "learning_rate": 2e-05, "loss": 0.04439024, "step": 6565 }, { "epoch": 13.132, "grad_norm": 1.3733197450637817, "learning_rate": 2e-05, "loss": 0.04142016, "step": 6566 }, { "epoch": 13.134, "grad_norm": 1.2022961378097534, "learning_rate": 2e-05, "loss": 0.05051107, "step": 6567 }, { "epoch": 13.136, "grad_norm": 1.0408233404159546, "learning_rate": 2e-05, "loss": 0.03698466, "step": 6568 }, { "epoch": 13.138, "grad_norm": 1.3255647420883179, "learning_rate": 2e-05, "loss": 0.05154828, "step": 6569 }, { "epoch": 13.14, "grad_norm": 1.9496055841445923, "learning_rate": 2e-05, "loss": 0.04161049, "step": 6570 }, { "epoch": 13.142, "grad_norm": 1.45006263256073, "learning_rate": 2e-05, "loss": 0.04057205, "step": 6571 }, { "epoch": 13.144, "grad_norm": 1.8636252880096436, "learning_rate": 2e-05, "loss": 0.06695044, "step": 6572 }, { "epoch": 13.146, "grad_norm": 1.3660047054290771, "learning_rate": 2e-05, "loss": 0.05198672, "step": 6573 }, { "epoch": 13.148, "grad_norm": 1.6898857355117798, "learning_rate": 2e-05, "loss": 0.04781071, "step": 6574 }, { "epoch": 13.15, "grad_norm": 5.843007564544678, "learning_rate": 2e-05, "loss": 0.05130579, "step": 6575 }, { "epoch": 13.152, "grad_norm": 1.3876694440841675, "learning_rate": 2e-05, "loss": 0.05688769, "step": 6576 }, { "epoch": 13.154, "grad_norm": 0.967074990272522, "learning_rate": 2e-05, "loss": 0.0433304, "step": 6577 }, { "epoch": 13.156, "grad_norm": 1.0467573404312134, "learning_rate": 2e-05, "loss": 0.03011406, "step": 6578 }, { "epoch": 13.158, "grad_norm": 1.736803412437439, "learning_rate": 2e-05, "loss": 0.0421577, "step": 6579 }, { "epoch": 13.16, "grad_norm": 1.5483254194259644, "learning_rate": 2e-05, "loss": 0.04508199, "step": 6580 }, { "epoch": 13.162, "grad_norm": 1.1511164903640747, "learning_rate": 2e-05, "loss": 0.03072161, "step": 6581 }, { "epoch": 13.164, "grad_norm": 2.89504075050354, "learning_rate": 2e-05, "loss": 0.05156472, "step": 6582 }, { "epoch": 13.166, "grad_norm": 1.201817512512207, "learning_rate": 2e-05, "loss": 0.03565674, "step": 6583 }, { "epoch": 13.168, "grad_norm": 2.410062074661255, "learning_rate": 2e-05, "loss": 0.04463431, "step": 6584 }, { "epoch": 13.17, "grad_norm": 0.9088672995567322, "learning_rate": 2e-05, "loss": 0.02330456, "step": 6585 }, { "epoch": 13.172, "grad_norm": 1.1246742010116577, "learning_rate": 2e-05, "loss": 0.0419739, "step": 6586 }, { "epoch": 13.174, "grad_norm": 1.754203200340271, "learning_rate": 2e-05, "loss": 0.04619648, "step": 6587 }, { "epoch": 13.176, "grad_norm": 2.315643072128296, "learning_rate": 2e-05, "loss": 0.06521422, "step": 6588 }, { "epoch": 13.178, "grad_norm": 1.0164684057235718, "learning_rate": 2e-05, "loss": 0.04499497, "step": 6589 }, { "epoch": 13.18, "grad_norm": 1.6308876276016235, "learning_rate": 2e-05, "loss": 0.05851418, "step": 6590 }, { "epoch": 13.182, "grad_norm": 1.4268776178359985, "learning_rate": 2e-05, "loss": 0.05136928, "step": 6591 }, { "epoch": 13.184, "grad_norm": 2.1274125576019287, "learning_rate": 2e-05, "loss": 0.05368359, "step": 6592 }, { "epoch": 13.186, "grad_norm": 2.1470067501068115, "learning_rate": 2e-05, "loss": 0.04764347, "step": 6593 }, { "epoch": 13.188, "grad_norm": 1.082139253616333, "learning_rate": 2e-05, "loss": 0.04102166, "step": 6594 }, { "epoch": 13.19, "grad_norm": 1.3327008485794067, "learning_rate": 2e-05, "loss": 0.06069902, "step": 6595 }, { "epoch": 13.192, "grad_norm": 1.2000174522399902, "learning_rate": 2e-05, "loss": 0.04001678, "step": 6596 }, { "epoch": 13.194, "grad_norm": 1.2445776462554932, "learning_rate": 2e-05, "loss": 0.05458623, "step": 6597 }, { "epoch": 13.196, "grad_norm": 0.7143605351448059, "learning_rate": 2e-05, "loss": 0.02305287, "step": 6598 }, { "epoch": 13.198, "grad_norm": 1.5529810190200806, "learning_rate": 2e-05, "loss": 0.0460292, "step": 6599 }, { "epoch": 13.2, "grad_norm": 1.0145153999328613, "learning_rate": 2e-05, "loss": 0.03968326, "step": 6600 }, { "epoch": 13.202, "grad_norm": 2.562831401824951, "learning_rate": 2e-05, "loss": 0.0514598, "step": 6601 }, { "epoch": 13.204, "grad_norm": 2.5142977237701416, "learning_rate": 2e-05, "loss": 0.04803064, "step": 6602 }, { "epoch": 13.206, "grad_norm": 0.8493307828903198, "learning_rate": 2e-05, "loss": 0.03162545, "step": 6603 }, { "epoch": 13.208, "grad_norm": 1.5501047372817993, "learning_rate": 2e-05, "loss": 0.0464934, "step": 6604 }, { "epoch": 13.21, "grad_norm": 1.5637036561965942, "learning_rate": 2e-05, "loss": 0.05569514, "step": 6605 }, { "epoch": 13.212, "grad_norm": 1.4015189409255981, "learning_rate": 2e-05, "loss": 0.05374562, "step": 6606 }, { "epoch": 13.214, "grad_norm": 1.3560268878936768, "learning_rate": 2e-05, "loss": 0.04332697, "step": 6607 }, { "epoch": 13.216, "grad_norm": 1.829144835472107, "learning_rate": 2e-05, "loss": 0.06463364, "step": 6608 }, { "epoch": 13.218, "grad_norm": 1.1475543975830078, "learning_rate": 2e-05, "loss": 0.04143241, "step": 6609 }, { "epoch": 13.22, "grad_norm": 1.0967352390289307, "learning_rate": 2e-05, "loss": 0.04054123, "step": 6610 }, { "epoch": 13.222, "grad_norm": 1.3601809740066528, "learning_rate": 2e-05, "loss": 0.03934775, "step": 6611 }, { "epoch": 13.224, "grad_norm": 0.9302353858947754, "learning_rate": 2e-05, "loss": 0.03270544, "step": 6612 }, { "epoch": 13.226, "grad_norm": 1.7621548175811768, "learning_rate": 2e-05, "loss": 0.05618281, "step": 6613 }, { "epoch": 13.228, "grad_norm": 1.440403938293457, "learning_rate": 2e-05, "loss": 0.04851909, "step": 6614 }, { "epoch": 13.23, "grad_norm": 0.8671087622642517, "learning_rate": 2e-05, "loss": 0.03538548, "step": 6615 }, { "epoch": 13.232, "grad_norm": 1.3257696628570557, "learning_rate": 2e-05, "loss": 0.03618744, "step": 6616 }, { "epoch": 13.234, "grad_norm": 1.5346698760986328, "learning_rate": 2e-05, "loss": 0.05036522, "step": 6617 }, { "epoch": 13.236, "grad_norm": 1.5200269222259521, "learning_rate": 2e-05, "loss": 0.05201121, "step": 6618 }, { "epoch": 13.238, "grad_norm": 1.8437978029251099, "learning_rate": 2e-05, "loss": 0.0491823, "step": 6619 }, { "epoch": 13.24, "grad_norm": 1.3438249826431274, "learning_rate": 2e-05, "loss": 0.04863397, "step": 6620 }, { "epoch": 13.242, "grad_norm": 1.2552480697631836, "learning_rate": 2e-05, "loss": 0.0371317, "step": 6621 }, { "epoch": 13.244, "grad_norm": 1.0937210321426392, "learning_rate": 2e-05, "loss": 0.03394495, "step": 6622 }, { "epoch": 13.246, "grad_norm": 2.265315532684326, "learning_rate": 2e-05, "loss": 0.04309444, "step": 6623 }, { "epoch": 13.248, "grad_norm": 1.5332144498825073, "learning_rate": 2e-05, "loss": 0.04996818, "step": 6624 }, { "epoch": 13.25, "grad_norm": 1.181949257850647, "learning_rate": 2e-05, "loss": 0.03416567, "step": 6625 }, { "epoch": 13.252, "grad_norm": 0.9218850135803223, "learning_rate": 2e-05, "loss": 0.03545377, "step": 6626 }, { "epoch": 13.254, "grad_norm": 1.1980159282684326, "learning_rate": 2e-05, "loss": 0.04754961, "step": 6627 }, { "epoch": 13.256, "grad_norm": 1.2684178352355957, "learning_rate": 2e-05, "loss": 0.04073523, "step": 6628 }, { "epoch": 13.258, "grad_norm": 1.221415400505066, "learning_rate": 2e-05, "loss": 0.03944853, "step": 6629 }, { "epoch": 13.26, "grad_norm": 1.3978205919265747, "learning_rate": 2e-05, "loss": 0.05636044, "step": 6630 }, { "epoch": 13.262, "grad_norm": 1.1385477781295776, "learning_rate": 2e-05, "loss": 0.038518, "step": 6631 }, { "epoch": 13.264, "grad_norm": 0.9235950112342834, "learning_rate": 2e-05, "loss": 0.0386169, "step": 6632 }, { "epoch": 13.266, "grad_norm": 0.998964250087738, "learning_rate": 2e-05, "loss": 0.03849095, "step": 6633 }, { "epoch": 13.268, "grad_norm": 1.1807550191879272, "learning_rate": 2e-05, "loss": 0.04427938, "step": 6634 }, { "epoch": 13.27, "grad_norm": 1.8048549890518188, "learning_rate": 2e-05, "loss": 0.05709498, "step": 6635 }, { "epoch": 13.272, "grad_norm": 1.356444239616394, "learning_rate": 2e-05, "loss": 0.04096004, "step": 6636 }, { "epoch": 13.274000000000001, "grad_norm": 1.1942660808563232, "learning_rate": 2e-05, "loss": 0.0479869, "step": 6637 }, { "epoch": 13.276, "grad_norm": 1.3519905805587769, "learning_rate": 2e-05, "loss": 0.04803681, "step": 6638 }, { "epoch": 13.278, "grad_norm": 2.0840697288513184, "learning_rate": 2e-05, "loss": 0.05779681, "step": 6639 }, { "epoch": 13.28, "grad_norm": 1.7280572652816772, "learning_rate": 2e-05, "loss": 0.04558481, "step": 6640 }, { "epoch": 13.282, "grad_norm": 0.96333247423172, "learning_rate": 2e-05, "loss": 0.03358809, "step": 6641 }, { "epoch": 13.284, "grad_norm": 1.2904242277145386, "learning_rate": 2e-05, "loss": 0.03558896, "step": 6642 }, { "epoch": 13.286, "grad_norm": 1.6364506483078003, "learning_rate": 2e-05, "loss": 0.04362378, "step": 6643 }, { "epoch": 13.288, "grad_norm": 1.847243070602417, "learning_rate": 2e-05, "loss": 0.04681784, "step": 6644 }, { "epoch": 13.29, "grad_norm": 1.4986414909362793, "learning_rate": 2e-05, "loss": 0.05653023, "step": 6645 }, { "epoch": 13.292, "grad_norm": 0.9744773507118225, "learning_rate": 2e-05, "loss": 0.03100562, "step": 6646 }, { "epoch": 13.294, "grad_norm": 3.910753011703491, "learning_rate": 2e-05, "loss": 0.05208149, "step": 6647 }, { "epoch": 13.296, "grad_norm": 1.940828800201416, "learning_rate": 2e-05, "loss": 0.04828981, "step": 6648 }, { "epoch": 13.298, "grad_norm": 0.8967350721359253, "learning_rate": 2e-05, "loss": 0.0277104, "step": 6649 }, { "epoch": 13.3, "grad_norm": 1.63437020778656, "learning_rate": 2e-05, "loss": 0.05645341, "step": 6650 }, { "epoch": 13.302, "grad_norm": 2.576859951019287, "learning_rate": 2e-05, "loss": 0.05670431, "step": 6651 }, { "epoch": 13.304, "grad_norm": 1.0181019306182861, "learning_rate": 2e-05, "loss": 0.02822084, "step": 6652 }, { "epoch": 13.306, "grad_norm": 1.2848466634750366, "learning_rate": 2e-05, "loss": 0.03874955, "step": 6653 }, { "epoch": 13.308, "grad_norm": 1.3398500680923462, "learning_rate": 2e-05, "loss": 0.04126788, "step": 6654 }, { "epoch": 13.31, "grad_norm": 1.1850476264953613, "learning_rate": 2e-05, "loss": 0.04200507, "step": 6655 }, { "epoch": 13.312, "grad_norm": 1.9471945762634277, "learning_rate": 2e-05, "loss": 0.04575231, "step": 6656 }, { "epoch": 13.314, "grad_norm": 1.040307641029358, "learning_rate": 2e-05, "loss": 0.04075161, "step": 6657 }, { "epoch": 13.316, "grad_norm": 2.0187220573425293, "learning_rate": 2e-05, "loss": 0.05225504, "step": 6658 }, { "epoch": 13.318, "grad_norm": 1.2644380331039429, "learning_rate": 2e-05, "loss": 0.03621522, "step": 6659 }, { "epoch": 13.32, "grad_norm": 1.1418365240097046, "learning_rate": 2e-05, "loss": 0.0367348, "step": 6660 }, { "epoch": 13.322, "grad_norm": 1.9665353298187256, "learning_rate": 2e-05, "loss": 0.03425472, "step": 6661 }, { "epoch": 13.324, "grad_norm": 1.1387509107589722, "learning_rate": 2e-05, "loss": 0.03701718, "step": 6662 }, { "epoch": 13.326, "grad_norm": 1.3636493682861328, "learning_rate": 2e-05, "loss": 0.02778786, "step": 6663 }, { "epoch": 13.328, "grad_norm": 1.709892988204956, "learning_rate": 2e-05, "loss": 0.03386191, "step": 6664 }, { "epoch": 13.33, "grad_norm": 1.3221369981765747, "learning_rate": 2e-05, "loss": 0.0351978, "step": 6665 }, { "epoch": 13.332, "grad_norm": 1.4954278469085693, "learning_rate": 2e-05, "loss": 0.05141364, "step": 6666 }, { "epoch": 13.334, "grad_norm": 1.0428029298782349, "learning_rate": 2e-05, "loss": 0.0349552, "step": 6667 }, { "epoch": 13.336, "grad_norm": 2.8376824855804443, "learning_rate": 2e-05, "loss": 0.0508856, "step": 6668 }, { "epoch": 13.338, "grad_norm": 1.1509932279586792, "learning_rate": 2e-05, "loss": 0.0545894, "step": 6669 }, { "epoch": 13.34, "grad_norm": 1.2159600257873535, "learning_rate": 2e-05, "loss": 0.04009219, "step": 6670 }, { "epoch": 13.342, "grad_norm": 2.1603293418884277, "learning_rate": 2e-05, "loss": 0.0554916, "step": 6671 }, { "epoch": 13.344, "grad_norm": 1.0430208444595337, "learning_rate": 2e-05, "loss": 0.03750631, "step": 6672 }, { "epoch": 13.346, "grad_norm": 1.709657073020935, "learning_rate": 2e-05, "loss": 0.03355395, "step": 6673 }, { "epoch": 13.348, "grad_norm": 1.536507248878479, "learning_rate": 2e-05, "loss": 0.03488292, "step": 6674 }, { "epoch": 13.35, "grad_norm": 1.2476590871810913, "learning_rate": 2e-05, "loss": 0.04612921, "step": 6675 }, { "epoch": 13.352, "grad_norm": 1.3954904079437256, "learning_rate": 2e-05, "loss": 0.03104328, "step": 6676 }, { "epoch": 13.354, "grad_norm": 0.9118001461029053, "learning_rate": 2e-05, "loss": 0.03600406, "step": 6677 }, { "epoch": 13.356, "grad_norm": 1.302927017211914, "learning_rate": 2e-05, "loss": 0.04378239, "step": 6678 }, { "epoch": 13.358, "grad_norm": 0.9710843563079834, "learning_rate": 2e-05, "loss": 0.03664377, "step": 6679 }, { "epoch": 13.36, "grad_norm": 1.462697982788086, "learning_rate": 2e-05, "loss": 0.05048002, "step": 6680 }, { "epoch": 13.362, "grad_norm": 1.2698971033096313, "learning_rate": 2e-05, "loss": 0.04131015, "step": 6681 }, { "epoch": 13.364, "grad_norm": 2.334305763244629, "learning_rate": 2e-05, "loss": 0.05682716, "step": 6682 }, { "epoch": 13.366, "grad_norm": 0.9665806889533997, "learning_rate": 2e-05, "loss": 0.03379928, "step": 6683 }, { "epoch": 13.368, "grad_norm": 2.5614027976989746, "learning_rate": 2e-05, "loss": 0.05788635, "step": 6684 }, { "epoch": 13.37, "grad_norm": 1.1701353788375854, "learning_rate": 2e-05, "loss": 0.05182283, "step": 6685 }, { "epoch": 13.372, "grad_norm": 1.5176148414611816, "learning_rate": 2e-05, "loss": 0.04578764, "step": 6686 }, { "epoch": 13.374, "grad_norm": 1.03241765499115, "learning_rate": 2e-05, "loss": 0.02910157, "step": 6687 }, { "epoch": 13.376, "grad_norm": 1.4912577867507935, "learning_rate": 2e-05, "loss": 0.05624252, "step": 6688 }, { "epoch": 13.378, "grad_norm": 2.000216245651245, "learning_rate": 2e-05, "loss": 0.07832874, "step": 6689 }, { "epoch": 13.38, "grad_norm": 0.9870254993438721, "learning_rate": 2e-05, "loss": 0.03531221, "step": 6690 }, { "epoch": 13.382, "grad_norm": 1.357419729232788, "learning_rate": 2e-05, "loss": 0.04698518, "step": 6691 }, { "epoch": 13.384, "grad_norm": 1.0344133377075195, "learning_rate": 2e-05, "loss": 0.03177445, "step": 6692 }, { "epoch": 13.386, "grad_norm": 1.1080751419067383, "learning_rate": 2e-05, "loss": 0.04208478, "step": 6693 }, { "epoch": 13.388, "grad_norm": 1.2324795722961426, "learning_rate": 2e-05, "loss": 0.054432, "step": 6694 }, { "epoch": 13.39, "grad_norm": 0.9383613467216492, "learning_rate": 2e-05, "loss": 0.03874735, "step": 6695 }, { "epoch": 13.392, "grad_norm": 1.4383766651153564, "learning_rate": 2e-05, "loss": 0.04498213, "step": 6696 }, { "epoch": 13.394, "grad_norm": 3.113072395324707, "learning_rate": 2e-05, "loss": 0.03912481, "step": 6697 }, { "epoch": 13.396, "grad_norm": 2.6363680362701416, "learning_rate": 2e-05, "loss": 0.05699215, "step": 6698 }, { "epoch": 13.398, "grad_norm": 1.64556086063385, "learning_rate": 2e-05, "loss": 0.03673462, "step": 6699 }, { "epoch": 13.4, "grad_norm": 1.4506820440292358, "learning_rate": 2e-05, "loss": 0.03451908, "step": 6700 }, { "epoch": 13.402, "grad_norm": 1.2463765144348145, "learning_rate": 2e-05, "loss": 0.03161247, "step": 6701 }, { "epoch": 13.404, "grad_norm": 0.906589925289154, "learning_rate": 2e-05, "loss": 0.03612549, "step": 6702 }, { "epoch": 13.406, "grad_norm": 2.0598535537719727, "learning_rate": 2e-05, "loss": 0.05772673, "step": 6703 }, { "epoch": 13.408, "grad_norm": 2.7383081912994385, "learning_rate": 2e-05, "loss": 0.05676894, "step": 6704 }, { "epoch": 13.41, "grad_norm": 0.8603478670120239, "learning_rate": 2e-05, "loss": 0.03033182, "step": 6705 }, { "epoch": 13.412, "grad_norm": 1.2971959114074707, "learning_rate": 2e-05, "loss": 0.04674042, "step": 6706 }, { "epoch": 13.414, "grad_norm": 1.5236220359802246, "learning_rate": 2e-05, "loss": 0.04426293, "step": 6707 }, { "epoch": 13.416, "grad_norm": 1.5733261108398438, "learning_rate": 2e-05, "loss": 0.04068376, "step": 6708 }, { "epoch": 13.418, "grad_norm": 1.0372813940048218, "learning_rate": 2e-05, "loss": 0.03074005, "step": 6709 }, { "epoch": 13.42, "grad_norm": 1.040541172027588, "learning_rate": 2e-05, "loss": 0.03368942, "step": 6710 }, { "epoch": 13.422, "grad_norm": 1.1496481895446777, "learning_rate": 2e-05, "loss": 0.04329249, "step": 6711 }, { "epoch": 13.424, "grad_norm": 1.0836849212646484, "learning_rate": 2e-05, "loss": 0.04265901, "step": 6712 }, { "epoch": 13.426, "grad_norm": 1.1534157991409302, "learning_rate": 2e-05, "loss": 0.03363778, "step": 6713 }, { "epoch": 13.428, "grad_norm": 1.7870947122573853, "learning_rate": 2e-05, "loss": 0.03490334, "step": 6714 }, { "epoch": 13.43, "grad_norm": 1.3456041812896729, "learning_rate": 2e-05, "loss": 0.04892221, "step": 6715 }, { "epoch": 13.432, "grad_norm": 1.2972897291183472, "learning_rate": 2e-05, "loss": 0.0546928, "step": 6716 }, { "epoch": 13.434, "grad_norm": 1.291178822517395, "learning_rate": 2e-05, "loss": 0.05447577, "step": 6717 }, { "epoch": 13.436, "grad_norm": 1.1853821277618408, "learning_rate": 2e-05, "loss": 0.04232875, "step": 6718 }, { "epoch": 13.438, "grad_norm": 1.1682018041610718, "learning_rate": 2e-05, "loss": 0.05160779, "step": 6719 }, { "epoch": 13.44, "grad_norm": 1.006597876548767, "learning_rate": 2e-05, "loss": 0.03653274, "step": 6720 }, { "epoch": 13.442, "grad_norm": 1.1587315797805786, "learning_rate": 2e-05, "loss": 0.03796876, "step": 6721 }, { "epoch": 13.444, "grad_norm": 2.0830724239349365, "learning_rate": 2e-05, "loss": 0.05352376, "step": 6722 }, { "epoch": 13.446, "grad_norm": 1.744712233543396, "learning_rate": 2e-05, "loss": 0.06779357, "step": 6723 }, { "epoch": 13.448, "grad_norm": 1.3811239004135132, "learning_rate": 2e-05, "loss": 0.03959166, "step": 6724 }, { "epoch": 13.45, "grad_norm": 1.7073357105255127, "learning_rate": 2e-05, "loss": 0.04887697, "step": 6725 }, { "epoch": 13.452, "grad_norm": 1.185996174812317, "learning_rate": 2e-05, "loss": 0.05762554, "step": 6726 }, { "epoch": 13.454, "grad_norm": 1.5772680044174194, "learning_rate": 2e-05, "loss": 0.04324632, "step": 6727 }, { "epoch": 13.456, "grad_norm": 1.1356761455535889, "learning_rate": 2e-05, "loss": 0.03531969, "step": 6728 }, { "epoch": 13.458, "grad_norm": 1.2080693244934082, "learning_rate": 2e-05, "loss": 0.03642308, "step": 6729 }, { "epoch": 13.46, "grad_norm": 1.1640558242797852, "learning_rate": 2e-05, "loss": 0.04593107, "step": 6730 }, { "epoch": 13.462, "grad_norm": 1.2975975275039673, "learning_rate": 2e-05, "loss": 0.04841218, "step": 6731 }, { "epoch": 13.464, "grad_norm": 1.1821534633636475, "learning_rate": 2e-05, "loss": 0.03736658, "step": 6732 }, { "epoch": 13.466, "grad_norm": 1.7350611686706543, "learning_rate": 2e-05, "loss": 0.05214888, "step": 6733 }, { "epoch": 13.468, "grad_norm": 1.3901562690734863, "learning_rate": 2e-05, "loss": 0.05181838, "step": 6734 }, { "epoch": 13.47, "grad_norm": 1.1504522562026978, "learning_rate": 2e-05, "loss": 0.03707007, "step": 6735 }, { "epoch": 13.472, "grad_norm": 1.5149831771850586, "learning_rate": 2e-05, "loss": 0.04737812, "step": 6736 }, { "epoch": 13.474, "grad_norm": 1.2422029972076416, "learning_rate": 2e-05, "loss": 0.04593173, "step": 6737 }, { "epoch": 13.475999999999999, "grad_norm": 1.1405526399612427, "learning_rate": 2e-05, "loss": 0.03212631, "step": 6738 }, { "epoch": 13.478, "grad_norm": 1.8547710180282593, "learning_rate": 2e-05, "loss": 0.04072982, "step": 6739 }, { "epoch": 13.48, "grad_norm": 1.2541835308074951, "learning_rate": 2e-05, "loss": 0.03975745, "step": 6740 }, { "epoch": 13.482, "grad_norm": 1.873019814491272, "learning_rate": 2e-05, "loss": 0.04224801, "step": 6741 }, { "epoch": 13.484, "grad_norm": 1.2246263027191162, "learning_rate": 2e-05, "loss": 0.03548772, "step": 6742 }, { "epoch": 13.486, "grad_norm": 1.5510194301605225, "learning_rate": 2e-05, "loss": 0.04826787, "step": 6743 }, { "epoch": 13.488, "grad_norm": 1.4928739070892334, "learning_rate": 2e-05, "loss": 0.03853283, "step": 6744 }, { "epoch": 13.49, "grad_norm": 1.2632286548614502, "learning_rate": 2e-05, "loss": 0.03628128, "step": 6745 }, { "epoch": 13.492, "grad_norm": 1.3708271980285645, "learning_rate": 2e-05, "loss": 0.04372465, "step": 6746 }, { "epoch": 13.494, "grad_norm": 1.194564938545227, "learning_rate": 2e-05, "loss": 0.04159772, "step": 6747 }, { "epoch": 13.496, "grad_norm": 1.0240092277526855, "learning_rate": 2e-05, "loss": 0.05373198, "step": 6748 }, { "epoch": 13.498, "grad_norm": 1.237774133682251, "learning_rate": 2e-05, "loss": 0.04013044, "step": 6749 }, { "epoch": 13.5, "grad_norm": 1.5504425764083862, "learning_rate": 2e-05, "loss": 0.05620146, "step": 6750 }, { "epoch": 13.502, "grad_norm": 1.4804521799087524, "learning_rate": 2e-05, "loss": 0.03649015, "step": 6751 }, { "epoch": 13.504, "grad_norm": 0.8222930431365967, "learning_rate": 2e-05, "loss": 0.03705214, "step": 6752 }, { "epoch": 13.506, "grad_norm": 2.1745126247406006, "learning_rate": 2e-05, "loss": 0.04818426, "step": 6753 }, { "epoch": 13.508, "grad_norm": 1.8995699882507324, "learning_rate": 2e-05, "loss": 0.05376872, "step": 6754 }, { "epoch": 13.51, "grad_norm": 1.3382285833358765, "learning_rate": 2e-05, "loss": 0.03650725, "step": 6755 }, { "epoch": 13.512, "grad_norm": 1.3371944427490234, "learning_rate": 2e-05, "loss": 0.03946814, "step": 6756 }, { "epoch": 13.514, "grad_norm": 1.1457873582839966, "learning_rate": 2e-05, "loss": 0.03886025, "step": 6757 }, { "epoch": 13.516, "grad_norm": 1.1448132991790771, "learning_rate": 2e-05, "loss": 0.04712182, "step": 6758 }, { "epoch": 13.518, "grad_norm": 1.4828782081604004, "learning_rate": 2e-05, "loss": 0.03719214, "step": 6759 }, { "epoch": 13.52, "grad_norm": 1.0648431777954102, "learning_rate": 2e-05, "loss": 0.04081538, "step": 6760 }, { "epoch": 13.522, "grad_norm": 1.129176378250122, "learning_rate": 2e-05, "loss": 0.04356973, "step": 6761 }, { "epoch": 13.524000000000001, "grad_norm": 2.10121488571167, "learning_rate": 2e-05, "loss": 0.06023614, "step": 6762 }, { "epoch": 13.526, "grad_norm": 1.0562833547592163, "learning_rate": 2e-05, "loss": 0.03647918, "step": 6763 }, { "epoch": 13.528, "grad_norm": 1.0096609592437744, "learning_rate": 2e-05, "loss": 0.04023831, "step": 6764 }, { "epoch": 13.53, "grad_norm": 2.1515493392944336, "learning_rate": 2e-05, "loss": 0.03626168, "step": 6765 }, { "epoch": 13.532, "grad_norm": 1.9057750701904297, "learning_rate": 2e-05, "loss": 0.03350605, "step": 6766 }, { "epoch": 13.534, "grad_norm": 1.2841042280197144, "learning_rate": 2e-05, "loss": 0.05026775, "step": 6767 }, { "epoch": 13.536, "grad_norm": 0.7441931962966919, "learning_rate": 2e-05, "loss": 0.02336359, "step": 6768 }, { "epoch": 13.538, "grad_norm": 0.9778230786323547, "learning_rate": 2e-05, "loss": 0.02938631, "step": 6769 }, { "epoch": 13.54, "grad_norm": 1.0956193208694458, "learning_rate": 2e-05, "loss": 0.0417144, "step": 6770 }, { "epoch": 13.542, "grad_norm": 0.8444679379463196, "learning_rate": 2e-05, "loss": 0.02365974, "step": 6771 }, { "epoch": 13.544, "grad_norm": 1.2109949588775635, "learning_rate": 2e-05, "loss": 0.0453985, "step": 6772 }, { "epoch": 13.546, "grad_norm": 1.271173357963562, "learning_rate": 2e-05, "loss": 0.03714771, "step": 6773 }, { "epoch": 13.548, "grad_norm": 1.4884365797042847, "learning_rate": 2e-05, "loss": 0.04173386, "step": 6774 }, { "epoch": 13.55, "grad_norm": 1.263060450553894, "learning_rate": 2e-05, "loss": 0.04918937, "step": 6775 }, { "epoch": 13.552, "grad_norm": 1.838823676109314, "learning_rate": 2e-05, "loss": 0.05225374, "step": 6776 }, { "epoch": 13.554, "grad_norm": 1.6952496767044067, "learning_rate": 2e-05, "loss": 0.05605008, "step": 6777 }, { "epoch": 13.556000000000001, "grad_norm": 1.122870922088623, "learning_rate": 2e-05, "loss": 0.03862893, "step": 6778 }, { "epoch": 13.558, "grad_norm": 2.2334885597229004, "learning_rate": 2e-05, "loss": 0.04381295, "step": 6779 }, { "epoch": 13.56, "grad_norm": 2.3463003635406494, "learning_rate": 2e-05, "loss": 0.06292856, "step": 6780 }, { "epoch": 13.562, "grad_norm": 1.1781302690505981, "learning_rate": 2e-05, "loss": 0.0432412, "step": 6781 }, { "epoch": 13.564, "grad_norm": 1.5159214735031128, "learning_rate": 2e-05, "loss": 0.07161657, "step": 6782 }, { "epoch": 13.566, "grad_norm": 1.4085272550582886, "learning_rate": 2e-05, "loss": 0.04207098, "step": 6783 }, { "epoch": 13.568, "grad_norm": 0.9838753342628479, "learning_rate": 2e-05, "loss": 0.03958264, "step": 6784 }, { "epoch": 13.57, "grad_norm": 1.288290023803711, "learning_rate": 2e-05, "loss": 0.03104446, "step": 6785 }, { "epoch": 13.572, "grad_norm": 1.2938867807388306, "learning_rate": 2e-05, "loss": 0.02755605, "step": 6786 }, { "epoch": 13.574, "grad_norm": 0.9718157052993774, "learning_rate": 2e-05, "loss": 0.03226566, "step": 6787 }, { "epoch": 13.576, "grad_norm": 1.0730926990509033, "learning_rate": 2e-05, "loss": 0.03626654, "step": 6788 }, { "epoch": 13.578, "grad_norm": 1.5193839073181152, "learning_rate": 2e-05, "loss": 0.04053587, "step": 6789 }, { "epoch": 13.58, "grad_norm": 2.2980332374572754, "learning_rate": 2e-05, "loss": 0.04546433, "step": 6790 }, { "epoch": 13.582, "grad_norm": 1.184172511100769, "learning_rate": 2e-05, "loss": 0.03922792, "step": 6791 }, { "epoch": 13.584, "grad_norm": 1.1608223915100098, "learning_rate": 2e-05, "loss": 0.04254516, "step": 6792 }, { "epoch": 13.586, "grad_norm": 0.911185622215271, "learning_rate": 2e-05, "loss": 0.03383351, "step": 6793 }, { "epoch": 13.588, "grad_norm": 1.048732042312622, "learning_rate": 2e-05, "loss": 0.04052911, "step": 6794 }, { "epoch": 13.59, "grad_norm": 1.9913432598114014, "learning_rate": 2e-05, "loss": 0.04581846, "step": 6795 }, { "epoch": 13.592, "grad_norm": 7.5923590660095215, "learning_rate": 2e-05, "loss": 0.04791499, "step": 6796 }, { "epoch": 13.594, "grad_norm": 1.3415007591247559, "learning_rate": 2e-05, "loss": 0.03521166, "step": 6797 }, { "epoch": 13.596, "grad_norm": 1.0784838199615479, "learning_rate": 2e-05, "loss": 0.03846452, "step": 6798 }, { "epoch": 13.598, "grad_norm": 1.8709461688995361, "learning_rate": 2e-05, "loss": 0.03164558, "step": 6799 }, { "epoch": 13.6, "grad_norm": 1.1107147932052612, "learning_rate": 2e-05, "loss": 0.03997769, "step": 6800 }, { "epoch": 13.602, "grad_norm": 1.2348753213882446, "learning_rate": 2e-05, "loss": 0.04370356, "step": 6801 }, { "epoch": 13.604, "grad_norm": 0.9056511521339417, "learning_rate": 2e-05, "loss": 0.02630659, "step": 6802 }, { "epoch": 13.606, "grad_norm": 1.8926500082015991, "learning_rate": 2e-05, "loss": 0.04153786, "step": 6803 }, { "epoch": 13.608, "grad_norm": 1.2148364782333374, "learning_rate": 2e-05, "loss": 0.03618565, "step": 6804 }, { "epoch": 13.61, "grad_norm": 1.2170469760894775, "learning_rate": 2e-05, "loss": 0.04323645, "step": 6805 }, { "epoch": 13.612, "grad_norm": 2.1806893348693848, "learning_rate": 2e-05, "loss": 0.05993907, "step": 6806 }, { "epoch": 13.614, "grad_norm": 1.8596165180206299, "learning_rate": 2e-05, "loss": 0.03296434, "step": 6807 }, { "epoch": 13.616, "grad_norm": 1.353456974029541, "learning_rate": 2e-05, "loss": 0.05409721, "step": 6808 }, { "epoch": 13.618, "grad_norm": 1.064528226852417, "learning_rate": 2e-05, "loss": 0.03024231, "step": 6809 }, { "epoch": 13.62, "grad_norm": 1.1600133180618286, "learning_rate": 2e-05, "loss": 0.03491781, "step": 6810 }, { "epoch": 13.622, "grad_norm": 1.9706141948699951, "learning_rate": 2e-05, "loss": 0.05067671, "step": 6811 }, { "epoch": 13.624, "grad_norm": 0.9442355036735535, "learning_rate": 2e-05, "loss": 0.03200253, "step": 6812 }, { "epoch": 13.626, "grad_norm": 1.613079309463501, "learning_rate": 2e-05, "loss": 0.04598642, "step": 6813 }, { "epoch": 13.628, "grad_norm": 6.15842342376709, "learning_rate": 2e-05, "loss": 0.05411564, "step": 6814 }, { "epoch": 13.63, "grad_norm": 1.3837759494781494, "learning_rate": 2e-05, "loss": 0.04823672, "step": 6815 }, { "epoch": 13.632, "grad_norm": 4.1100754737854, "learning_rate": 2e-05, "loss": 0.0420612, "step": 6816 }, { "epoch": 13.634, "grad_norm": 1.6604418754577637, "learning_rate": 2e-05, "loss": 0.04633522, "step": 6817 }, { "epoch": 13.636, "grad_norm": 1.3121565580368042, "learning_rate": 2e-05, "loss": 0.03734418, "step": 6818 }, { "epoch": 13.638, "grad_norm": 1.443253755569458, "learning_rate": 2e-05, "loss": 0.04456676, "step": 6819 }, { "epoch": 13.64, "grad_norm": 1.1545835733413696, "learning_rate": 2e-05, "loss": 0.0379322, "step": 6820 }, { "epoch": 13.642, "grad_norm": 3.696871757507324, "learning_rate": 2e-05, "loss": 0.04654226, "step": 6821 }, { "epoch": 13.644, "grad_norm": 2.698059558868408, "learning_rate": 2e-05, "loss": 0.04772506, "step": 6822 }, { "epoch": 13.646, "grad_norm": 1.0709425210952759, "learning_rate": 2e-05, "loss": 0.03561562, "step": 6823 }, { "epoch": 13.648, "grad_norm": 1.0646330118179321, "learning_rate": 2e-05, "loss": 0.03484592, "step": 6824 }, { "epoch": 13.65, "grad_norm": 1.7137929201126099, "learning_rate": 2e-05, "loss": 0.05322141, "step": 6825 }, { "epoch": 13.652, "grad_norm": 0.9703205227851868, "learning_rate": 2e-05, "loss": 0.03583568, "step": 6826 }, { "epoch": 13.654, "grad_norm": 1.0372276306152344, "learning_rate": 2e-05, "loss": 0.04091222, "step": 6827 }, { "epoch": 13.656, "grad_norm": 1.275295615196228, "learning_rate": 2e-05, "loss": 0.03378256, "step": 6828 }, { "epoch": 13.658, "grad_norm": 1.4294553995132446, "learning_rate": 2e-05, "loss": 0.02650681, "step": 6829 }, { "epoch": 13.66, "grad_norm": 1.4252593517303467, "learning_rate": 2e-05, "loss": 0.04224738, "step": 6830 }, { "epoch": 13.662, "grad_norm": 2.1036853790283203, "learning_rate": 2e-05, "loss": 0.0587058, "step": 6831 }, { "epoch": 13.664, "grad_norm": 1.4288240671157837, "learning_rate": 2e-05, "loss": 0.03694381, "step": 6832 }, { "epoch": 13.666, "grad_norm": 1.4910424947738647, "learning_rate": 2e-05, "loss": 0.03965569, "step": 6833 }, { "epoch": 13.668, "grad_norm": 1.8841309547424316, "learning_rate": 2e-05, "loss": 0.05315829, "step": 6834 }, { "epoch": 13.67, "grad_norm": 1.3187294006347656, "learning_rate": 2e-05, "loss": 0.0441037, "step": 6835 }, { "epoch": 13.672, "grad_norm": 1.5964475870132446, "learning_rate": 2e-05, "loss": 0.03939425, "step": 6836 }, { "epoch": 13.674, "grad_norm": 1.5482819080352783, "learning_rate": 2e-05, "loss": 0.03919324, "step": 6837 }, { "epoch": 13.676, "grad_norm": 0.9610161185264587, "learning_rate": 2e-05, "loss": 0.03885095, "step": 6838 }, { "epoch": 13.678, "grad_norm": 1.404104471206665, "learning_rate": 2e-05, "loss": 0.04278558, "step": 6839 }, { "epoch": 13.68, "grad_norm": 1.1305415630340576, "learning_rate": 2e-05, "loss": 0.03205916, "step": 6840 }, { "epoch": 13.682, "grad_norm": 1.5377179384231567, "learning_rate": 2e-05, "loss": 0.04802936, "step": 6841 }, { "epoch": 13.684, "grad_norm": 1.0097688436508179, "learning_rate": 2e-05, "loss": 0.03578993, "step": 6842 }, { "epoch": 13.686, "grad_norm": 1.115171194076538, "learning_rate": 2e-05, "loss": 0.02412172, "step": 6843 }, { "epoch": 13.688, "grad_norm": 1.3743011951446533, "learning_rate": 2e-05, "loss": 0.04627132, "step": 6844 }, { "epoch": 13.69, "grad_norm": 1.713904619216919, "learning_rate": 2e-05, "loss": 0.03674621, "step": 6845 }, { "epoch": 13.692, "grad_norm": 2.326152801513672, "learning_rate": 2e-05, "loss": 0.05216234, "step": 6846 }, { "epoch": 13.693999999999999, "grad_norm": 1.5268429517745972, "learning_rate": 2e-05, "loss": 0.03656963, "step": 6847 }, { "epoch": 13.696, "grad_norm": 1.4034799337387085, "learning_rate": 2e-05, "loss": 0.03888054, "step": 6848 }, { "epoch": 13.698, "grad_norm": 0.8901887536048889, "learning_rate": 2e-05, "loss": 0.03020368, "step": 6849 }, { "epoch": 13.7, "grad_norm": 1.5183515548706055, "learning_rate": 2e-05, "loss": 0.03925902, "step": 6850 }, { "epoch": 13.702, "grad_norm": 1.4339163303375244, "learning_rate": 2e-05, "loss": 0.03720367, "step": 6851 }, { "epoch": 13.704, "grad_norm": 1.1391642093658447, "learning_rate": 2e-05, "loss": 0.0445908, "step": 6852 }, { "epoch": 13.706, "grad_norm": 1.0143404006958008, "learning_rate": 2e-05, "loss": 0.03798932, "step": 6853 }, { "epoch": 13.708, "grad_norm": 1.042809009552002, "learning_rate": 2e-05, "loss": 0.0306283, "step": 6854 }, { "epoch": 13.71, "grad_norm": 1.0130754709243774, "learning_rate": 2e-05, "loss": 0.03276106, "step": 6855 }, { "epoch": 13.712, "grad_norm": 1.4400782585144043, "learning_rate": 2e-05, "loss": 0.06013027, "step": 6856 }, { "epoch": 13.714, "grad_norm": 0.8150467276573181, "learning_rate": 2e-05, "loss": 0.02554425, "step": 6857 }, { "epoch": 13.716, "grad_norm": 1.1223294734954834, "learning_rate": 2e-05, "loss": 0.0357421, "step": 6858 }, { "epoch": 13.718, "grad_norm": 2.065725803375244, "learning_rate": 2e-05, "loss": 0.03707106, "step": 6859 }, { "epoch": 13.72, "grad_norm": 1.8507651090621948, "learning_rate": 2e-05, "loss": 0.05931889, "step": 6860 }, { "epoch": 13.722, "grad_norm": 1.1880868673324585, "learning_rate": 2e-05, "loss": 0.02942112, "step": 6861 }, { "epoch": 13.724, "grad_norm": 1.179278016090393, "learning_rate": 2e-05, "loss": 0.03240401, "step": 6862 }, { "epoch": 13.725999999999999, "grad_norm": 3.0401697158813477, "learning_rate": 2e-05, "loss": 0.03715945, "step": 6863 }, { "epoch": 13.728, "grad_norm": 1.5099812746047974, "learning_rate": 2e-05, "loss": 0.05022899, "step": 6864 }, { "epoch": 13.73, "grad_norm": 1.2504340410232544, "learning_rate": 2e-05, "loss": 0.033149, "step": 6865 }, { "epoch": 13.732, "grad_norm": 1.390339970588684, "learning_rate": 2e-05, "loss": 0.04740375, "step": 6866 }, { "epoch": 13.734, "grad_norm": 1.3759169578552246, "learning_rate": 2e-05, "loss": 0.04141629, "step": 6867 }, { "epoch": 13.736, "grad_norm": 2.340059280395508, "learning_rate": 2e-05, "loss": 0.06072231, "step": 6868 }, { "epoch": 13.738, "grad_norm": 1.72480309009552, "learning_rate": 2e-05, "loss": 0.04704083, "step": 6869 }, { "epoch": 13.74, "grad_norm": 2.2766008377075195, "learning_rate": 2e-05, "loss": 0.03970015, "step": 6870 }, { "epoch": 13.742, "grad_norm": 0.9225317239761353, "learning_rate": 2e-05, "loss": 0.0267899, "step": 6871 }, { "epoch": 13.744, "grad_norm": 1.686190128326416, "learning_rate": 2e-05, "loss": 0.0391211, "step": 6872 }, { "epoch": 13.746, "grad_norm": 1.0415571928024292, "learning_rate": 2e-05, "loss": 0.03432431, "step": 6873 }, { "epoch": 13.748, "grad_norm": 1.7514448165893555, "learning_rate": 2e-05, "loss": 0.05624153, "step": 6874 }, { "epoch": 13.75, "grad_norm": 1.0762476921081543, "learning_rate": 2e-05, "loss": 0.03919458, "step": 6875 }, { "epoch": 13.752, "grad_norm": 1.4674787521362305, "learning_rate": 2e-05, "loss": 0.03218137, "step": 6876 }, { "epoch": 13.754, "grad_norm": 1.9578840732574463, "learning_rate": 2e-05, "loss": 0.04159928, "step": 6877 }, { "epoch": 13.756, "grad_norm": 1.9095509052276611, "learning_rate": 2e-05, "loss": 0.04101838, "step": 6878 }, { "epoch": 13.758, "grad_norm": 1.5603878498077393, "learning_rate": 2e-05, "loss": 0.04076263, "step": 6879 }, { "epoch": 13.76, "grad_norm": 2.285066604614258, "learning_rate": 2e-05, "loss": 0.04084171, "step": 6880 }, { "epoch": 13.762, "grad_norm": 1.0601623058319092, "learning_rate": 2e-05, "loss": 0.04159306, "step": 6881 }, { "epoch": 13.764, "grad_norm": 0.9693232178688049, "learning_rate": 2e-05, "loss": 0.04004766, "step": 6882 }, { "epoch": 13.766, "grad_norm": 1.9593684673309326, "learning_rate": 2e-05, "loss": 0.05278585, "step": 6883 }, { "epoch": 13.768, "grad_norm": 1.7436232566833496, "learning_rate": 2e-05, "loss": 0.04101618, "step": 6884 }, { "epoch": 13.77, "grad_norm": 2.0569212436676025, "learning_rate": 2e-05, "loss": 0.0421036, "step": 6885 }, { "epoch": 13.772, "grad_norm": 2.661609172821045, "learning_rate": 2e-05, "loss": 0.05512743, "step": 6886 }, { "epoch": 13.774000000000001, "grad_norm": 1.209600806236267, "learning_rate": 2e-05, "loss": 0.03806563, "step": 6887 }, { "epoch": 13.776, "grad_norm": 1.7323733568191528, "learning_rate": 2e-05, "loss": 0.04656436, "step": 6888 }, { "epoch": 13.778, "grad_norm": 1.02362060546875, "learning_rate": 2e-05, "loss": 0.02516697, "step": 6889 }, { "epoch": 13.78, "grad_norm": 1.7323473691940308, "learning_rate": 2e-05, "loss": 0.053158, "step": 6890 }, { "epoch": 13.782, "grad_norm": 0.9210132956504822, "learning_rate": 2e-05, "loss": 0.03491342, "step": 6891 }, { "epoch": 13.784, "grad_norm": 1.350464105606079, "learning_rate": 2e-05, "loss": 0.02975042, "step": 6892 }, { "epoch": 13.786, "grad_norm": 1.3000922203063965, "learning_rate": 2e-05, "loss": 0.05045019, "step": 6893 }, { "epoch": 13.788, "grad_norm": 1.5011271238327026, "learning_rate": 2e-05, "loss": 0.03117067, "step": 6894 }, { "epoch": 13.79, "grad_norm": 1.8774755001068115, "learning_rate": 2e-05, "loss": 0.03168941, "step": 6895 }, { "epoch": 13.792, "grad_norm": 1.2744702100753784, "learning_rate": 2e-05, "loss": 0.04838127, "step": 6896 }, { "epoch": 13.794, "grad_norm": 1.5665106773376465, "learning_rate": 2e-05, "loss": 0.05183577, "step": 6897 }, { "epoch": 13.796, "grad_norm": 0.8226173520088196, "learning_rate": 2e-05, "loss": 0.02836231, "step": 6898 }, { "epoch": 13.798, "grad_norm": 1.4769667387008667, "learning_rate": 2e-05, "loss": 0.03458671, "step": 6899 }, { "epoch": 13.8, "grad_norm": 0.8754921555519104, "learning_rate": 2e-05, "loss": 0.03141877, "step": 6900 }, { "epoch": 13.802, "grad_norm": 1.1194977760314941, "learning_rate": 2e-05, "loss": 0.03978465, "step": 6901 }, { "epoch": 13.804, "grad_norm": 1.5424827337265015, "learning_rate": 2e-05, "loss": 0.03826003, "step": 6902 }, { "epoch": 13.806000000000001, "grad_norm": 1.114986538887024, "learning_rate": 2e-05, "loss": 0.04370611, "step": 6903 }, { "epoch": 13.808, "grad_norm": 1.1948245763778687, "learning_rate": 2e-05, "loss": 0.04667969, "step": 6904 }, { "epoch": 13.81, "grad_norm": 1.5277029275894165, "learning_rate": 2e-05, "loss": 0.03033474, "step": 6905 }, { "epoch": 13.812, "grad_norm": 0.9101424217224121, "learning_rate": 2e-05, "loss": 0.02876927, "step": 6906 }, { "epoch": 13.814, "grad_norm": 1.2480432987213135, "learning_rate": 2e-05, "loss": 0.04940585, "step": 6907 }, { "epoch": 13.816, "grad_norm": 1.0929871797561646, "learning_rate": 2e-05, "loss": 0.03923286, "step": 6908 }, { "epoch": 13.818, "grad_norm": 1.1175719499588013, "learning_rate": 2e-05, "loss": 0.04925608, "step": 6909 }, { "epoch": 13.82, "grad_norm": 1.6127643585205078, "learning_rate": 2e-05, "loss": 0.04544271, "step": 6910 }, { "epoch": 13.822, "grad_norm": 1.1693975925445557, "learning_rate": 2e-05, "loss": 0.03580477, "step": 6911 }, { "epoch": 13.824, "grad_norm": 1.5068267583847046, "learning_rate": 2e-05, "loss": 0.0306951, "step": 6912 }, { "epoch": 13.826, "grad_norm": 1.4507073163986206, "learning_rate": 2e-05, "loss": 0.04374061, "step": 6913 }, { "epoch": 13.828, "grad_norm": 1.034610390663147, "learning_rate": 2e-05, "loss": 0.0357092, "step": 6914 }, { "epoch": 13.83, "grad_norm": 2.4994475841522217, "learning_rate": 2e-05, "loss": 0.04055736, "step": 6915 }, { "epoch": 13.832, "grad_norm": 1.1172064542770386, "learning_rate": 2e-05, "loss": 0.03705576, "step": 6916 }, { "epoch": 13.834, "grad_norm": 1.2466567754745483, "learning_rate": 2e-05, "loss": 0.03888496, "step": 6917 }, { "epoch": 13.836, "grad_norm": 1.7447651624679565, "learning_rate": 2e-05, "loss": 0.05006548, "step": 6918 }, { "epoch": 13.838, "grad_norm": 1.3122609853744507, "learning_rate": 2e-05, "loss": 0.03600291, "step": 6919 }, { "epoch": 13.84, "grad_norm": 0.9558256268501282, "learning_rate": 2e-05, "loss": 0.033617, "step": 6920 }, { "epoch": 13.842, "grad_norm": 1.1473444700241089, "learning_rate": 2e-05, "loss": 0.0451837, "step": 6921 }, { "epoch": 13.844, "grad_norm": 1.5461227893829346, "learning_rate": 2e-05, "loss": 0.03855314, "step": 6922 }, { "epoch": 13.846, "grad_norm": 1.384239673614502, "learning_rate": 2e-05, "loss": 0.0580353, "step": 6923 }, { "epoch": 13.848, "grad_norm": 1.3983010053634644, "learning_rate": 2e-05, "loss": 0.04143279, "step": 6924 }, { "epoch": 13.85, "grad_norm": 3.343487024307251, "learning_rate": 2e-05, "loss": 0.02516507, "step": 6925 }, { "epoch": 13.852, "grad_norm": 1.0734851360321045, "learning_rate": 2e-05, "loss": 0.039743, "step": 6926 }, { "epoch": 13.854, "grad_norm": 1.1995145082473755, "learning_rate": 2e-05, "loss": 0.0470471, "step": 6927 }, { "epoch": 13.856, "grad_norm": 1.4415197372436523, "learning_rate": 2e-05, "loss": 0.04325081, "step": 6928 }, { "epoch": 13.858, "grad_norm": 1.4374862909317017, "learning_rate": 2e-05, "loss": 0.03836238, "step": 6929 }, { "epoch": 13.86, "grad_norm": 1.5242687463760376, "learning_rate": 2e-05, "loss": 0.03844429, "step": 6930 }, { "epoch": 13.862, "grad_norm": 1.5752671957015991, "learning_rate": 2e-05, "loss": 0.04652138, "step": 6931 }, { "epoch": 13.864, "grad_norm": 0.9790463447570801, "learning_rate": 2e-05, "loss": 0.03887503, "step": 6932 }, { "epoch": 13.866, "grad_norm": 1.2099833488464355, "learning_rate": 2e-05, "loss": 0.05030723, "step": 6933 }, { "epoch": 13.868, "grad_norm": 1.5278183221817017, "learning_rate": 2e-05, "loss": 0.03947808, "step": 6934 }, { "epoch": 13.87, "grad_norm": 1.3118654489517212, "learning_rate": 2e-05, "loss": 0.04740915, "step": 6935 }, { "epoch": 13.872, "grad_norm": 1.618256688117981, "learning_rate": 2e-05, "loss": 0.04932495, "step": 6936 }, { "epoch": 13.874, "grad_norm": 1.0943766832351685, "learning_rate": 2e-05, "loss": 0.03536448, "step": 6937 }, { "epoch": 13.876, "grad_norm": 1.03761625289917, "learning_rate": 2e-05, "loss": 0.03931534, "step": 6938 }, { "epoch": 13.878, "grad_norm": 1.673818826675415, "learning_rate": 2e-05, "loss": 0.03226377, "step": 6939 }, { "epoch": 13.88, "grad_norm": 2.6841683387756348, "learning_rate": 2e-05, "loss": 0.05732997, "step": 6940 }, { "epoch": 13.882, "grad_norm": 1.627537488937378, "learning_rate": 2e-05, "loss": 0.05065906, "step": 6941 }, { "epoch": 13.884, "grad_norm": 0.9038280844688416, "learning_rate": 2e-05, "loss": 0.03653199, "step": 6942 }, { "epoch": 13.886, "grad_norm": 2.546828508377075, "learning_rate": 2e-05, "loss": 0.04579455, "step": 6943 }, { "epoch": 13.888, "grad_norm": 1.0984400510787964, "learning_rate": 2e-05, "loss": 0.032439, "step": 6944 }, { "epoch": 13.89, "grad_norm": 1.4772499799728394, "learning_rate": 2e-05, "loss": 0.04808973, "step": 6945 }, { "epoch": 13.892, "grad_norm": 1.352643370628357, "learning_rate": 2e-05, "loss": 0.03147309, "step": 6946 }, { "epoch": 13.894, "grad_norm": 1.0693271160125732, "learning_rate": 2e-05, "loss": 0.05313559, "step": 6947 }, { "epoch": 13.896, "grad_norm": 1.8942362070083618, "learning_rate": 2e-05, "loss": 0.04041258, "step": 6948 }, { "epoch": 13.898, "grad_norm": 1.7818812131881714, "learning_rate": 2e-05, "loss": 0.05491984, "step": 6949 }, { "epoch": 13.9, "grad_norm": 1.2428054809570312, "learning_rate": 2e-05, "loss": 0.03566056, "step": 6950 }, { "epoch": 13.902, "grad_norm": 1.200032114982605, "learning_rate": 2e-05, "loss": 0.04039552, "step": 6951 }, { "epoch": 13.904, "grad_norm": 2.027585983276367, "learning_rate": 2e-05, "loss": 0.03995071, "step": 6952 }, { "epoch": 13.906, "grad_norm": 1.765017032623291, "learning_rate": 2e-05, "loss": 0.04368681, "step": 6953 }, { "epoch": 13.908, "grad_norm": 1.485695481300354, "learning_rate": 2e-05, "loss": 0.04324844, "step": 6954 }, { "epoch": 13.91, "grad_norm": 1.273964285850525, "learning_rate": 2e-05, "loss": 0.04189643, "step": 6955 }, { "epoch": 13.912, "grad_norm": 1.303305745124817, "learning_rate": 2e-05, "loss": 0.0540895, "step": 6956 }, { "epoch": 13.914, "grad_norm": 1.149793267250061, "learning_rate": 2e-05, "loss": 0.04192396, "step": 6957 }, { "epoch": 13.916, "grad_norm": 1.1662238836288452, "learning_rate": 2e-05, "loss": 0.03651194, "step": 6958 }, { "epoch": 13.918, "grad_norm": 0.8947420716285706, "learning_rate": 2e-05, "loss": 0.03032719, "step": 6959 }, { "epoch": 13.92, "grad_norm": 1.9991943836212158, "learning_rate": 2e-05, "loss": 0.03783449, "step": 6960 }, { "epoch": 13.922, "grad_norm": 1.2026978731155396, "learning_rate": 2e-05, "loss": 0.03814108, "step": 6961 }, { "epoch": 13.924, "grad_norm": 1.4767955541610718, "learning_rate": 2e-05, "loss": 0.04172668, "step": 6962 }, { "epoch": 13.926, "grad_norm": 0.9975910782814026, "learning_rate": 2e-05, "loss": 0.03926569, "step": 6963 }, { "epoch": 13.928, "grad_norm": 1.3972898721694946, "learning_rate": 2e-05, "loss": 0.05067389, "step": 6964 }, { "epoch": 13.93, "grad_norm": 1.2450178861618042, "learning_rate": 2e-05, "loss": 0.04553521, "step": 6965 }, { "epoch": 13.932, "grad_norm": 1.4498207569122314, "learning_rate": 2e-05, "loss": 0.06393066, "step": 6966 }, { "epoch": 13.934, "grad_norm": 1.758060336112976, "learning_rate": 2e-05, "loss": 0.0448221, "step": 6967 }, { "epoch": 13.936, "grad_norm": 1.1852418184280396, "learning_rate": 2e-05, "loss": 0.04526831, "step": 6968 }, { "epoch": 13.938, "grad_norm": 2.2559356689453125, "learning_rate": 2e-05, "loss": 0.04437196, "step": 6969 }, { "epoch": 13.94, "grad_norm": 1.5420230627059937, "learning_rate": 2e-05, "loss": 0.0365928, "step": 6970 }, { "epoch": 13.942, "grad_norm": 1.0498487949371338, "learning_rate": 2e-05, "loss": 0.03509529, "step": 6971 }, { "epoch": 13.943999999999999, "grad_norm": 1.1230368614196777, "learning_rate": 2e-05, "loss": 0.03348869, "step": 6972 }, { "epoch": 13.946, "grad_norm": 1.4302164316177368, "learning_rate": 2e-05, "loss": 0.04673934, "step": 6973 }, { "epoch": 13.948, "grad_norm": 1.5155372619628906, "learning_rate": 2e-05, "loss": 0.04505777, "step": 6974 }, { "epoch": 13.95, "grad_norm": 0.9956756830215454, "learning_rate": 2e-05, "loss": 0.03277112, "step": 6975 }, { "epoch": 13.952, "grad_norm": 0.9545654058456421, "learning_rate": 2e-05, "loss": 0.0381178, "step": 6976 }, { "epoch": 13.954, "grad_norm": 1.8389108180999756, "learning_rate": 2e-05, "loss": 0.04874717, "step": 6977 }, { "epoch": 13.956, "grad_norm": 1.2004295587539673, "learning_rate": 2e-05, "loss": 0.03742421, "step": 6978 }, { "epoch": 13.958, "grad_norm": 2.7141454219818115, "learning_rate": 2e-05, "loss": 0.07491717, "step": 6979 }, { "epoch": 13.96, "grad_norm": 1.7227081060409546, "learning_rate": 2e-05, "loss": 0.03458557, "step": 6980 }, { "epoch": 13.962, "grad_norm": 0.9264144897460938, "learning_rate": 2e-05, "loss": 0.02987941, "step": 6981 }, { "epoch": 13.964, "grad_norm": 2.0929763317108154, "learning_rate": 2e-05, "loss": 0.06620575, "step": 6982 }, { "epoch": 13.966, "grad_norm": 1.4285316467285156, "learning_rate": 2e-05, "loss": 0.03368532, "step": 6983 }, { "epoch": 13.968, "grad_norm": 0.9414852857589722, "learning_rate": 2e-05, "loss": 0.03221244, "step": 6984 }, { "epoch": 13.97, "grad_norm": 0.9055988788604736, "learning_rate": 2e-05, "loss": 0.02919992, "step": 6985 }, { "epoch": 13.972, "grad_norm": 1.1984152793884277, "learning_rate": 2e-05, "loss": 0.04340597, "step": 6986 }, { "epoch": 13.974, "grad_norm": 1.0239797830581665, "learning_rate": 2e-05, "loss": 0.03583422, "step": 6987 }, { "epoch": 13.975999999999999, "grad_norm": 1.1550936698913574, "learning_rate": 2e-05, "loss": 0.04548385, "step": 6988 }, { "epoch": 13.978, "grad_norm": 1.5664997100830078, "learning_rate": 2e-05, "loss": 0.04664285, "step": 6989 }, { "epoch": 13.98, "grad_norm": 1.2001768350601196, "learning_rate": 2e-05, "loss": 0.04582148, "step": 6990 }, { "epoch": 13.982, "grad_norm": 2.012150287628174, "learning_rate": 2e-05, "loss": 0.03806469, "step": 6991 }, { "epoch": 13.984, "grad_norm": 1.069664478302002, "learning_rate": 2e-05, "loss": 0.04124177, "step": 6992 }, { "epoch": 13.986, "grad_norm": 1.3372480869293213, "learning_rate": 2e-05, "loss": 0.03079654, "step": 6993 }, { "epoch": 13.988, "grad_norm": 1.312620759010315, "learning_rate": 2e-05, "loss": 0.03986132, "step": 6994 }, { "epoch": 13.99, "grad_norm": 1.1589443683624268, "learning_rate": 2e-05, "loss": 0.03438497, "step": 6995 }, { "epoch": 13.992, "grad_norm": 1.6298073530197144, "learning_rate": 2e-05, "loss": 0.04208472, "step": 6996 }, { "epoch": 13.994, "grad_norm": 1.4981279373168945, "learning_rate": 2e-05, "loss": 0.03887921, "step": 6997 }, { "epoch": 13.996, "grad_norm": 0.9304689764976501, "learning_rate": 2e-05, "loss": 0.0343471, "step": 6998 }, { "epoch": 13.998, "grad_norm": 1.2929764986038208, "learning_rate": 2e-05, "loss": 0.04664343, "step": 6999 }, { "epoch": 14.0, "grad_norm": 1.127898097038269, "learning_rate": 2e-05, "loss": 0.0355192, "step": 7000 }, { "epoch": 14.0, "eval_performance": { "AngleClassification_1": 0.996, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9540918163672655, "Equal_1": 0.992, "Equal_2": 0.9640718562874252, "Equal_3": 0.8882235528942116, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.9880239520958084, "Parallel_1": 0.9879759519038076, "Parallel_2": 0.9959919839679359, "Parallel_3": 0.966, "Perpendicular_1": 0.994, "Perpendicular_2": 0.972, "Perpendicular_3": 0.7745490981963928, "PointLiesOnCircle_1": 0.9959919839679359, "PointLiesOnCircle_2": 0.998, "PointLiesOnCircle_3": 0.9852000000000001, "PointLiesOnLine_1": 0.9879759519038076, "PointLiesOnLine_2": 0.9939879759519038, "PointLiesOnLine_3": 0.9620758483033932 }, "eval_runtime": 228.2951, "eval_samples_per_second": 45.993, "eval_steps_per_second": 0.92, "step": 7000 }, { "epoch": 14.002, "grad_norm": 1.543593168258667, "learning_rate": 2e-05, "loss": 0.04972406, "step": 7001 }, { "epoch": 14.004, "grad_norm": 1.055652379989624, "learning_rate": 2e-05, "loss": 0.04080581, "step": 7002 }, { "epoch": 14.006, "grad_norm": 1.5364817380905151, "learning_rate": 2e-05, "loss": 0.03954908, "step": 7003 }, { "epoch": 14.008, "grad_norm": 1.0673036575317383, "learning_rate": 2e-05, "loss": 0.03433, "step": 7004 }, { "epoch": 14.01, "grad_norm": 4.186787128448486, "learning_rate": 2e-05, "loss": 0.08991122, "step": 7005 }, { "epoch": 14.012, "grad_norm": 1.572356104850769, "learning_rate": 2e-05, "loss": 0.04192705, "step": 7006 }, { "epoch": 14.014, "grad_norm": 1.8416147232055664, "learning_rate": 2e-05, "loss": 0.03756042, "step": 7007 }, { "epoch": 14.016, "grad_norm": 1.752917766571045, "learning_rate": 2e-05, "loss": 0.03856428, "step": 7008 }, { "epoch": 14.018, "grad_norm": 1.2398486137390137, "learning_rate": 2e-05, "loss": 0.03733564, "step": 7009 }, { "epoch": 14.02, "grad_norm": 1.1829638481140137, "learning_rate": 2e-05, "loss": 0.0440739, "step": 7010 }, { "epoch": 14.022, "grad_norm": 1.036558747291565, "learning_rate": 2e-05, "loss": 0.03251273, "step": 7011 }, { "epoch": 14.024, "grad_norm": 1.1435189247131348, "learning_rate": 2e-05, "loss": 0.03176043, "step": 7012 }, { "epoch": 14.026, "grad_norm": 1.421624779701233, "learning_rate": 2e-05, "loss": 0.03984127, "step": 7013 }, { "epoch": 14.028, "grad_norm": 1.1825897693634033, "learning_rate": 2e-05, "loss": 0.03840197, "step": 7014 }, { "epoch": 14.03, "grad_norm": 1.4395607709884644, "learning_rate": 2e-05, "loss": 0.04054178, "step": 7015 }, { "epoch": 14.032, "grad_norm": 3.844397783279419, "learning_rate": 2e-05, "loss": 0.04830306, "step": 7016 }, { "epoch": 14.034, "grad_norm": 1.3389534950256348, "learning_rate": 2e-05, "loss": 0.04535104, "step": 7017 }, { "epoch": 14.036, "grad_norm": 1.1437925100326538, "learning_rate": 2e-05, "loss": 0.03618667, "step": 7018 }, { "epoch": 14.038, "grad_norm": 1.6122055053710938, "learning_rate": 2e-05, "loss": 0.03232446, "step": 7019 }, { "epoch": 14.04, "grad_norm": 1.8328319787979126, "learning_rate": 2e-05, "loss": 0.04495162, "step": 7020 }, { "epoch": 14.042, "grad_norm": 1.0077152252197266, "learning_rate": 2e-05, "loss": 0.03165629, "step": 7021 }, { "epoch": 14.044, "grad_norm": 1.7416986227035522, "learning_rate": 2e-05, "loss": 0.04725628, "step": 7022 }, { "epoch": 14.046, "grad_norm": 1.1496182680130005, "learning_rate": 2e-05, "loss": 0.03489914, "step": 7023 }, { "epoch": 14.048, "grad_norm": 1.266981840133667, "learning_rate": 2e-05, "loss": 0.04201283, "step": 7024 }, { "epoch": 14.05, "grad_norm": 1.8616235256195068, "learning_rate": 2e-05, "loss": 0.04852257, "step": 7025 }, { "epoch": 14.052, "grad_norm": 1.630226731300354, "learning_rate": 2e-05, "loss": 0.0358972, "step": 7026 }, { "epoch": 14.054, "grad_norm": 1.3865302801132202, "learning_rate": 2e-05, "loss": 0.04059657, "step": 7027 }, { "epoch": 14.056, "grad_norm": 1.3287951946258545, "learning_rate": 2e-05, "loss": 0.04250805, "step": 7028 }, { "epoch": 14.058, "grad_norm": 1.1187971830368042, "learning_rate": 2e-05, "loss": 0.04474328, "step": 7029 }, { "epoch": 14.06, "grad_norm": 1.2386623620986938, "learning_rate": 2e-05, "loss": 0.05381689, "step": 7030 }, { "epoch": 14.062, "grad_norm": 1.7847754955291748, "learning_rate": 2e-05, "loss": 0.04896498, "step": 7031 }, { "epoch": 14.064, "grad_norm": 1.465959072113037, "learning_rate": 2e-05, "loss": 0.05326352, "step": 7032 }, { "epoch": 14.066, "grad_norm": 1.937292456626892, "learning_rate": 2e-05, "loss": 0.02535335, "step": 7033 }, { "epoch": 14.068, "grad_norm": 0.9535014033317566, "learning_rate": 2e-05, "loss": 0.03444283, "step": 7034 }, { "epoch": 14.07, "grad_norm": 0.9395742416381836, "learning_rate": 2e-05, "loss": 0.02949386, "step": 7035 }, { "epoch": 14.072, "grad_norm": 1.4321669340133667, "learning_rate": 2e-05, "loss": 0.03432464, "step": 7036 }, { "epoch": 14.074, "grad_norm": 1.4082679748535156, "learning_rate": 2e-05, "loss": 0.07055074, "step": 7037 }, { "epoch": 14.076, "grad_norm": 1.1814398765563965, "learning_rate": 2e-05, "loss": 0.05004214, "step": 7038 }, { "epoch": 14.078, "grad_norm": 1.2365251779556274, "learning_rate": 2e-05, "loss": 0.05067028, "step": 7039 }, { "epoch": 14.08, "grad_norm": 1.3151178359985352, "learning_rate": 2e-05, "loss": 0.0299421, "step": 7040 }, { "epoch": 14.082, "grad_norm": 1.1801363229751587, "learning_rate": 2e-05, "loss": 0.05045368, "step": 7041 }, { "epoch": 14.084, "grad_norm": 2.098558187484741, "learning_rate": 2e-05, "loss": 0.05283017, "step": 7042 }, { "epoch": 14.086, "grad_norm": 1.2813454866409302, "learning_rate": 2e-05, "loss": 0.04555622, "step": 7043 }, { "epoch": 14.088, "grad_norm": 1.2556875944137573, "learning_rate": 2e-05, "loss": 0.03392221, "step": 7044 }, { "epoch": 14.09, "grad_norm": 1.3572802543640137, "learning_rate": 2e-05, "loss": 0.03664856, "step": 7045 }, { "epoch": 14.092, "grad_norm": 3.6943295001983643, "learning_rate": 2e-05, "loss": 0.05953811, "step": 7046 }, { "epoch": 14.094, "grad_norm": 1.342200517654419, "learning_rate": 2e-05, "loss": 0.04363202, "step": 7047 }, { "epoch": 14.096, "grad_norm": 1.4195085763931274, "learning_rate": 2e-05, "loss": 0.05334013, "step": 7048 }, { "epoch": 14.098, "grad_norm": 1.4023659229278564, "learning_rate": 2e-05, "loss": 0.04098326, "step": 7049 }, { "epoch": 14.1, "grad_norm": 1.6801540851593018, "learning_rate": 2e-05, "loss": 0.03673111, "step": 7050 }, { "epoch": 14.102, "grad_norm": 1.2087290287017822, "learning_rate": 2e-05, "loss": 0.03171805, "step": 7051 }, { "epoch": 14.104, "grad_norm": 0.9094690680503845, "learning_rate": 2e-05, "loss": 0.02895694, "step": 7052 }, { "epoch": 14.106, "grad_norm": 1.1449291706085205, "learning_rate": 2e-05, "loss": 0.03981555, "step": 7053 }, { "epoch": 14.108, "grad_norm": 1.2396880388259888, "learning_rate": 2e-05, "loss": 0.03906148, "step": 7054 }, { "epoch": 14.11, "grad_norm": 0.9114099740982056, "learning_rate": 2e-05, "loss": 0.02611049, "step": 7055 }, { "epoch": 14.112, "grad_norm": 1.632718801498413, "learning_rate": 2e-05, "loss": 0.06271822, "step": 7056 }, { "epoch": 14.114, "grad_norm": 1.2491681575775146, "learning_rate": 2e-05, "loss": 0.04161642, "step": 7057 }, { "epoch": 14.116, "grad_norm": 1.6110799312591553, "learning_rate": 2e-05, "loss": 0.05063514, "step": 7058 }, { "epoch": 14.118, "grad_norm": 1.0531010627746582, "learning_rate": 2e-05, "loss": 0.03737075, "step": 7059 }, { "epoch": 14.12, "grad_norm": 1.5026668310165405, "learning_rate": 2e-05, "loss": 0.04802319, "step": 7060 }, { "epoch": 14.122, "grad_norm": 1.7665272951126099, "learning_rate": 2e-05, "loss": 0.04682126, "step": 7061 }, { "epoch": 14.124, "grad_norm": 1.150790810585022, "learning_rate": 2e-05, "loss": 0.03327106, "step": 7062 }, { "epoch": 14.126, "grad_norm": 1.171769142150879, "learning_rate": 2e-05, "loss": 0.03715512, "step": 7063 }, { "epoch": 14.128, "grad_norm": 2.134938955307007, "learning_rate": 2e-05, "loss": 0.03780596, "step": 7064 }, { "epoch": 14.13, "grad_norm": 1.1660887002944946, "learning_rate": 2e-05, "loss": 0.03346301, "step": 7065 }, { "epoch": 14.132, "grad_norm": 1.3475974798202515, "learning_rate": 2e-05, "loss": 0.04032385, "step": 7066 }, { "epoch": 14.134, "grad_norm": 0.9459397792816162, "learning_rate": 2e-05, "loss": 0.03087121, "step": 7067 }, { "epoch": 14.136, "grad_norm": 1.0326529741287231, "learning_rate": 2e-05, "loss": 0.03087562, "step": 7068 }, { "epoch": 14.138, "grad_norm": 2.211622476577759, "learning_rate": 2e-05, "loss": 0.0475852, "step": 7069 }, { "epoch": 14.14, "grad_norm": 1.0933854579925537, "learning_rate": 2e-05, "loss": 0.04264349, "step": 7070 }, { "epoch": 14.142, "grad_norm": 1.454256296157837, "learning_rate": 2e-05, "loss": 0.03477204, "step": 7071 }, { "epoch": 14.144, "grad_norm": 0.9197424054145813, "learning_rate": 2e-05, "loss": 0.03786702, "step": 7072 }, { "epoch": 14.146, "grad_norm": 1.2124618291854858, "learning_rate": 2e-05, "loss": 0.04156711, "step": 7073 }, { "epoch": 14.148, "grad_norm": 1.3121018409729004, "learning_rate": 2e-05, "loss": 0.04884036, "step": 7074 }, { "epoch": 14.15, "grad_norm": 2.1867623329162598, "learning_rate": 2e-05, "loss": 0.06405979, "step": 7075 }, { "epoch": 14.152, "grad_norm": 1.7804951667785645, "learning_rate": 2e-05, "loss": 0.04001268, "step": 7076 }, { "epoch": 14.154, "grad_norm": 0.9397218227386475, "learning_rate": 2e-05, "loss": 0.02610714, "step": 7077 }, { "epoch": 14.156, "grad_norm": 1.3507614135742188, "learning_rate": 2e-05, "loss": 0.03255537, "step": 7078 }, { "epoch": 14.158, "grad_norm": 1.2974509000778198, "learning_rate": 2e-05, "loss": 0.04950687, "step": 7079 }, { "epoch": 14.16, "grad_norm": 1.1836458444595337, "learning_rate": 2e-05, "loss": 0.04707564, "step": 7080 }, { "epoch": 14.162, "grad_norm": 1.502200722694397, "learning_rate": 2e-05, "loss": 0.04767866, "step": 7081 }, { "epoch": 14.164, "grad_norm": 1.0838667154312134, "learning_rate": 2e-05, "loss": 0.03349291, "step": 7082 }, { "epoch": 14.166, "grad_norm": 1.3062894344329834, "learning_rate": 2e-05, "loss": 0.0426055, "step": 7083 }, { "epoch": 14.168, "grad_norm": 6.699119567871094, "learning_rate": 2e-05, "loss": 0.0433975, "step": 7084 }, { "epoch": 14.17, "grad_norm": 1.8099277019500732, "learning_rate": 2e-05, "loss": 0.04317348, "step": 7085 }, { "epoch": 14.172, "grad_norm": 1.1604881286621094, "learning_rate": 2e-05, "loss": 0.04063978, "step": 7086 }, { "epoch": 14.174, "grad_norm": 3.8290858268737793, "learning_rate": 2e-05, "loss": 0.04068644, "step": 7087 }, { "epoch": 14.176, "grad_norm": 1.2605173587799072, "learning_rate": 2e-05, "loss": 0.03881906, "step": 7088 }, { "epoch": 14.178, "grad_norm": 1.4353046417236328, "learning_rate": 2e-05, "loss": 0.0350342, "step": 7089 }, { "epoch": 14.18, "grad_norm": 1.0521059036254883, "learning_rate": 2e-05, "loss": 0.03336118, "step": 7090 }, { "epoch": 14.182, "grad_norm": 1.3121209144592285, "learning_rate": 2e-05, "loss": 0.05262747, "step": 7091 }, { "epoch": 14.184, "grad_norm": 2.578676462173462, "learning_rate": 2e-05, "loss": 0.04243636, "step": 7092 }, { "epoch": 14.186, "grad_norm": 1.215235948562622, "learning_rate": 2e-05, "loss": 0.04332593, "step": 7093 }, { "epoch": 14.188, "grad_norm": 1.1321561336517334, "learning_rate": 2e-05, "loss": 0.02380128, "step": 7094 }, { "epoch": 14.19, "grad_norm": 1.042365550994873, "learning_rate": 2e-05, "loss": 0.03248084, "step": 7095 }, { "epoch": 14.192, "grad_norm": 0.8217297792434692, "learning_rate": 2e-05, "loss": 0.02964148, "step": 7096 }, { "epoch": 14.194, "grad_norm": 1.1682958602905273, "learning_rate": 2e-05, "loss": 0.02249066, "step": 7097 }, { "epoch": 14.196, "grad_norm": 0.9327736496925354, "learning_rate": 2e-05, "loss": 0.02506478, "step": 7098 }, { "epoch": 14.198, "grad_norm": 1.945264220237732, "learning_rate": 2e-05, "loss": 0.05387781, "step": 7099 }, { "epoch": 14.2, "grad_norm": 1.5276589393615723, "learning_rate": 2e-05, "loss": 0.0554502, "step": 7100 }, { "epoch": 14.202, "grad_norm": 0.9222687482833862, "learning_rate": 2e-05, "loss": 0.03282917, "step": 7101 }, { "epoch": 14.204, "grad_norm": 2.03171706199646, "learning_rate": 2e-05, "loss": 0.0489421, "step": 7102 }, { "epoch": 14.206, "grad_norm": 1.318291187286377, "learning_rate": 2e-05, "loss": 0.04106585, "step": 7103 }, { "epoch": 14.208, "grad_norm": 1.6157605648040771, "learning_rate": 2e-05, "loss": 0.04748903, "step": 7104 }, { "epoch": 14.21, "grad_norm": 1.189958095550537, "learning_rate": 2e-05, "loss": 0.04946387, "step": 7105 }, { "epoch": 14.212, "grad_norm": 1.6264127492904663, "learning_rate": 2e-05, "loss": 0.04697574, "step": 7106 }, { "epoch": 14.214, "grad_norm": 1.4696309566497803, "learning_rate": 2e-05, "loss": 0.03707322, "step": 7107 }, { "epoch": 14.216, "grad_norm": 2.527710199356079, "learning_rate": 2e-05, "loss": 0.05096244, "step": 7108 }, { "epoch": 14.218, "grad_norm": 1.6485509872436523, "learning_rate": 2e-05, "loss": 0.04996493, "step": 7109 }, { "epoch": 14.22, "grad_norm": 1.7276253700256348, "learning_rate": 2e-05, "loss": 0.04866678, "step": 7110 }, { "epoch": 14.222, "grad_norm": 1.0888484716415405, "learning_rate": 2e-05, "loss": 0.03676262, "step": 7111 }, { "epoch": 14.224, "grad_norm": 1.8122987747192383, "learning_rate": 2e-05, "loss": 0.05010951, "step": 7112 }, { "epoch": 14.226, "grad_norm": 1.1989349126815796, "learning_rate": 2e-05, "loss": 0.03650653, "step": 7113 }, { "epoch": 14.228, "grad_norm": 1.3767701387405396, "learning_rate": 2e-05, "loss": 0.03126729, "step": 7114 }, { "epoch": 14.23, "grad_norm": 0.9623883366584778, "learning_rate": 2e-05, "loss": 0.03258745, "step": 7115 }, { "epoch": 14.232, "grad_norm": 1.0491893291473389, "learning_rate": 2e-05, "loss": 0.03847987, "step": 7116 }, { "epoch": 14.234, "grad_norm": 1.050978183746338, "learning_rate": 2e-05, "loss": 0.02912135, "step": 7117 }, { "epoch": 14.236, "grad_norm": 0.9573562145233154, "learning_rate": 2e-05, "loss": 0.03787812, "step": 7118 }, { "epoch": 14.238, "grad_norm": 1.0352352857589722, "learning_rate": 2e-05, "loss": 0.03126932, "step": 7119 }, { "epoch": 14.24, "grad_norm": 1.5831891298294067, "learning_rate": 2e-05, "loss": 0.03951206, "step": 7120 }, { "epoch": 14.242, "grad_norm": 1.2232164144515991, "learning_rate": 2e-05, "loss": 0.03416546, "step": 7121 }, { "epoch": 14.244, "grad_norm": 1.9459905624389648, "learning_rate": 2e-05, "loss": 0.04290093, "step": 7122 }, { "epoch": 14.246, "grad_norm": 1.1616413593292236, "learning_rate": 2e-05, "loss": 0.03647928, "step": 7123 }, { "epoch": 14.248, "grad_norm": 1.3482621908187866, "learning_rate": 2e-05, "loss": 0.03903915, "step": 7124 }, { "epoch": 14.25, "grad_norm": 2.1263644695281982, "learning_rate": 2e-05, "loss": 0.07867762, "step": 7125 }, { "epoch": 14.252, "grad_norm": 1.00839364528656, "learning_rate": 2e-05, "loss": 0.03489882, "step": 7126 }, { "epoch": 14.254, "grad_norm": 1.8800108432769775, "learning_rate": 2e-05, "loss": 0.05812679, "step": 7127 }, { "epoch": 14.256, "grad_norm": 1.0577055215835571, "learning_rate": 2e-05, "loss": 0.03651185, "step": 7128 }, { "epoch": 14.258, "grad_norm": 1.518837809562683, "learning_rate": 2e-05, "loss": 0.03690169, "step": 7129 }, { "epoch": 14.26, "grad_norm": 1.151417851448059, "learning_rate": 2e-05, "loss": 0.03451779, "step": 7130 }, { "epoch": 14.262, "grad_norm": 0.7717501521110535, "learning_rate": 2e-05, "loss": 0.0261309, "step": 7131 }, { "epoch": 14.264, "grad_norm": 2.41092848777771, "learning_rate": 2e-05, "loss": 0.05972179, "step": 7132 }, { "epoch": 14.266, "grad_norm": 1.216785192489624, "learning_rate": 2e-05, "loss": 0.03552965, "step": 7133 }, { "epoch": 14.268, "grad_norm": 1.292988657951355, "learning_rate": 2e-05, "loss": 0.03550374, "step": 7134 }, { "epoch": 14.27, "grad_norm": 1.5447505712509155, "learning_rate": 2e-05, "loss": 0.04028059, "step": 7135 }, { "epoch": 14.272, "grad_norm": 1.8767216205596924, "learning_rate": 2e-05, "loss": 0.04290246, "step": 7136 }, { "epoch": 14.274000000000001, "grad_norm": 3.0507142543792725, "learning_rate": 2e-05, "loss": 0.06687137, "step": 7137 }, { "epoch": 14.276, "grad_norm": 1.0899710655212402, "learning_rate": 2e-05, "loss": 0.03691029, "step": 7138 }, { "epoch": 14.278, "grad_norm": 1.5203368663787842, "learning_rate": 2e-05, "loss": 0.04276366, "step": 7139 }, { "epoch": 14.28, "grad_norm": 9.928690910339355, "learning_rate": 2e-05, "loss": 0.0420375, "step": 7140 }, { "epoch": 14.282, "grad_norm": 1.0777556896209717, "learning_rate": 2e-05, "loss": 0.03737037, "step": 7141 }, { "epoch": 14.284, "grad_norm": 1.3069345951080322, "learning_rate": 2e-05, "loss": 0.04161247, "step": 7142 }, { "epoch": 14.286, "grad_norm": 0.9778799414634705, "learning_rate": 2e-05, "loss": 0.02881961, "step": 7143 }, { "epoch": 14.288, "grad_norm": 1.4050586223602295, "learning_rate": 2e-05, "loss": 0.04647424, "step": 7144 }, { "epoch": 14.29, "grad_norm": 1.1831780672073364, "learning_rate": 2e-05, "loss": 0.03482141, "step": 7145 }, { "epoch": 14.292, "grad_norm": 1.3272236585617065, "learning_rate": 2e-05, "loss": 0.04298367, "step": 7146 }, { "epoch": 14.294, "grad_norm": 1.426351547241211, "learning_rate": 2e-05, "loss": 0.04361892, "step": 7147 }, { "epoch": 14.296, "grad_norm": 2.0627951622009277, "learning_rate": 2e-05, "loss": 0.03137552, "step": 7148 }, { "epoch": 14.298, "grad_norm": 1.3358579874038696, "learning_rate": 2e-05, "loss": 0.04111365, "step": 7149 }, { "epoch": 14.3, "grad_norm": 1.5250239372253418, "learning_rate": 2e-05, "loss": 0.04927598, "step": 7150 }, { "epoch": 14.302, "grad_norm": 1.1973413228988647, "learning_rate": 2e-05, "loss": 0.0331023, "step": 7151 }, { "epoch": 14.304, "grad_norm": 1.0492922067642212, "learning_rate": 2e-05, "loss": 0.03866861, "step": 7152 }, { "epoch": 14.306, "grad_norm": 1.6442021131515503, "learning_rate": 2e-05, "loss": 0.04364835, "step": 7153 }, { "epoch": 14.308, "grad_norm": 1.534789800643921, "learning_rate": 2e-05, "loss": 0.04455413, "step": 7154 }, { "epoch": 14.31, "grad_norm": 1.0567904710769653, "learning_rate": 2e-05, "loss": 0.02571731, "step": 7155 }, { "epoch": 14.312, "grad_norm": 2.3799798488616943, "learning_rate": 2e-05, "loss": 0.05132381, "step": 7156 }, { "epoch": 14.314, "grad_norm": 1.33385169506073, "learning_rate": 2e-05, "loss": 0.04139435, "step": 7157 }, { "epoch": 14.316, "grad_norm": 1.9463222026824951, "learning_rate": 2e-05, "loss": 0.04074475, "step": 7158 }, { "epoch": 14.318, "grad_norm": 1.4470829963684082, "learning_rate": 2e-05, "loss": 0.03784862, "step": 7159 }, { "epoch": 14.32, "grad_norm": 0.891909122467041, "learning_rate": 2e-05, "loss": 0.02717247, "step": 7160 }, { "epoch": 14.322, "grad_norm": 1.0523635149002075, "learning_rate": 2e-05, "loss": 0.0322356, "step": 7161 }, { "epoch": 14.324, "grad_norm": 1.8766148090362549, "learning_rate": 2e-05, "loss": 0.0335541, "step": 7162 }, { "epoch": 14.326, "grad_norm": 1.47050940990448, "learning_rate": 2e-05, "loss": 0.03233317, "step": 7163 }, { "epoch": 14.328, "grad_norm": 3.394540309906006, "learning_rate": 2e-05, "loss": 0.04790577, "step": 7164 }, { "epoch": 14.33, "grad_norm": 1.6004360914230347, "learning_rate": 2e-05, "loss": 0.04039369, "step": 7165 }, { "epoch": 14.332, "grad_norm": 1.3633911609649658, "learning_rate": 2e-05, "loss": 0.04203112, "step": 7166 }, { "epoch": 14.334, "grad_norm": 0.8487565517425537, "learning_rate": 2e-05, "loss": 0.02747994, "step": 7167 }, { "epoch": 14.336, "grad_norm": 0.9982837438583374, "learning_rate": 2e-05, "loss": 0.03416155, "step": 7168 }, { "epoch": 14.338, "grad_norm": 1.984885811805725, "learning_rate": 2e-05, "loss": 0.03370845, "step": 7169 }, { "epoch": 14.34, "grad_norm": 2.287105083465576, "learning_rate": 2e-05, "loss": 0.05996915, "step": 7170 }, { "epoch": 14.342, "grad_norm": 1.8237513303756714, "learning_rate": 2e-05, "loss": 0.04041093, "step": 7171 }, { "epoch": 14.344, "grad_norm": 2.277205228805542, "learning_rate": 2e-05, "loss": 0.05937389, "step": 7172 }, { "epoch": 14.346, "grad_norm": 1.1008055210113525, "learning_rate": 2e-05, "loss": 0.04075878, "step": 7173 }, { "epoch": 14.348, "grad_norm": 1.2196390628814697, "learning_rate": 2e-05, "loss": 0.04837185, "step": 7174 }, { "epoch": 14.35, "grad_norm": 3.752272367477417, "learning_rate": 2e-05, "loss": 0.05566734, "step": 7175 }, { "epoch": 14.352, "grad_norm": 1.190701961517334, "learning_rate": 2e-05, "loss": 0.03635371, "step": 7176 }, { "epoch": 14.354, "grad_norm": 2.1152825355529785, "learning_rate": 2e-05, "loss": 0.04274201, "step": 7177 }, { "epoch": 14.356, "grad_norm": 1.727535367012024, "learning_rate": 2e-05, "loss": 0.03803449, "step": 7178 }, { "epoch": 14.358, "grad_norm": 1.7160369157791138, "learning_rate": 2e-05, "loss": 0.06005615, "step": 7179 }, { "epoch": 14.36, "grad_norm": 2.528369903564453, "learning_rate": 2e-05, "loss": 0.05192675, "step": 7180 }, { "epoch": 14.362, "grad_norm": 3.3644161224365234, "learning_rate": 2e-05, "loss": 0.03818004, "step": 7181 }, { "epoch": 14.364, "grad_norm": 0.9452011585235596, "learning_rate": 2e-05, "loss": 0.03166102, "step": 7182 }, { "epoch": 14.366, "grad_norm": 1.442317247390747, "learning_rate": 2e-05, "loss": 0.0474031, "step": 7183 }, { "epoch": 14.368, "grad_norm": 1.4194622039794922, "learning_rate": 2e-05, "loss": 0.0299148, "step": 7184 }, { "epoch": 14.37, "grad_norm": 1.9752638339996338, "learning_rate": 2e-05, "loss": 0.05806788, "step": 7185 }, { "epoch": 14.372, "grad_norm": 1.2850720882415771, "learning_rate": 2e-05, "loss": 0.05267624, "step": 7186 }, { "epoch": 14.374, "grad_norm": 1.331405758857727, "learning_rate": 2e-05, "loss": 0.04951733, "step": 7187 }, { "epoch": 14.376, "grad_norm": 1.620272159576416, "learning_rate": 2e-05, "loss": 0.05395202, "step": 7188 }, { "epoch": 14.378, "grad_norm": 1.2653518915176392, "learning_rate": 2e-05, "loss": 0.04429711, "step": 7189 }, { "epoch": 14.38, "grad_norm": 1.4390442371368408, "learning_rate": 2e-05, "loss": 0.05166383, "step": 7190 }, { "epoch": 14.382, "grad_norm": 0.8587131500244141, "learning_rate": 2e-05, "loss": 0.03113745, "step": 7191 }, { "epoch": 14.384, "grad_norm": 0.8975671529769897, "learning_rate": 2e-05, "loss": 0.03096909, "step": 7192 }, { "epoch": 14.386, "grad_norm": 1.5422664880752563, "learning_rate": 2e-05, "loss": 0.04836652, "step": 7193 }, { "epoch": 14.388, "grad_norm": 1.168247938156128, "learning_rate": 2e-05, "loss": 0.03516897, "step": 7194 }, { "epoch": 14.39, "grad_norm": 1.2773606777191162, "learning_rate": 2e-05, "loss": 0.03655051, "step": 7195 }, { "epoch": 14.392, "grad_norm": 0.9981243014335632, "learning_rate": 2e-05, "loss": 0.02787976, "step": 7196 }, { "epoch": 14.394, "grad_norm": 1.557073712348938, "learning_rate": 2e-05, "loss": 0.03118613, "step": 7197 }, { "epoch": 14.396, "grad_norm": 1.548323392868042, "learning_rate": 2e-05, "loss": 0.04525673, "step": 7198 }, { "epoch": 14.398, "grad_norm": 1.222340703010559, "learning_rate": 2e-05, "loss": 0.04091443, "step": 7199 }, { "epoch": 14.4, "grad_norm": 0.9894644618034363, "learning_rate": 2e-05, "loss": 0.03331787, "step": 7200 }, { "epoch": 14.402, "grad_norm": 0.983518123626709, "learning_rate": 2e-05, "loss": 0.02828628, "step": 7201 }, { "epoch": 14.404, "grad_norm": 0.9487534165382385, "learning_rate": 2e-05, "loss": 0.02842491, "step": 7202 }, { "epoch": 14.406, "grad_norm": 1.1806600093841553, "learning_rate": 2e-05, "loss": 0.03545385, "step": 7203 }, { "epoch": 14.408, "grad_norm": 1.087823510169983, "learning_rate": 2e-05, "loss": 0.03692382, "step": 7204 }, { "epoch": 14.41, "grad_norm": 2.036144495010376, "learning_rate": 2e-05, "loss": 0.04346631, "step": 7205 }, { "epoch": 14.412, "grad_norm": 1.683158040046692, "learning_rate": 2e-05, "loss": 0.03118555, "step": 7206 }, { "epoch": 14.414, "grad_norm": 1.6045148372650146, "learning_rate": 2e-05, "loss": 0.04160459, "step": 7207 }, { "epoch": 14.416, "grad_norm": 2.2519960403442383, "learning_rate": 2e-05, "loss": 0.04570108, "step": 7208 }, { "epoch": 14.418, "grad_norm": 2.1741933822631836, "learning_rate": 2e-05, "loss": 0.0513142, "step": 7209 }, { "epoch": 14.42, "grad_norm": 1.5185238122940063, "learning_rate": 2e-05, "loss": 0.04224061, "step": 7210 }, { "epoch": 14.422, "grad_norm": 2.100687265396118, "learning_rate": 2e-05, "loss": 0.05586538, "step": 7211 }, { "epoch": 14.424, "grad_norm": 1.3079783916473389, "learning_rate": 2e-05, "loss": 0.03828974, "step": 7212 }, { "epoch": 14.426, "grad_norm": 0.8442499041557312, "learning_rate": 2e-05, "loss": 0.03248565, "step": 7213 }, { "epoch": 14.428, "grad_norm": 1.8725861310958862, "learning_rate": 2e-05, "loss": 0.04926984, "step": 7214 }, { "epoch": 14.43, "grad_norm": 1.769330620765686, "learning_rate": 2e-05, "loss": 0.06175753, "step": 7215 }, { "epoch": 14.432, "grad_norm": 2.3744852542877197, "learning_rate": 2e-05, "loss": 0.05664329, "step": 7216 }, { "epoch": 14.434, "grad_norm": 1.1747453212738037, "learning_rate": 2e-05, "loss": 0.0338913, "step": 7217 }, { "epoch": 14.436, "grad_norm": 1.1039196252822876, "learning_rate": 2e-05, "loss": 0.03575258, "step": 7218 }, { "epoch": 14.438, "grad_norm": 1.7221014499664307, "learning_rate": 2e-05, "loss": 0.0631889, "step": 7219 }, { "epoch": 14.44, "grad_norm": 1.4758400917053223, "learning_rate": 2e-05, "loss": 0.0456157, "step": 7220 }, { "epoch": 14.442, "grad_norm": 0.9482483267784119, "learning_rate": 2e-05, "loss": 0.03973409, "step": 7221 }, { "epoch": 14.444, "grad_norm": 1.6295521259307861, "learning_rate": 2e-05, "loss": 0.04218559, "step": 7222 }, { "epoch": 14.446, "grad_norm": 2.036994457244873, "learning_rate": 2e-05, "loss": 0.06772701, "step": 7223 }, { "epoch": 14.448, "grad_norm": 1.4231206178665161, "learning_rate": 2e-05, "loss": 0.04165895, "step": 7224 }, { "epoch": 14.45, "grad_norm": 1.0268579721450806, "learning_rate": 2e-05, "loss": 0.03623311, "step": 7225 }, { "epoch": 14.452, "grad_norm": 0.8667217493057251, "learning_rate": 2e-05, "loss": 0.03388076, "step": 7226 }, { "epoch": 14.454, "grad_norm": 1.1748952865600586, "learning_rate": 2e-05, "loss": 0.04361609, "step": 7227 }, { "epoch": 14.456, "grad_norm": 1.0902115106582642, "learning_rate": 2e-05, "loss": 0.05192316, "step": 7228 }, { "epoch": 14.458, "grad_norm": 1.4908370971679688, "learning_rate": 2e-05, "loss": 0.04825752, "step": 7229 }, { "epoch": 14.46, "grad_norm": 2.283445358276367, "learning_rate": 2e-05, "loss": 0.04901062, "step": 7230 }, { "epoch": 14.462, "grad_norm": 0.884273886680603, "learning_rate": 2e-05, "loss": 0.02580253, "step": 7231 }, { "epoch": 14.464, "grad_norm": 1.4614207744598389, "learning_rate": 2e-05, "loss": 0.04194085, "step": 7232 }, { "epoch": 14.466, "grad_norm": 1.091875433921814, "learning_rate": 2e-05, "loss": 0.0427567, "step": 7233 }, { "epoch": 14.468, "grad_norm": 1.13577401638031, "learning_rate": 2e-05, "loss": 0.039945, "step": 7234 }, { "epoch": 14.47, "grad_norm": 1.1515393257141113, "learning_rate": 2e-05, "loss": 0.05007129, "step": 7235 }, { "epoch": 14.472, "grad_norm": 1.3759965896606445, "learning_rate": 2e-05, "loss": 0.03251833, "step": 7236 }, { "epoch": 14.474, "grad_norm": 1.1787422895431519, "learning_rate": 2e-05, "loss": 0.03462398, "step": 7237 }, { "epoch": 14.475999999999999, "grad_norm": 1.2582359313964844, "learning_rate": 2e-05, "loss": 0.03934406, "step": 7238 }, { "epoch": 14.478, "grad_norm": 1.2110345363616943, "learning_rate": 2e-05, "loss": 0.05096396, "step": 7239 }, { "epoch": 14.48, "grad_norm": 1.7258920669555664, "learning_rate": 2e-05, "loss": 0.04324333, "step": 7240 }, { "epoch": 14.482, "grad_norm": 1.1218295097351074, "learning_rate": 2e-05, "loss": 0.04241745, "step": 7241 }, { "epoch": 14.484, "grad_norm": 1.1062091588974, "learning_rate": 2e-05, "loss": 0.03462784, "step": 7242 }, { "epoch": 14.486, "grad_norm": 1.1936490535736084, "learning_rate": 2e-05, "loss": 0.03986615, "step": 7243 }, { "epoch": 14.488, "grad_norm": 0.8983294367790222, "learning_rate": 2e-05, "loss": 0.03195298, "step": 7244 }, { "epoch": 14.49, "grad_norm": 1.1619058847427368, "learning_rate": 2e-05, "loss": 0.03322784, "step": 7245 }, { "epoch": 14.492, "grad_norm": 2.645016670227051, "learning_rate": 2e-05, "loss": 0.05169255, "step": 7246 }, { "epoch": 14.494, "grad_norm": 0.9019205570220947, "learning_rate": 2e-05, "loss": 0.02769778, "step": 7247 }, { "epoch": 14.496, "grad_norm": 0.9778595566749573, "learning_rate": 2e-05, "loss": 0.0308298, "step": 7248 }, { "epoch": 14.498, "grad_norm": 0.9407392740249634, "learning_rate": 2e-05, "loss": 0.04517448, "step": 7249 }, { "epoch": 14.5, "grad_norm": 1.2029132843017578, "learning_rate": 2e-05, "loss": 0.04802193, "step": 7250 }, { "epoch": 14.502, "grad_norm": 1.8852992057800293, "learning_rate": 2e-05, "loss": 0.0467676, "step": 7251 }, { "epoch": 14.504, "grad_norm": 1.1111149787902832, "learning_rate": 2e-05, "loss": 0.04376265, "step": 7252 }, { "epoch": 14.506, "grad_norm": 0.9940442442893982, "learning_rate": 2e-05, "loss": 0.02668458, "step": 7253 }, { "epoch": 14.508, "grad_norm": 1.6125909090042114, "learning_rate": 2e-05, "loss": 0.04637981, "step": 7254 }, { "epoch": 14.51, "grad_norm": 0.8297140002250671, "learning_rate": 2e-05, "loss": 0.0253837, "step": 7255 }, { "epoch": 14.512, "grad_norm": 1.9338608980178833, "learning_rate": 2e-05, "loss": 0.03713609, "step": 7256 }, { "epoch": 14.514, "grad_norm": 1.046212911605835, "learning_rate": 2e-05, "loss": 0.05087235, "step": 7257 }, { "epoch": 14.516, "grad_norm": 0.882905900478363, "learning_rate": 2e-05, "loss": 0.02930818, "step": 7258 }, { "epoch": 14.518, "grad_norm": 2.1364095211029053, "learning_rate": 2e-05, "loss": 0.04524556, "step": 7259 }, { "epoch": 14.52, "grad_norm": 1.207232117652893, "learning_rate": 2e-05, "loss": 0.04068488, "step": 7260 }, { "epoch": 14.522, "grad_norm": 1.444748044013977, "learning_rate": 2e-05, "loss": 0.03791768, "step": 7261 }, { "epoch": 14.524000000000001, "grad_norm": 1.6502163410186768, "learning_rate": 2e-05, "loss": 0.04867134, "step": 7262 }, { "epoch": 14.526, "grad_norm": 1.8032082319259644, "learning_rate": 2e-05, "loss": 0.05229461, "step": 7263 }, { "epoch": 14.528, "grad_norm": 1.0371603965759277, "learning_rate": 2e-05, "loss": 0.03274223, "step": 7264 }, { "epoch": 14.53, "grad_norm": 0.9166057705879211, "learning_rate": 2e-05, "loss": 0.03647472, "step": 7265 }, { "epoch": 14.532, "grad_norm": 2.4131076335906982, "learning_rate": 2e-05, "loss": 0.04242657, "step": 7266 }, { "epoch": 14.534, "grad_norm": 1.417752981185913, "learning_rate": 2e-05, "loss": 0.04835052, "step": 7267 }, { "epoch": 14.536, "grad_norm": 1.1833804845809937, "learning_rate": 2e-05, "loss": 0.03013505, "step": 7268 }, { "epoch": 14.538, "grad_norm": 1.5938618183135986, "learning_rate": 2e-05, "loss": 0.05221473, "step": 7269 }, { "epoch": 14.54, "grad_norm": 1.3734673261642456, "learning_rate": 2e-05, "loss": 0.02999542, "step": 7270 }, { "epoch": 14.542, "grad_norm": 1.5847686529159546, "learning_rate": 2e-05, "loss": 0.05821497, "step": 7271 }, { "epoch": 14.544, "grad_norm": 1.3200124502182007, "learning_rate": 2e-05, "loss": 0.04123558, "step": 7272 }, { "epoch": 14.546, "grad_norm": 0.9225376844406128, "learning_rate": 2e-05, "loss": 0.0281404, "step": 7273 }, { "epoch": 14.548, "grad_norm": 1.598218560218811, "learning_rate": 2e-05, "loss": 0.04327469, "step": 7274 }, { "epoch": 14.55, "grad_norm": 1.1261836290359497, "learning_rate": 2e-05, "loss": 0.03641365, "step": 7275 }, { "epoch": 14.552, "grad_norm": 1.2988630533218384, "learning_rate": 2e-05, "loss": 0.04069004, "step": 7276 }, { "epoch": 14.554, "grad_norm": 0.8461461067199707, "learning_rate": 2e-05, "loss": 0.02741572, "step": 7277 }, { "epoch": 14.556000000000001, "grad_norm": 1.0672392845153809, "learning_rate": 2e-05, "loss": 0.03822524, "step": 7278 }, { "epoch": 14.558, "grad_norm": 1.0892995595932007, "learning_rate": 2e-05, "loss": 0.03488005, "step": 7279 }, { "epoch": 14.56, "grad_norm": 1.2225384712219238, "learning_rate": 2e-05, "loss": 0.0577505, "step": 7280 }, { "epoch": 14.562, "grad_norm": 1.3322092294692993, "learning_rate": 2e-05, "loss": 0.04341347, "step": 7281 }, { "epoch": 14.564, "grad_norm": 1.3358018398284912, "learning_rate": 2e-05, "loss": 0.04887218, "step": 7282 }, { "epoch": 14.566, "grad_norm": 0.9700892567634583, "learning_rate": 2e-05, "loss": 0.03305193, "step": 7283 }, { "epoch": 14.568, "grad_norm": 1.0093399286270142, "learning_rate": 2e-05, "loss": 0.02867941, "step": 7284 }, { "epoch": 14.57, "grad_norm": 1.5705113410949707, "learning_rate": 2e-05, "loss": 0.04736889, "step": 7285 }, { "epoch": 14.572, "grad_norm": 1.440657615661621, "learning_rate": 2e-05, "loss": 0.04822645, "step": 7286 }, { "epoch": 14.574, "grad_norm": 0.8910399079322815, "learning_rate": 2e-05, "loss": 0.03026424, "step": 7287 }, { "epoch": 14.576, "grad_norm": 1.823671579360962, "learning_rate": 2e-05, "loss": 0.03909463, "step": 7288 }, { "epoch": 14.578, "grad_norm": 1.1132062673568726, "learning_rate": 2e-05, "loss": 0.04150675, "step": 7289 }, { "epoch": 14.58, "grad_norm": 3.7259020805358887, "learning_rate": 2e-05, "loss": 0.05267645, "step": 7290 }, { "epoch": 14.582, "grad_norm": 1.3335038423538208, "learning_rate": 2e-05, "loss": 0.05576807, "step": 7291 }, { "epoch": 14.584, "grad_norm": 1.923017144203186, "learning_rate": 2e-05, "loss": 0.03881141, "step": 7292 }, { "epoch": 14.586, "grad_norm": 2.43312406539917, "learning_rate": 2e-05, "loss": 0.0413322, "step": 7293 }, { "epoch": 14.588, "grad_norm": 0.9937450289726257, "learning_rate": 2e-05, "loss": 0.03506201, "step": 7294 }, { "epoch": 14.59, "grad_norm": 1.3645753860473633, "learning_rate": 2e-05, "loss": 0.04094256, "step": 7295 }, { "epoch": 14.592, "grad_norm": 1.31569242477417, "learning_rate": 2e-05, "loss": 0.04713068, "step": 7296 }, { "epoch": 14.594, "grad_norm": 1.778739333152771, "learning_rate": 2e-05, "loss": 0.04807052, "step": 7297 }, { "epoch": 14.596, "grad_norm": 1.2723947763442993, "learning_rate": 2e-05, "loss": 0.02964616, "step": 7298 }, { "epoch": 14.598, "grad_norm": 1.331046462059021, "learning_rate": 2e-05, "loss": 0.03705756, "step": 7299 }, { "epoch": 14.6, "grad_norm": 1.5314394235610962, "learning_rate": 2e-05, "loss": 0.03695713, "step": 7300 }, { "epoch": 14.602, "grad_norm": 1.1638318300247192, "learning_rate": 2e-05, "loss": 0.04172804, "step": 7301 }, { "epoch": 14.604, "grad_norm": 1.3978321552276611, "learning_rate": 2e-05, "loss": 0.03663391, "step": 7302 }, { "epoch": 14.606, "grad_norm": 1.037785530090332, "learning_rate": 2e-05, "loss": 0.03992576, "step": 7303 }, { "epoch": 14.608, "grad_norm": 1.3401994705200195, "learning_rate": 2e-05, "loss": 0.03781562, "step": 7304 }, { "epoch": 14.61, "grad_norm": 1.0910634994506836, "learning_rate": 2e-05, "loss": 0.03079701, "step": 7305 }, { "epoch": 14.612, "grad_norm": 1.4607267379760742, "learning_rate": 2e-05, "loss": 0.03488635, "step": 7306 }, { "epoch": 14.614, "grad_norm": 1.1341897249221802, "learning_rate": 2e-05, "loss": 0.04212616, "step": 7307 }, { "epoch": 14.616, "grad_norm": 1.5016907453536987, "learning_rate": 2e-05, "loss": 0.03965358, "step": 7308 }, { "epoch": 14.618, "grad_norm": 1.958333134651184, "learning_rate": 2e-05, "loss": 0.0406105, "step": 7309 }, { "epoch": 14.62, "grad_norm": 1.0674952268600464, "learning_rate": 2e-05, "loss": 0.03282567, "step": 7310 }, { "epoch": 14.622, "grad_norm": 1.0220046043395996, "learning_rate": 2e-05, "loss": 0.04383621, "step": 7311 }, { "epoch": 14.624, "grad_norm": 1.9917553663253784, "learning_rate": 2e-05, "loss": 0.04973894, "step": 7312 }, { "epoch": 14.626, "grad_norm": 1.3297289609909058, "learning_rate": 2e-05, "loss": 0.03941993, "step": 7313 }, { "epoch": 14.628, "grad_norm": 1.3171072006225586, "learning_rate": 2e-05, "loss": 0.04885721, "step": 7314 }, { "epoch": 14.63, "grad_norm": 1.6031906604766846, "learning_rate": 2e-05, "loss": 0.05132467, "step": 7315 }, { "epoch": 14.632, "grad_norm": 1.5987430810928345, "learning_rate": 2e-05, "loss": 0.03855143, "step": 7316 }, { "epoch": 14.634, "grad_norm": 1.787304401397705, "learning_rate": 2e-05, "loss": 0.06035594, "step": 7317 }, { "epoch": 14.636, "grad_norm": 0.8832494020462036, "learning_rate": 2e-05, "loss": 0.02046807, "step": 7318 }, { "epoch": 14.638, "grad_norm": 1.2446694374084473, "learning_rate": 2e-05, "loss": 0.04552387, "step": 7319 }, { "epoch": 14.64, "grad_norm": 0.9243409633636475, "learning_rate": 2e-05, "loss": 0.02839561, "step": 7320 }, { "epoch": 14.642, "grad_norm": 1.0006197690963745, "learning_rate": 2e-05, "loss": 0.03719573, "step": 7321 }, { "epoch": 14.644, "grad_norm": 1.9680715799331665, "learning_rate": 2e-05, "loss": 0.03915306, "step": 7322 }, { "epoch": 14.646, "grad_norm": 1.1001665592193604, "learning_rate": 2e-05, "loss": 0.03548025, "step": 7323 }, { "epoch": 14.648, "grad_norm": 1.5095804929733276, "learning_rate": 2e-05, "loss": 0.0360375, "step": 7324 }, { "epoch": 14.65, "grad_norm": 1.229776382446289, "learning_rate": 2e-05, "loss": 0.04320935, "step": 7325 }, { "epoch": 14.652, "grad_norm": 1.255211591720581, "learning_rate": 2e-05, "loss": 0.03717191, "step": 7326 }, { "epoch": 14.654, "grad_norm": 3.002105474472046, "learning_rate": 2e-05, "loss": 0.05946426, "step": 7327 }, { "epoch": 14.656, "grad_norm": 1.5534347295761108, "learning_rate": 2e-05, "loss": 0.05363734, "step": 7328 }, { "epoch": 14.658, "grad_norm": 1.0002645254135132, "learning_rate": 2e-05, "loss": 0.03631153, "step": 7329 }, { "epoch": 14.66, "grad_norm": 1.5704560279846191, "learning_rate": 2e-05, "loss": 0.04248814, "step": 7330 }, { "epoch": 14.662, "grad_norm": 1.0919575691223145, "learning_rate": 2e-05, "loss": 0.03522342, "step": 7331 }, { "epoch": 14.664, "grad_norm": 1.1942470073699951, "learning_rate": 2e-05, "loss": 0.04927763, "step": 7332 }, { "epoch": 14.666, "grad_norm": 1.1156870126724243, "learning_rate": 2e-05, "loss": 0.03832618, "step": 7333 }, { "epoch": 14.668, "grad_norm": 1.6428534984588623, "learning_rate": 2e-05, "loss": 0.05436393, "step": 7334 }, { "epoch": 14.67, "grad_norm": 0.9411025047302246, "learning_rate": 2e-05, "loss": 0.03153289, "step": 7335 }, { "epoch": 14.672, "grad_norm": 0.7648595571517944, "learning_rate": 2e-05, "loss": 0.02806173, "step": 7336 }, { "epoch": 14.674, "grad_norm": 0.885816752910614, "learning_rate": 2e-05, "loss": 0.0296987, "step": 7337 }, { "epoch": 14.676, "grad_norm": 1.507460355758667, "learning_rate": 2e-05, "loss": 0.05498847, "step": 7338 }, { "epoch": 14.678, "grad_norm": 0.9338222742080688, "learning_rate": 2e-05, "loss": 0.03351939, "step": 7339 }, { "epoch": 14.68, "grad_norm": 1.9112564325332642, "learning_rate": 2e-05, "loss": 0.04286894, "step": 7340 }, { "epoch": 14.682, "grad_norm": 1.8392841815948486, "learning_rate": 2e-05, "loss": 0.0400997, "step": 7341 }, { "epoch": 14.684, "grad_norm": 2.1118195056915283, "learning_rate": 2e-05, "loss": 0.06308724, "step": 7342 }, { "epoch": 14.686, "grad_norm": 1.1515990495681763, "learning_rate": 2e-05, "loss": 0.04028989, "step": 7343 }, { "epoch": 14.688, "grad_norm": 0.8687466382980347, "learning_rate": 2e-05, "loss": 0.03164215, "step": 7344 }, { "epoch": 14.69, "grad_norm": 1.0362651348114014, "learning_rate": 2e-05, "loss": 0.03688889, "step": 7345 }, { "epoch": 14.692, "grad_norm": 1.1893278360366821, "learning_rate": 2e-05, "loss": 0.03205062, "step": 7346 }, { "epoch": 14.693999999999999, "grad_norm": 1.3111627101898193, "learning_rate": 2e-05, "loss": 0.0543824, "step": 7347 }, { "epoch": 14.696, "grad_norm": 0.9270426630973816, "learning_rate": 2e-05, "loss": 0.04037824, "step": 7348 }, { "epoch": 14.698, "grad_norm": 2.0446722507476807, "learning_rate": 2e-05, "loss": 0.05208341, "step": 7349 }, { "epoch": 14.7, "grad_norm": 1.3618665933609009, "learning_rate": 2e-05, "loss": 0.03360551, "step": 7350 }, { "epoch": 14.702, "grad_norm": 1.1025279760360718, "learning_rate": 2e-05, "loss": 0.04061274, "step": 7351 }, { "epoch": 14.704, "grad_norm": 0.9533839821815491, "learning_rate": 2e-05, "loss": 0.03007737, "step": 7352 }, { "epoch": 14.706, "grad_norm": 1.1920233964920044, "learning_rate": 2e-05, "loss": 0.04332636, "step": 7353 }, { "epoch": 14.708, "grad_norm": 1.3409409523010254, "learning_rate": 2e-05, "loss": 0.05367672, "step": 7354 }, { "epoch": 14.71, "grad_norm": 1.8209196329116821, "learning_rate": 2e-05, "loss": 0.04998752, "step": 7355 }, { "epoch": 14.712, "grad_norm": 1.0535396337509155, "learning_rate": 2e-05, "loss": 0.03294986, "step": 7356 }, { "epoch": 14.714, "grad_norm": 1.0544683933258057, "learning_rate": 2e-05, "loss": 0.04188926, "step": 7357 }, { "epoch": 14.716, "grad_norm": 1.0996098518371582, "learning_rate": 2e-05, "loss": 0.03929107, "step": 7358 }, { "epoch": 14.718, "grad_norm": 1.1038342714309692, "learning_rate": 2e-05, "loss": 0.03814331, "step": 7359 }, { "epoch": 14.72, "grad_norm": 1.0820776224136353, "learning_rate": 2e-05, "loss": 0.03988007, "step": 7360 }, { "epoch": 14.722, "grad_norm": 1.5885837078094482, "learning_rate": 2e-05, "loss": 0.04836737, "step": 7361 }, { "epoch": 14.724, "grad_norm": 1.4027137756347656, "learning_rate": 2e-05, "loss": 0.04616922, "step": 7362 }, { "epoch": 14.725999999999999, "grad_norm": 1.4879624843597412, "learning_rate": 2e-05, "loss": 0.04701858, "step": 7363 }, { "epoch": 14.728, "grad_norm": 1.7451562881469727, "learning_rate": 2e-05, "loss": 0.04082498, "step": 7364 }, { "epoch": 14.73, "grad_norm": 1.2382872104644775, "learning_rate": 2e-05, "loss": 0.03543949, "step": 7365 }, { "epoch": 14.732, "grad_norm": 1.0891491174697876, "learning_rate": 2e-05, "loss": 0.04369539, "step": 7366 }, { "epoch": 14.734, "grad_norm": 1.1295313835144043, "learning_rate": 2e-05, "loss": 0.04467921, "step": 7367 }, { "epoch": 14.736, "grad_norm": 1.008421540260315, "learning_rate": 2e-05, "loss": 0.03081367, "step": 7368 }, { "epoch": 14.738, "grad_norm": 1.0454267263412476, "learning_rate": 2e-05, "loss": 0.03988613, "step": 7369 }, { "epoch": 14.74, "grad_norm": 1.327993392944336, "learning_rate": 2e-05, "loss": 0.04315555, "step": 7370 }, { "epoch": 14.742, "grad_norm": 1.335193157196045, "learning_rate": 2e-05, "loss": 0.03227399, "step": 7371 }, { "epoch": 14.744, "grad_norm": 1.3347011804580688, "learning_rate": 2e-05, "loss": 0.05860446, "step": 7372 }, { "epoch": 14.746, "grad_norm": 0.9326810836791992, "learning_rate": 2e-05, "loss": 0.03182587, "step": 7373 }, { "epoch": 14.748, "grad_norm": 1.160326361656189, "learning_rate": 2e-05, "loss": 0.03810657, "step": 7374 }, { "epoch": 14.75, "grad_norm": 1.0090194940567017, "learning_rate": 2e-05, "loss": 0.04315179, "step": 7375 }, { "epoch": 14.752, "grad_norm": 1.0154917240142822, "learning_rate": 2e-05, "loss": 0.04223721, "step": 7376 }, { "epoch": 14.754, "grad_norm": 1.2227507829666138, "learning_rate": 2e-05, "loss": 0.03178797, "step": 7377 }, { "epoch": 14.756, "grad_norm": 1.3257445096969604, "learning_rate": 2e-05, "loss": 0.05133195, "step": 7378 }, { "epoch": 14.758, "grad_norm": 1.4168442487716675, "learning_rate": 2e-05, "loss": 0.04287499, "step": 7379 }, { "epoch": 14.76, "grad_norm": 0.9543439745903015, "learning_rate": 2e-05, "loss": 0.03545227, "step": 7380 }, { "epoch": 14.762, "grad_norm": 1.3868792057037354, "learning_rate": 2e-05, "loss": 0.04994515, "step": 7381 }, { "epoch": 14.764, "grad_norm": 0.9922243356704712, "learning_rate": 2e-05, "loss": 0.03060872, "step": 7382 }, { "epoch": 14.766, "grad_norm": 2.0604913234710693, "learning_rate": 2e-05, "loss": 0.04502428, "step": 7383 }, { "epoch": 14.768, "grad_norm": 1.438658595085144, "learning_rate": 2e-05, "loss": 0.05292256, "step": 7384 }, { "epoch": 14.77, "grad_norm": 1.722885251045227, "learning_rate": 2e-05, "loss": 0.04075219, "step": 7385 }, { "epoch": 14.772, "grad_norm": 1.515668272972107, "learning_rate": 2e-05, "loss": 0.04151274, "step": 7386 }, { "epoch": 14.774000000000001, "grad_norm": 1.3373991250991821, "learning_rate": 2e-05, "loss": 0.04042624, "step": 7387 }, { "epoch": 14.776, "grad_norm": 0.9790331125259399, "learning_rate": 2e-05, "loss": 0.03010193, "step": 7388 }, { "epoch": 14.778, "grad_norm": 0.9510990977287292, "learning_rate": 2e-05, "loss": 0.0254411, "step": 7389 }, { "epoch": 14.78, "grad_norm": 1.0509860515594482, "learning_rate": 2e-05, "loss": 0.02739971, "step": 7390 }, { "epoch": 14.782, "grad_norm": 2.5741069316864014, "learning_rate": 2e-05, "loss": 0.0422002, "step": 7391 }, { "epoch": 14.784, "grad_norm": 0.9521342515945435, "learning_rate": 2e-05, "loss": 0.03664888, "step": 7392 }, { "epoch": 14.786, "grad_norm": 1.2054708003997803, "learning_rate": 2e-05, "loss": 0.04093951, "step": 7393 }, { "epoch": 14.788, "grad_norm": 1.9645034074783325, "learning_rate": 2e-05, "loss": 0.05200192, "step": 7394 }, { "epoch": 14.79, "grad_norm": 1.1182868480682373, "learning_rate": 2e-05, "loss": 0.03928383, "step": 7395 }, { "epoch": 14.792, "grad_norm": 1.5255334377288818, "learning_rate": 2e-05, "loss": 0.04104205, "step": 7396 }, { "epoch": 14.794, "grad_norm": 1.66049063205719, "learning_rate": 2e-05, "loss": 0.05779614, "step": 7397 }, { "epoch": 14.796, "grad_norm": 1.6589398384094238, "learning_rate": 2e-05, "loss": 0.04276388, "step": 7398 }, { "epoch": 14.798, "grad_norm": 0.8656236529350281, "learning_rate": 2e-05, "loss": 0.02607629, "step": 7399 }, { "epoch": 14.8, "grad_norm": 0.8151983618736267, "learning_rate": 2e-05, "loss": 0.02651509, "step": 7400 }, { "epoch": 14.802, "grad_norm": 2.643232583999634, "learning_rate": 2e-05, "loss": 0.05977096, "step": 7401 }, { "epoch": 14.804, "grad_norm": 1.9966683387756348, "learning_rate": 2e-05, "loss": 0.0388038, "step": 7402 }, { "epoch": 14.806000000000001, "grad_norm": 1.535290241241455, "learning_rate": 2e-05, "loss": 0.04256013, "step": 7403 }, { "epoch": 14.808, "grad_norm": 1.6191688776016235, "learning_rate": 2e-05, "loss": 0.05272256, "step": 7404 }, { "epoch": 14.81, "grad_norm": 1.427708625793457, "learning_rate": 2e-05, "loss": 0.04024253, "step": 7405 }, { "epoch": 14.812, "grad_norm": 1.8937057256698608, "learning_rate": 2e-05, "loss": 0.03314988, "step": 7406 }, { "epoch": 14.814, "grad_norm": 1.1515634059906006, "learning_rate": 2e-05, "loss": 0.03387989, "step": 7407 }, { "epoch": 14.816, "grad_norm": 1.1773699522018433, "learning_rate": 2e-05, "loss": 0.04624472, "step": 7408 }, { "epoch": 14.818, "grad_norm": 1.3907095193862915, "learning_rate": 2e-05, "loss": 0.05477601, "step": 7409 }, { "epoch": 14.82, "grad_norm": 1.4064421653747559, "learning_rate": 2e-05, "loss": 0.04966973, "step": 7410 }, { "epoch": 14.822, "grad_norm": 1.3635202646255493, "learning_rate": 2e-05, "loss": 0.04148151, "step": 7411 }, { "epoch": 14.824, "grad_norm": 1.360349416732788, "learning_rate": 2e-05, "loss": 0.04895236, "step": 7412 }, { "epoch": 14.826, "grad_norm": 1.11383855342865, "learning_rate": 2e-05, "loss": 0.03705441, "step": 7413 }, { "epoch": 14.828, "grad_norm": 1.4103302955627441, "learning_rate": 2e-05, "loss": 0.04357664, "step": 7414 }, { "epoch": 14.83, "grad_norm": 1.8369293212890625, "learning_rate": 2e-05, "loss": 0.04775097, "step": 7415 }, { "epoch": 14.832, "grad_norm": 0.877086877822876, "learning_rate": 2e-05, "loss": 0.03288189, "step": 7416 }, { "epoch": 14.834, "grad_norm": 1.3127063512802124, "learning_rate": 2e-05, "loss": 0.04806757, "step": 7417 }, { "epoch": 14.836, "grad_norm": 0.9036020040512085, "learning_rate": 2e-05, "loss": 0.02839821, "step": 7418 }, { "epoch": 14.838, "grad_norm": 2.5509889125823975, "learning_rate": 2e-05, "loss": 0.05470347, "step": 7419 }, { "epoch": 14.84, "grad_norm": 2.004408121109009, "learning_rate": 2e-05, "loss": 0.05826382, "step": 7420 }, { "epoch": 14.842, "grad_norm": 0.9761636257171631, "learning_rate": 2e-05, "loss": 0.04079866, "step": 7421 }, { "epoch": 14.844, "grad_norm": 1.5073083639144897, "learning_rate": 2e-05, "loss": 0.05504016, "step": 7422 }, { "epoch": 14.846, "grad_norm": 1.0407030582427979, "learning_rate": 2e-05, "loss": 0.03778191, "step": 7423 }, { "epoch": 14.848, "grad_norm": 2.6649961471557617, "learning_rate": 2e-05, "loss": 0.04741925, "step": 7424 }, { "epoch": 14.85, "grad_norm": 1.7973006963729858, "learning_rate": 2e-05, "loss": 0.04919315, "step": 7425 }, { "epoch": 14.852, "grad_norm": 1.7582905292510986, "learning_rate": 2e-05, "loss": 0.04707357, "step": 7426 }, { "epoch": 14.854, "grad_norm": 1.3047398328781128, "learning_rate": 2e-05, "loss": 0.0351342, "step": 7427 }, { "epoch": 14.856, "grad_norm": 0.9953696131706238, "learning_rate": 2e-05, "loss": 0.03460802, "step": 7428 }, { "epoch": 14.858, "grad_norm": 1.7576355934143066, "learning_rate": 2e-05, "loss": 0.03853416, "step": 7429 }, { "epoch": 14.86, "grad_norm": 1.158715009689331, "learning_rate": 2e-05, "loss": 0.02868301, "step": 7430 }, { "epoch": 14.862, "grad_norm": 1.3568171262741089, "learning_rate": 2e-05, "loss": 0.03573724, "step": 7431 }, { "epoch": 14.864, "grad_norm": 1.1212353706359863, "learning_rate": 2e-05, "loss": 0.04652838, "step": 7432 }, { "epoch": 14.866, "grad_norm": 1.1447374820709229, "learning_rate": 2e-05, "loss": 0.03329033, "step": 7433 }, { "epoch": 14.868, "grad_norm": 0.9888138175010681, "learning_rate": 2e-05, "loss": 0.03221118, "step": 7434 }, { "epoch": 14.87, "grad_norm": 0.9587109088897705, "learning_rate": 2e-05, "loss": 0.03928025, "step": 7435 }, { "epoch": 14.872, "grad_norm": 1.1830155849456787, "learning_rate": 2e-05, "loss": 0.04943559, "step": 7436 }, { "epoch": 14.874, "grad_norm": 0.9363737106323242, "learning_rate": 2e-05, "loss": 0.03247594, "step": 7437 }, { "epoch": 14.876, "grad_norm": 1.324852705001831, "learning_rate": 2e-05, "loss": 0.04509073, "step": 7438 }, { "epoch": 14.878, "grad_norm": 1.1504321098327637, "learning_rate": 2e-05, "loss": 0.02755277, "step": 7439 }, { "epoch": 14.88, "grad_norm": 0.9816914796829224, "learning_rate": 2e-05, "loss": 0.03674835, "step": 7440 }, { "epoch": 14.882, "grad_norm": 1.2894209623336792, "learning_rate": 2e-05, "loss": 0.04641978, "step": 7441 }, { "epoch": 14.884, "grad_norm": 1.061409831047058, "learning_rate": 2e-05, "loss": 0.0386332, "step": 7442 }, { "epoch": 14.886, "grad_norm": 1.5462315082550049, "learning_rate": 2e-05, "loss": 0.04926273, "step": 7443 }, { "epoch": 14.888, "grad_norm": 1.955390214920044, "learning_rate": 2e-05, "loss": 0.04575516, "step": 7444 }, { "epoch": 14.89, "grad_norm": 0.9750779867172241, "learning_rate": 2e-05, "loss": 0.03601462, "step": 7445 }, { "epoch": 14.892, "grad_norm": 1.4240540266036987, "learning_rate": 2e-05, "loss": 0.04148903, "step": 7446 }, { "epoch": 14.894, "grad_norm": 1.4320528507232666, "learning_rate": 2e-05, "loss": 0.03879997, "step": 7447 }, { "epoch": 14.896, "grad_norm": 1.412207841873169, "learning_rate": 2e-05, "loss": 0.04467281, "step": 7448 }, { "epoch": 14.898, "grad_norm": 1.1392713785171509, "learning_rate": 2e-05, "loss": 0.04699032, "step": 7449 }, { "epoch": 14.9, "grad_norm": 3.836015462875366, "learning_rate": 2e-05, "loss": 0.04782456, "step": 7450 }, { "epoch": 14.902, "grad_norm": 0.871269941329956, "learning_rate": 2e-05, "loss": 0.03020436, "step": 7451 }, { "epoch": 14.904, "grad_norm": 1.190346121788025, "learning_rate": 2e-05, "loss": 0.04428768, "step": 7452 }, { "epoch": 14.906, "grad_norm": 1.5129135847091675, "learning_rate": 2e-05, "loss": 0.02693808, "step": 7453 }, { "epoch": 14.908, "grad_norm": 1.1676740646362305, "learning_rate": 2e-05, "loss": 0.04295947, "step": 7454 }, { "epoch": 14.91, "grad_norm": 3.26566743850708, "learning_rate": 2e-05, "loss": 0.04829793, "step": 7455 }, { "epoch": 14.912, "grad_norm": 1.788875937461853, "learning_rate": 2e-05, "loss": 0.04883059, "step": 7456 }, { "epoch": 14.914, "grad_norm": 1.2711299657821655, "learning_rate": 2e-05, "loss": 0.04746486, "step": 7457 }, { "epoch": 14.916, "grad_norm": 4.1879191398620605, "learning_rate": 2e-05, "loss": 0.05916186, "step": 7458 }, { "epoch": 14.918, "grad_norm": 1.8351664543151855, "learning_rate": 2e-05, "loss": 0.04157615, "step": 7459 }, { "epoch": 14.92, "grad_norm": 1.5852949619293213, "learning_rate": 2e-05, "loss": 0.03420291, "step": 7460 }, { "epoch": 14.922, "grad_norm": 1.8378655910491943, "learning_rate": 2e-05, "loss": 0.05364606, "step": 7461 }, { "epoch": 14.924, "grad_norm": 1.5688865184783936, "learning_rate": 2e-05, "loss": 0.03641159, "step": 7462 }, { "epoch": 14.926, "grad_norm": 1.0188319683074951, "learning_rate": 2e-05, "loss": 0.04969362, "step": 7463 }, { "epoch": 14.928, "grad_norm": 0.9952625632286072, "learning_rate": 2e-05, "loss": 0.03624305, "step": 7464 }, { "epoch": 14.93, "grad_norm": 1.1833257675170898, "learning_rate": 2e-05, "loss": 0.03593902, "step": 7465 }, { "epoch": 14.932, "grad_norm": 1.1890578269958496, "learning_rate": 2e-05, "loss": 0.03401609, "step": 7466 }, { "epoch": 14.934, "grad_norm": 0.9058767557144165, "learning_rate": 2e-05, "loss": 0.04047717, "step": 7467 }, { "epoch": 14.936, "grad_norm": 1.54295015335083, "learning_rate": 2e-05, "loss": 0.05161204, "step": 7468 }, { "epoch": 14.938, "grad_norm": 1.3080068826675415, "learning_rate": 2e-05, "loss": 0.04437819, "step": 7469 }, { "epoch": 14.94, "grad_norm": 0.7106160521507263, "learning_rate": 2e-05, "loss": 0.02167708, "step": 7470 }, { "epoch": 14.942, "grad_norm": 1.671543002128601, "learning_rate": 2e-05, "loss": 0.04790623, "step": 7471 }, { "epoch": 14.943999999999999, "grad_norm": 1.215187668800354, "learning_rate": 2e-05, "loss": 0.04291955, "step": 7472 }, { "epoch": 14.946, "grad_norm": 3.406282663345337, "learning_rate": 2e-05, "loss": 0.05099725, "step": 7473 }, { "epoch": 14.948, "grad_norm": 1.1276166439056396, "learning_rate": 2e-05, "loss": 0.04169073, "step": 7474 }, { "epoch": 14.95, "grad_norm": 1.4441436529159546, "learning_rate": 2e-05, "loss": 0.03202229, "step": 7475 }, { "epoch": 14.952, "grad_norm": 1.0247559547424316, "learning_rate": 2e-05, "loss": 0.0400821, "step": 7476 }, { "epoch": 14.954, "grad_norm": 0.9136931896209717, "learning_rate": 2e-05, "loss": 0.02682362, "step": 7477 }, { "epoch": 14.956, "grad_norm": 1.251654028892517, "learning_rate": 2e-05, "loss": 0.0477547, "step": 7478 }, { "epoch": 14.958, "grad_norm": 1.2477917671203613, "learning_rate": 2e-05, "loss": 0.03533474, "step": 7479 }, { "epoch": 14.96, "grad_norm": 1.268875241279602, "learning_rate": 2e-05, "loss": 0.04539894, "step": 7480 }, { "epoch": 14.962, "grad_norm": 1.031368613243103, "learning_rate": 2e-05, "loss": 0.0285139, "step": 7481 }, { "epoch": 14.964, "grad_norm": 2.505370616912842, "learning_rate": 2e-05, "loss": 0.04758614, "step": 7482 }, { "epoch": 14.966, "grad_norm": 1.1251351833343506, "learning_rate": 2e-05, "loss": 0.04266063, "step": 7483 }, { "epoch": 14.968, "grad_norm": 0.8792787790298462, "learning_rate": 2e-05, "loss": 0.03387887, "step": 7484 }, { "epoch": 14.97, "grad_norm": 0.8893079161643982, "learning_rate": 2e-05, "loss": 0.03276657, "step": 7485 }, { "epoch": 14.972, "grad_norm": 2.50982928276062, "learning_rate": 2e-05, "loss": 0.04137356, "step": 7486 }, { "epoch": 14.974, "grad_norm": 0.9173185229301453, "learning_rate": 2e-05, "loss": 0.03423457, "step": 7487 }, { "epoch": 14.975999999999999, "grad_norm": 1.1295170783996582, "learning_rate": 2e-05, "loss": 0.03446118, "step": 7488 }, { "epoch": 14.978, "grad_norm": 1.160467505455017, "learning_rate": 2e-05, "loss": 0.03613031, "step": 7489 }, { "epoch": 14.98, "grad_norm": 1.629970669746399, "learning_rate": 2e-05, "loss": 0.04517026, "step": 7490 }, { "epoch": 14.982, "grad_norm": 1.2040550708770752, "learning_rate": 2e-05, "loss": 0.04051655, "step": 7491 }, { "epoch": 14.984, "grad_norm": 0.9870254397392273, "learning_rate": 2e-05, "loss": 0.03302513, "step": 7492 }, { "epoch": 14.986, "grad_norm": 1.5879055261611938, "learning_rate": 2e-05, "loss": 0.04887541, "step": 7493 }, { "epoch": 14.988, "grad_norm": 0.8038175702095032, "learning_rate": 2e-05, "loss": 0.02730587, "step": 7494 }, { "epoch": 14.99, "grad_norm": 1.0914312601089478, "learning_rate": 2e-05, "loss": 0.0332062, "step": 7495 }, { "epoch": 14.992, "grad_norm": 1.0983259677886963, "learning_rate": 2e-05, "loss": 0.03311378, "step": 7496 }, { "epoch": 14.994, "grad_norm": 1.2042258977890015, "learning_rate": 2e-05, "loss": 0.04008223, "step": 7497 }, { "epoch": 14.996, "grad_norm": 1.188726544380188, "learning_rate": 2e-05, "loss": 0.03747816, "step": 7498 }, { "epoch": 14.998, "grad_norm": 0.9945477843284607, "learning_rate": 2e-05, "loss": 0.03766602, "step": 7499 }, { "epoch": 15.0, "grad_norm": 1.2218221426010132, "learning_rate": 2e-05, "loss": 0.04641752, "step": 7500 }, { "epoch": 15.0, "eval_performance": { "AngleClassification_1": 0.986, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9720558882235529, "Equal_1": 0.99, "Equal_2": 0.9740518962075848, "Equal_3": 0.9101796407185628, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.9920159680638723, "Parallel_1": 0.9859719438877755, "Parallel_2": 0.9939879759519038, "Parallel_3": 0.97, "Perpendicular_1": 0.996, "Perpendicular_2": 0.982, "Perpendicular_3": 0.7765531062124249, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 1.0, "PointLiesOnCircle_3": 0.9872000000000001, "PointLiesOnLine_1": 0.9899799599198397, "PointLiesOnLine_2": 0.9939879759519038, "PointLiesOnLine_3": 0.9760479041916168 }, "eval_runtime": 225.7906, "eval_samples_per_second": 46.503, "eval_steps_per_second": 0.93, "step": 7500 }, { "epoch": 15.002, "grad_norm": 1.557928204536438, "learning_rate": 2e-05, "loss": 0.04205696, "step": 7501 }, { "epoch": 15.004, "grad_norm": 1.5625946521759033, "learning_rate": 2e-05, "loss": 0.03305999, "step": 7502 }, { "epoch": 15.006, "grad_norm": 1.2314940690994263, "learning_rate": 2e-05, "loss": 0.03950959, "step": 7503 }, { "epoch": 15.008, "grad_norm": 1.0005085468292236, "learning_rate": 2e-05, "loss": 0.04110221, "step": 7504 }, { "epoch": 15.01, "grad_norm": 1.9720813035964966, "learning_rate": 2e-05, "loss": 0.05742538, "step": 7505 }, { "epoch": 15.012, "grad_norm": 1.0997799634933472, "learning_rate": 2e-05, "loss": 0.04821865, "step": 7506 }, { "epoch": 15.014, "grad_norm": 1.0127850770950317, "learning_rate": 2e-05, "loss": 0.03069933, "step": 7507 }, { "epoch": 15.016, "grad_norm": 1.1464051008224487, "learning_rate": 2e-05, "loss": 0.04115295, "step": 7508 }, { "epoch": 15.018, "grad_norm": 2.307401418685913, "learning_rate": 2e-05, "loss": 0.03383305, "step": 7509 }, { "epoch": 15.02, "grad_norm": 0.997746467590332, "learning_rate": 2e-05, "loss": 0.03681318, "step": 7510 }, { "epoch": 15.022, "grad_norm": 1.545511245727539, "learning_rate": 2e-05, "loss": 0.03775381, "step": 7511 }, { "epoch": 15.024, "grad_norm": 1.7267879247665405, "learning_rate": 2e-05, "loss": 0.05300741, "step": 7512 }, { "epoch": 15.026, "grad_norm": 1.5064767599105835, "learning_rate": 2e-05, "loss": 0.03169682, "step": 7513 }, { "epoch": 15.028, "grad_norm": 1.7127008438110352, "learning_rate": 2e-05, "loss": 0.03496314, "step": 7514 }, { "epoch": 15.03, "grad_norm": 1.106010913848877, "learning_rate": 2e-05, "loss": 0.03749539, "step": 7515 }, { "epoch": 15.032, "grad_norm": 1.1929855346679688, "learning_rate": 2e-05, "loss": 0.05238366, "step": 7516 }, { "epoch": 15.034, "grad_norm": 1.1891162395477295, "learning_rate": 2e-05, "loss": 0.04218661, "step": 7517 }, { "epoch": 15.036, "grad_norm": 1.3002663850784302, "learning_rate": 2e-05, "loss": 0.03623533, "step": 7518 }, { "epoch": 15.038, "grad_norm": 1.0941698551177979, "learning_rate": 2e-05, "loss": 0.04277907, "step": 7519 }, { "epoch": 15.04, "grad_norm": 1.277550458908081, "learning_rate": 2e-05, "loss": 0.04276868, "step": 7520 }, { "epoch": 15.042, "grad_norm": 1.7377294301986694, "learning_rate": 2e-05, "loss": 0.05513749, "step": 7521 }, { "epoch": 15.044, "grad_norm": 1.410678744316101, "learning_rate": 2e-05, "loss": 0.04281083, "step": 7522 }, { "epoch": 15.046, "grad_norm": 1.1023534536361694, "learning_rate": 2e-05, "loss": 0.03774637, "step": 7523 }, { "epoch": 15.048, "grad_norm": 1.0538054704666138, "learning_rate": 2e-05, "loss": 0.02899116, "step": 7524 }, { "epoch": 15.05, "grad_norm": 0.9917166233062744, "learning_rate": 2e-05, "loss": 0.03786308, "step": 7525 }, { "epoch": 15.052, "grad_norm": 1.0848370790481567, "learning_rate": 2e-05, "loss": 0.03448509, "step": 7526 }, { "epoch": 15.054, "grad_norm": 1.1327235698699951, "learning_rate": 2e-05, "loss": 0.04275092, "step": 7527 }, { "epoch": 15.056, "grad_norm": 0.93613201379776, "learning_rate": 2e-05, "loss": 0.03207397, "step": 7528 }, { "epoch": 15.058, "grad_norm": 1.0333102941513062, "learning_rate": 2e-05, "loss": 0.02628403, "step": 7529 }, { "epoch": 15.06, "grad_norm": 1.0832616090774536, "learning_rate": 2e-05, "loss": 0.03544102, "step": 7530 }, { "epoch": 15.062, "grad_norm": 2.4483532905578613, "learning_rate": 2e-05, "loss": 0.0385794, "step": 7531 }, { "epoch": 15.064, "grad_norm": 1.3084907531738281, "learning_rate": 2e-05, "loss": 0.03127644, "step": 7532 }, { "epoch": 15.066, "grad_norm": 1.1120984554290771, "learning_rate": 2e-05, "loss": 0.03920668, "step": 7533 }, { "epoch": 15.068, "grad_norm": 2.026540994644165, "learning_rate": 2e-05, "loss": 0.04868791, "step": 7534 }, { "epoch": 15.07, "grad_norm": 1.0464929342269897, "learning_rate": 2e-05, "loss": 0.04039855, "step": 7535 }, { "epoch": 15.072, "grad_norm": 1.6767228841781616, "learning_rate": 2e-05, "loss": 0.05304567, "step": 7536 }, { "epoch": 15.074, "grad_norm": 1.4716955423355103, "learning_rate": 2e-05, "loss": 0.04122161, "step": 7537 }, { "epoch": 15.076, "grad_norm": 2.1964807510375977, "learning_rate": 2e-05, "loss": 0.05560445, "step": 7538 }, { "epoch": 15.078, "grad_norm": 2.199388027191162, "learning_rate": 2e-05, "loss": 0.04462137, "step": 7539 }, { "epoch": 15.08, "grad_norm": 1.0083742141723633, "learning_rate": 2e-05, "loss": 0.03306791, "step": 7540 }, { "epoch": 15.082, "grad_norm": 3.076545476913452, "learning_rate": 2e-05, "loss": 0.03518068, "step": 7541 }, { "epoch": 15.084, "grad_norm": 1.1199493408203125, "learning_rate": 2e-05, "loss": 0.03014283, "step": 7542 }, { "epoch": 15.086, "grad_norm": 0.890809178352356, "learning_rate": 2e-05, "loss": 0.03238495, "step": 7543 }, { "epoch": 15.088, "grad_norm": 1.247981071472168, "learning_rate": 2e-05, "loss": 0.04605249, "step": 7544 }, { "epoch": 15.09, "grad_norm": 1.3851189613342285, "learning_rate": 2e-05, "loss": 0.03610485, "step": 7545 }, { "epoch": 15.092, "grad_norm": 1.0193473100662231, "learning_rate": 2e-05, "loss": 0.02862552, "step": 7546 }, { "epoch": 15.094, "grad_norm": 0.9744530916213989, "learning_rate": 2e-05, "loss": 0.0389993, "step": 7547 }, { "epoch": 15.096, "grad_norm": 1.0139203071594238, "learning_rate": 2e-05, "loss": 0.03570321, "step": 7548 }, { "epoch": 15.098, "grad_norm": 0.8550000190734863, "learning_rate": 2e-05, "loss": 0.02803791, "step": 7549 }, { "epoch": 15.1, "grad_norm": 1.6766440868377686, "learning_rate": 2e-05, "loss": 0.04521686, "step": 7550 }, { "epoch": 15.102, "grad_norm": 2.9240353107452393, "learning_rate": 2e-05, "loss": 0.04561481, "step": 7551 }, { "epoch": 15.104, "grad_norm": 1.196871280670166, "learning_rate": 2e-05, "loss": 0.03631102, "step": 7552 }, { "epoch": 15.106, "grad_norm": 1.0361727476119995, "learning_rate": 2e-05, "loss": 0.04521807, "step": 7553 }, { "epoch": 15.108, "grad_norm": 1.319674015045166, "learning_rate": 2e-05, "loss": 0.04189175, "step": 7554 }, { "epoch": 15.11, "grad_norm": 0.9767470955848694, "learning_rate": 2e-05, "loss": 0.03575244, "step": 7555 }, { "epoch": 15.112, "grad_norm": 1.0726113319396973, "learning_rate": 2e-05, "loss": 0.03597105, "step": 7556 }, { "epoch": 15.114, "grad_norm": 1.7892341613769531, "learning_rate": 2e-05, "loss": 0.04019105, "step": 7557 }, { "epoch": 15.116, "grad_norm": 1.277625560760498, "learning_rate": 2e-05, "loss": 0.03464985, "step": 7558 }, { "epoch": 15.118, "grad_norm": 1.0794398784637451, "learning_rate": 2e-05, "loss": 0.03574272, "step": 7559 }, { "epoch": 15.12, "grad_norm": 1.6861971616744995, "learning_rate": 2e-05, "loss": 0.03641678, "step": 7560 }, { "epoch": 15.122, "grad_norm": 2.5775976181030273, "learning_rate": 2e-05, "loss": 0.0388553, "step": 7561 }, { "epoch": 15.124, "grad_norm": 1.3023587465286255, "learning_rate": 2e-05, "loss": 0.02064148, "step": 7562 }, { "epoch": 15.126, "grad_norm": 0.9778832197189331, "learning_rate": 2e-05, "loss": 0.04047875, "step": 7563 }, { "epoch": 15.128, "grad_norm": 1.0193592309951782, "learning_rate": 2e-05, "loss": 0.04111153, "step": 7564 }, { "epoch": 15.13, "grad_norm": 1.251664638519287, "learning_rate": 2e-05, "loss": 0.03581004, "step": 7565 }, { "epoch": 15.132, "grad_norm": 1.410204291343689, "learning_rate": 2e-05, "loss": 0.04224623, "step": 7566 }, { "epoch": 15.134, "grad_norm": 1.0915162563323975, "learning_rate": 2e-05, "loss": 0.03597197, "step": 7567 }, { "epoch": 15.136, "grad_norm": 0.9125514030456543, "learning_rate": 2e-05, "loss": 0.03752094, "step": 7568 }, { "epoch": 15.138, "grad_norm": 1.5920462608337402, "learning_rate": 2e-05, "loss": 0.05846196, "step": 7569 }, { "epoch": 15.14, "grad_norm": 1.5193638801574707, "learning_rate": 2e-05, "loss": 0.03696744, "step": 7570 }, { "epoch": 15.142, "grad_norm": 1.4516115188598633, "learning_rate": 2e-05, "loss": 0.05756988, "step": 7571 }, { "epoch": 15.144, "grad_norm": 1.1954102516174316, "learning_rate": 2e-05, "loss": 0.02742215, "step": 7572 }, { "epoch": 15.146, "grad_norm": 1.6106290817260742, "learning_rate": 2e-05, "loss": 0.04774511, "step": 7573 }, { "epoch": 15.148, "grad_norm": 1.8577772378921509, "learning_rate": 2e-05, "loss": 0.03251471, "step": 7574 }, { "epoch": 15.15, "grad_norm": 2.0790493488311768, "learning_rate": 2e-05, "loss": 0.04956724, "step": 7575 }, { "epoch": 15.152, "grad_norm": 1.0265514850616455, "learning_rate": 2e-05, "loss": 0.04043811, "step": 7576 }, { "epoch": 15.154, "grad_norm": 1.4760632514953613, "learning_rate": 2e-05, "loss": 0.04143326, "step": 7577 }, { "epoch": 15.156, "grad_norm": 1.185500144958496, "learning_rate": 2e-05, "loss": 0.03098305, "step": 7578 }, { "epoch": 15.158, "grad_norm": 2.786694288253784, "learning_rate": 2e-05, "loss": 0.06076029, "step": 7579 }, { "epoch": 15.16, "grad_norm": 2.034240484237671, "learning_rate": 2e-05, "loss": 0.04073541, "step": 7580 }, { "epoch": 15.162, "grad_norm": 1.2866418361663818, "learning_rate": 2e-05, "loss": 0.04019268, "step": 7581 }, { "epoch": 15.164, "grad_norm": 1.5294523239135742, "learning_rate": 2e-05, "loss": 0.04336801, "step": 7582 }, { "epoch": 15.166, "grad_norm": 1.5890083312988281, "learning_rate": 2e-05, "loss": 0.05150065, "step": 7583 }, { "epoch": 15.168, "grad_norm": 1.3995965719223022, "learning_rate": 2e-05, "loss": 0.03880454, "step": 7584 }, { "epoch": 15.17, "grad_norm": 1.1512309312820435, "learning_rate": 2e-05, "loss": 0.04398931, "step": 7585 }, { "epoch": 15.172, "grad_norm": 1.1528098583221436, "learning_rate": 2e-05, "loss": 0.03739723, "step": 7586 }, { "epoch": 15.174, "grad_norm": 1.0696920156478882, "learning_rate": 2e-05, "loss": 0.041275, "step": 7587 }, { "epoch": 15.176, "grad_norm": 1.1969399452209473, "learning_rate": 2e-05, "loss": 0.03766329, "step": 7588 }, { "epoch": 15.178, "grad_norm": 1.4225414991378784, "learning_rate": 2e-05, "loss": 0.05640831, "step": 7589 }, { "epoch": 15.18, "grad_norm": 1.3486199378967285, "learning_rate": 2e-05, "loss": 0.04287706, "step": 7590 }, { "epoch": 15.182, "grad_norm": 1.0864142179489136, "learning_rate": 2e-05, "loss": 0.03106059, "step": 7591 }, { "epoch": 15.184, "grad_norm": 1.2384679317474365, "learning_rate": 2e-05, "loss": 0.02679091, "step": 7592 }, { "epoch": 15.186, "grad_norm": 2.18147349357605, "learning_rate": 2e-05, "loss": 0.05282824, "step": 7593 }, { "epoch": 15.188, "grad_norm": 0.9838606119155884, "learning_rate": 2e-05, "loss": 0.03508876, "step": 7594 }, { "epoch": 15.19, "grad_norm": 0.8631440997123718, "learning_rate": 2e-05, "loss": 0.02790235, "step": 7595 }, { "epoch": 15.192, "grad_norm": 0.8455491662025452, "learning_rate": 2e-05, "loss": 0.02652623, "step": 7596 }, { "epoch": 15.194, "grad_norm": 1.2782518863677979, "learning_rate": 2e-05, "loss": 0.04101584, "step": 7597 }, { "epoch": 15.196, "grad_norm": 1.6504355669021606, "learning_rate": 2e-05, "loss": 0.04782101, "step": 7598 }, { "epoch": 15.198, "grad_norm": 1.061281681060791, "learning_rate": 2e-05, "loss": 0.0424789, "step": 7599 }, { "epoch": 15.2, "grad_norm": 1.1287847757339478, "learning_rate": 2e-05, "loss": 0.04902516, "step": 7600 }, { "epoch": 15.202, "grad_norm": 1.4371165037155151, "learning_rate": 2e-05, "loss": 0.04958082, "step": 7601 }, { "epoch": 15.204, "grad_norm": 1.7850892543792725, "learning_rate": 2e-05, "loss": 0.07834439, "step": 7602 }, { "epoch": 15.206, "grad_norm": 1.0542948246002197, "learning_rate": 2e-05, "loss": 0.03976847, "step": 7603 }, { "epoch": 15.208, "grad_norm": 0.9708096981048584, "learning_rate": 2e-05, "loss": 0.03714234, "step": 7604 }, { "epoch": 15.21, "grad_norm": 1.2524781227111816, "learning_rate": 2e-05, "loss": 0.03946754, "step": 7605 }, { "epoch": 15.212, "grad_norm": 1.4123047590255737, "learning_rate": 2e-05, "loss": 0.03877112, "step": 7606 }, { "epoch": 15.214, "grad_norm": 1.051430106163025, "learning_rate": 2e-05, "loss": 0.02851279, "step": 7607 }, { "epoch": 15.216, "grad_norm": 1.3937458992004395, "learning_rate": 2e-05, "loss": 0.04662988, "step": 7608 }, { "epoch": 15.218, "grad_norm": 1.1337244510650635, "learning_rate": 2e-05, "loss": 0.03007739, "step": 7609 }, { "epoch": 15.22, "grad_norm": 1.182468056678772, "learning_rate": 2e-05, "loss": 0.03727046, "step": 7610 }, { "epoch": 15.222, "grad_norm": 1.2508074045181274, "learning_rate": 2e-05, "loss": 0.03215265, "step": 7611 }, { "epoch": 15.224, "grad_norm": 1.7075746059417725, "learning_rate": 2e-05, "loss": 0.05757492, "step": 7612 }, { "epoch": 15.226, "grad_norm": 2.3343417644500732, "learning_rate": 2e-05, "loss": 0.05249823, "step": 7613 }, { "epoch": 15.228, "grad_norm": 1.24342942237854, "learning_rate": 2e-05, "loss": 0.03456208, "step": 7614 }, { "epoch": 15.23, "grad_norm": 1.0007222890853882, "learning_rate": 2e-05, "loss": 0.04739401, "step": 7615 }, { "epoch": 15.232, "grad_norm": 1.1163939237594604, "learning_rate": 2e-05, "loss": 0.04084761, "step": 7616 }, { "epoch": 15.234, "grad_norm": 1.1431388854980469, "learning_rate": 2e-05, "loss": 0.03892222, "step": 7617 }, { "epoch": 15.236, "grad_norm": 1.26213538646698, "learning_rate": 2e-05, "loss": 0.0481387, "step": 7618 }, { "epoch": 15.238, "grad_norm": 2.136810541152954, "learning_rate": 2e-05, "loss": 0.03165914, "step": 7619 }, { "epoch": 15.24, "grad_norm": 1.6809396743774414, "learning_rate": 2e-05, "loss": 0.03271673, "step": 7620 }, { "epoch": 15.242, "grad_norm": 1.855609655380249, "learning_rate": 2e-05, "loss": 0.04284067, "step": 7621 }, { "epoch": 15.244, "grad_norm": 1.046613335609436, "learning_rate": 2e-05, "loss": 0.03612656, "step": 7622 }, { "epoch": 15.246, "grad_norm": 1.3431061506271362, "learning_rate": 2e-05, "loss": 0.0371665, "step": 7623 }, { "epoch": 15.248, "grad_norm": 1.3914761543273926, "learning_rate": 2e-05, "loss": 0.04148369, "step": 7624 }, { "epoch": 15.25, "grad_norm": 1.608260154724121, "learning_rate": 2e-05, "loss": 0.04472732, "step": 7625 }, { "epoch": 15.252, "grad_norm": 1.5010725259780884, "learning_rate": 2e-05, "loss": 0.06111418, "step": 7626 }, { "epoch": 15.254, "grad_norm": 1.4985872507095337, "learning_rate": 2e-05, "loss": 0.03983544, "step": 7627 }, { "epoch": 15.256, "grad_norm": 1.7013355493545532, "learning_rate": 2e-05, "loss": 0.06082825, "step": 7628 }, { "epoch": 15.258, "grad_norm": 1.8201754093170166, "learning_rate": 2e-05, "loss": 0.04746124, "step": 7629 }, { "epoch": 15.26, "grad_norm": 1.263026237487793, "learning_rate": 2e-05, "loss": 0.02332597, "step": 7630 }, { "epoch": 15.262, "grad_norm": 2.0221705436706543, "learning_rate": 2e-05, "loss": 0.04031182, "step": 7631 }, { "epoch": 15.264, "grad_norm": 0.9431454539299011, "learning_rate": 2e-05, "loss": 0.03405325, "step": 7632 }, { "epoch": 15.266, "grad_norm": 1.3153389692306519, "learning_rate": 2e-05, "loss": 0.0370644, "step": 7633 }, { "epoch": 15.268, "grad_norm": 1.096466064453125, "learning_rate": 2e-05, "loss": 0.03738741, "step": 7634 }, { "epoch": 15.27, "grad_norm": 2.728064775466919, "learning_rate": 2e-05, "loss": 0.03937779, "step": 7635 }, { "epoch": 15.272, "grad_norm": 1.0703537464141846, "learning_rate": 2e-05, "loss": 0.03998253, "step": 7636 }, { "epoch": 15.274000000000001, "grad_norm": 1.0958034992218018, "learning_rate": 2e-05, "loss": 0.03066216, "step": 7637 }, { "epoch": 15.276, "grad_norm": 1.029520869255066, "learning_rate": 2e-05, "loss": 0.02618698, "step": 7638 }, { "epoch": 15.278, "grad_norm": 1.2893452644348145, "learning_rate": 2e-05, "loss": 0.04865814, "step": 7639 }, { "epoch": 15.28, "grad_norm": 0.8651980757713318, "learning_rate": 2e-05, "loss": 0.03129237, "step": 7640 }, { "epoch": 15.282, "grad_norm": 1.9020334482192993, "learning_rate": 2e-05, "loss": 0.04576992, "step": 7641 }, { "epoch": 15.284, "grad_norm": 1.3959592580795288, "learning_rate": 2e-05, "loss": 0.03669586, "step": 7642 }, { "epoch": 15.286, "grad_norm": 1.8379170894622803, "learning_rate": 2e-05, "loss": 0.06496038, "step": 7643 }, { "epoch": 15.288, "grad_norm": 1.893050193786621, "learning_rate": 2e-05, "loss": 0.049568, "step": 7644 }, { "epoch": 15.29, "grad_norm": 1.7025266885757446, "learning_rate": 2e-05, "loss": 0.04464132, "step": 7645 }, { "epoch": 15.292, "grad_norm": 2.1882283687591553, "learning_rate": 2e-05, "loss": 0.04898166, "step": 7646 }, { "epoch": 15.294, "grad_norm": 2.703468084335327, "learning_rate": 2e-05, "loss": 0.0530495, "step": 7647 }, { "epoch": 15.296, "grad_norm": 3.292581081390381, "learning_rate": 2e-05, "loss": 0.04622526, "step": 7648 }, { "epoch": 15.298, "grad_norm": 1.2518445253372192, "learning_rate": 2e-05, "loss": 0.05069956, "step": 7649 }, { "epoch": 15.3, "grad_norm": 1.0237197875976562, "learning_rate": 2e-05, "loss": 0.03768081, "step": 7650 }, { "epoch": 15.302, "grad_norm": 1.203036904335022, "learning_rate": 2e-05, "loss": 0.03885972, "step": 7651 }, { "epoch": 15.304, "grad_norm": 1.184159755706787, "learning_rate": 2e-05, "loss": 0.04271705, "step": 7652 }, { "epoch": 15.306, "grad_norm": 1.2428264617919922, "learning_rate": 2e-05, "loss": 0.02890542, "step": 7653 }, { "epoch": 15.308, "grad_norm": 0.9060125946998596, "learning_rate": 2e-05, "loss": 0.02932029, "step": 7654 }, { "epoch": 15.31, "grad_norm": 2.193204641342163, "learning_rate": 2e-05, "loss": 0.05215523, "step": 7655 }, { "epoch": 15.312, "grad_norm": 1.183048963546753, "learning_rate": 2e-05, "loss": 0.03876815, "step": 7656 }, { "epoch": 15.314, "grad_norm": 1.1747117042541504, "learning_rate": 2e-05, "loss": 0.05035491, "step": 7657 }, { "epoch": 15.316, "grad_norm": 1.402410626411438, "learning_rate": 2e-05, "loss": 0.03307761, "step": 7658 }, { "epoch": 15.318, "grad_norm": 1.4300638437271118, "learning_rate": 2e-05, "loss": 0.03186027, "step": 7659 }, { "epoch": 15.32, "grad_norm": 1.235596776008606, "learning_rate": 2e-05, "loss": 0.04644901, "step": 7660 }, { "epoch": 15.322, "grad_norm": 0.8476399779319763, "learning_rate": 2e-05, "loss": 0.02548402, "step": 7661 }, { "epoch": 15.324, "grad_norm": 2.9809422492980957, "learning_rate": 2e-05, "loss": 0.05239486, "step": 7662 }, { "epoch": 15.326, "grad_norm": 1.1389051675796509, "learning_rate": 2e-05, "loss": 0.03720506, "step": 7663 }, { "epoch": 15.328, "grad_norm": 1.9306964874267578, "learning_rate": 2e-05, "loss": 0.0395176, "step": 7664 }, { "epoch": 15.33, "grad_norm": 1.088045597076416, "learning_rate": 2e-05, "loss": 0.03753523, "step": 7665 }, { "epoch": 15.332, "grad_norm": 1.3429683446884155, "learning_rate": 2e-05, "loss": 0.04176991, "step": 7666 }, { "epoch": 15.334, "grad_norm": 1.4225213527679443, "learning_rate": 2e-05, "loss": 0.0463496, "step": 7667 }, { "epoch": 15.336, "grad_norm": 1.330991268157959, "learning_rate": 2e-05, "loss": 0.03719035, "step": 7668 }, { "epoch": 15.338, "grad_norm": 1.850281834602356, "learning_rate": 2e-05, "loss": 0.05090012, "step": 7669 }, { "epoch": 15.34, "grad_norm": 0.9360353946685791, "learning_rate": 2e-05, "loss": 0.03247702, "step": 7670 }, { "epoch": 15.342, "grad_norm": 0.9975693821907043, "learning_rate": 2e-05, "loss": 0.03344682, "step": 7671 }, { "epoch": 15.344, "grad_norm": 1.7215051651000977, "learning_rate": 2e-05, "loss": 0.0533718, "step": 7672 }, { "epoch": 15.346, "grad_norm": 1.1974735260009766, "learning_rate": 2e-05, "loss": 0.04304528, "step": 7673 }, { "epoch": 15.348, "grad_norm": 1.9166557788848877, "learning_rate": 2e-05, "loss": 0.06019102, "step": 7674 }, { "epoch": 15.35, "grad_norm": 0.9654914736747742, "learning_rate": 2e-05, "loss": 0.03540226, "step": 7675 }, { "epoch": 15.352, "grad_norm": 1.1596163511276245, "learning_rate": 2e-05, "loss": 0.03922988, "step": 7676 }, { "epoch": 15.354, "grad_norm": 2.368647336959839, "learning_rate": 2e-05, "loss": 0.04518018, "step": 7677 }, { "epoch": 15.356, "grad_norm": 1.3481358289718628, "learning_rate": 2e-05, "loss": 0.03930181, "step": 7678 }, { "epoch": 15.358, "grad_norm": 1.3387371301651, "learning_rate": 2e-05, "loss": 0.05144273, "step": 7679 }, { "epoch": 15.36, "grad_norm": 1.5232727527618408, "learning_rate": 2e-05, "loss": 0.05948942, "step": 7680 }, { "epoch": 15.362, "grad_norm": 1.140566110610962, "learning_rate": 2e-05, "loss": 0.03790478, "step": 7681 }, { "epoch": 15.364, "grad_norm": 1.2953228950500488, "learning_rate": 2e-05, "loss": 0.04990582, "step": 7682 }, { "epoch": 15.366, "grad_norm": 1.0877348184585571, "learning_rate": 2e-05, "loss": 0.04641817, "step": 7683 }, { "epoch": 15.368, "grad_norm": 1.9188339710235596, "learning_rate": 2e-05, "loss": 0.06377004, "step": 7684 }, { "epoch": 15.37, "grad_norm": 1.6604701280593872, "learning_rate": 2e-05, "loss": 0.04895961, "step": 7685 }, { "epoch": 15.372, "grad_norm": 1.4072130918502808, "learning_rate": 2e-05, "loss": 0.04444826, "step": 7686 }, { "epoch": 15.374, "grad_norm": 0.940191388130188, "learning_rate": 2e-05, "loss": 0.03842501, "step": 7687 }, { "epoch": 15.376, "grad_norm": 0.9707368612289429, "learning_rate": 2e-05, "loss": 0.03641964, "step": 7688 }, { "epoch": 15.378, "grad_norm": 1.3805454969406128, "learning_rate": 2e-05, "loss": 0.04570456, "step": 7689 }, { "epoch": 15.38, "grad_norm": 1.4637885093688965, "learning_rate": 2e-05, "loss": 0.03580109, "step": 7690 }, { "epoch": 15.382, "grad_norm": 1.8317047357559204, "learning_rate": 2e-05, "loss": 0.03461726, "step": 7691 }, { "epoch": 15.384, "grad_norm": 1.2678900957107544, "learning_rate": 2e-05, "loss": 0.03470122, "step": 7692 }, { "epoch": 15.386, "grad_norm": 1.447558879852295, "learning_rate": 2e-05, "loss": 0.05309576, "step": 7693 }, { "epoch": 15.388, "grad_norm": 1.1916170120239258, "learning_rate": 2e-05, "loss": 0.04509161, "step": 7694 }, { "epoch": 15.39, "grad_norm": 1.3137471675872803, "learning_rate": 2e-05, "loss": 0.05664599, "step": 7695 }, { "epoch": 15.392, "grad_norm": 0.9028839468955994, "learning_rate": 2e-05, "loss": 0.03184561, "step": 7696 }, { "epoch": 15.394, "grad_norm": 0.8823025822639465, "learning_rate": 2e-05, "loss": 0.03336647, "step": 7697 }, { "epoch": 15.396, "grad_norm": 1.3378214836120605, "learning_rate": 2e-05, "loss": 0.0493676, "step": 7698 }, { "epoch": 15.398, "grad_norm": 0.9779813289642334, "learning_rate": 2e-05, "loss": 0.03241831, "step": 7699 }, { "epoch": 15.4, "grad_norm": 1.9845422506332397, "learning_rate": 2e-05, "loss": 0.0413547, "step": 7700 }, { "epoch": 15.402, "grad_norm": 1.5225841999053955, "learning_rate": 2e-05, "loss": 0.03932335, "step": 7701 }, { "epoch": 15.404, "grad_norm": 1.2310850620269775, "learning_rate": 2e-05, "loss": 0.03592661, "step": 7702 }, { "epoch": 15.406, "grad_norm": 1.1418870687484741, "learning_rate": 2e-05, "loss": 0.04763779, "step": 7703 }, { "epoch": 15.408, "grad_norm": 1.1342737674713135, "learning_rate": 2e-05, "loss": 0.03362895, "step": 7704 }, { "epoch": 15.41, "grad_norm": 1.2697423696517944, "learning_rate": 2e-05, "loss": 0.02714735, "step": 7705 }, { "epoch": 15.412, "grad_norm": 1.030122995376587, "learning_rate": 2e-05, "loss": 0.0416874, "step": 7706 }, { "epoch": 15.414, "grad_norm": 1.0790464878082275, "learning_rate": 2e-05, "loss": 0.02673035, "step": 7707 }, { "epoch": 15.416, "grad_norm": 1.2027027606964111, "learning_rate": 2e-05, "loss": 0.03530141, "step": 7708 }, { "epoch": 15.418, "grad_norm": 1.4824391603469849, "learning_rate": 2e-05, "loss": 0.04331167, "step": 7709 }, { "epoch": 15.42, "grad_norm": 1.0345323085784912, "learning_rate": 2e-05, "loss": 0.03011656, "step": 7710 }, { "epoch": 15.422, "grad_norm": 2.050442695617676, "learning_rate": 2e-05, "loss": 0.03534184, "step": 7711 }, { "epoch": 15.424, "grad_norm": 1.0294818878173828, "learning_rate": 2e-05, "loss": 0.03894817, "step": 7712 }, { "epoch": 15.426, "grad_norm": 2.1123604774475098, "learning_rate": 2e-05, "loss": 0.06618219, "step": 7713 }, { "epoch": 15.428, "grad_norm": 1.5144546031951904, "learning_rate": 2e-05, "loss": 0.04135579, "step": 7714 }, { "epoch": 15.43, "grad_norm": 0.8413070440292358, "learning_rate": 2e-05, "loss": 0.02757905, "step": 7715 }, { "epoch": 15.432, "grad_norm": 1.941845417022705, "learning_rate": 2e-05, "loss": 0.05244355, "step": 7716 }, { "epoch": 15.434, "grad_norm": 1.0594807863235474, "learning_rate": 2e-05, "loss": 0.04461173, "step": 7717 }, { "epoch": 15.436, "grad_norm": 1.158675193786621, "learning_rate": 2e-05, "loss": 0.03866138, "step": 7718 }, { "epoch": 15.438, "grad_norm": 2.713379144668579, "learning_rate": 2e-05, "loss": 0.03219736, "step": 7719 }, { "epoch": 15.44, "grad_norm": 1.86960768699646, "learning_rate": 2e-05, "loss": 0.04166104, "step": 7720 }, { "epoch": 15.442, "grad_norm": 1.026113748550415, "learning_rate": 2e-05, "loss": 0.05023716, "step": 7721 }, { "epoch": 15.444, "grad_norm": 3.3883445262908936, "learning_rate": 2e-05, "loss": 0.06564912, "step": 7722 }, { "epoch": 15.446, "grad_norm": 1.2680797576904297, "learning_rate": 2e-05, "loss": 0.03963304, "step": 7723 }, { "epoch": 15.448, "grad_norm": 3.2638938426971436, "learning_rate": 2e-05, "loss": 0.05487224, "step": 7724 }, { "epoch": 15.45, "grad_norm": 1.248444676399231, "learning_rate": 2e-05, "loss": 0.03439521, "step": 7725 }, { "epoch": 15.452, "grad_norm": 0.7581607103347778, "learning_rate": 2e-05, "loss": 0.02444012, "step": 7726 }, { "epoch": 15.454, "grad_norm": 1.1835821866989136, "learning_rate": 2e-05, "loss": 0.04622556, "step": 7727 }, { "epoch": 15.456, "grad_norm": 1.3690377473831177, "learning_rate": 2e-05, "loss": 0.03906656, "step": 7728 }, { "epoch": 15.458, "grad_norm": 1.3632426261901855, "learning_rate": 2e-05, "loss": 0.0395397, "step": 7729 }, { "epoch": 15.46, "grad_norm": 1.2494593858718872, "learning_rate": 2e-05, "loss": 0.0408316, "step": 7730 }, { "epoch": 15.462, "grad_norm": 1.5658994913101196, "learning_rate": 2e-05, "loss": 0.04589403, "step": 7731 }, { "epoch": 15.464, "grad_norm": 4.707587242126465, "learning_rate": 2e-05, "loss": 0.04985627, "step": 7732 }, { "epoch": 15.466, "grad_norm": 1.3491953611373901, "learning_rate": 2e-05, "loss": 0.04348074, "step": 7733 }, { "epoch": 15.468, "grad_norm": 2.7525410652160645, "learning_rate": 2e-05, "loss": 0.04507884, "step": 7734 }, { "epoch": 15.47, "grad_norm": 1.8973398208618164, "learning_rate": 2e-05, "loss": 0.05182186, "step": 7735 }, { "epoch": 15.472, "grad_norm": 1.2980663776397705, "learning_rate": 2e-05, "loss": 0.03535274, "step": 7736 }, { "epoch": 15.474, "grad_norm": 1.1315213441848755, "learning_rate": 2e-05, "loss": 0.04221178, "step": 7737 }, { "epoch": 15.475999999999999, "grad_norm": 1.1220656633377075, "learning_rate": 2e-05, "loss": 0.03012344, "step": 7738 }, { "epoch": 15.478, "grad_norm": 0.978743314743042, "learning_rate": 2e-05, "loss": 0.03460223, "step": 7739 }, { "epoch": 15.48, "grad_norm": 0.8892778158187866, "learning_rate": 2e-05, "loss": 0.0230247, "step": 7740 }, { "epoch": 15.482, "grad_norm": 1.766182541847229, "learning_rate": 2e-05, "loss": 0.04481532, "step": 7741 }, { "epoch": 15.484, "grad_norm": 1.1637574434280396, "learning_rate": 2e-05, "loss": 0.03531402, "step": 7742 }, { "epoch": 15.486, "grad_norm": 1.1224381923675537, "learning_rate": 2e-05, "loss": 0.04462992, "step": 7743 }, { "epoch": 15.488, "grad_norm": 1.7183787822723389, "learning_rate": 2e-05, "loss": 0.04551856, "step": 7744 }, { "epoch": 15.49, "grad_norm": 1.1579662561416626, "learning_rate": 2e-05, "loss": 0.03555235, "step": 7745 }, { "epoch": 15.492, "grad_norm": 0.9623416662216187, "learning_rate": 2e-05, "loss": 0.03363761, "step": 7746 }, { "epoch": 15.494, "grad_norm": 1.427245020866394, "learning_rate": 2e-05, "loss": 0.0442277, "step": 7747 }, { "epoch": 15.496, "grad_norm": 0.8991954326629639, "learning_rate": 2e-05, "loss": 0.02649901, "step": 7748 }, { "epoch": 15.498, "grad_norm": 1.0670665502548218, "learning_rate": 2e-05, "loss": 0.04006524, "step": 7749 }, { "epoch": 15.5, "grad_norm": 0.9669760465621948, "learning_rate": 2e-05, "loss": 0.04074419, "step": 7750 }, { "epoch": 15.502, "grad_norm": 1.112985372543335, "learning_rate": 2e-05, "loss": 0.03872652, "step": 7751 }, { "epoch": 15.504, "grad_norm": 1.3237699270248413, "learning_rate": 2e-05, "loss": 0.03350469, "step": 7752 }, { "epoch": 15.506, "grad_norm": 1.0669316053390503, "learning_rate": 2e-05, "loss": 0.03404175, "step": 7753 }, { "epoch": 15.508, "grad_norm": 1.0441585779190063, "learning_rate": 2e-05, "loss": 0.03397904, "step": 7754 }, { "epoch": 15.51, "grad_norm": 1.191551685333252, "learning_rate": 2e-05, "loss": 0.03678114, "step": 7755 }, { "epoch": 15.512, "grad_norm": 1.4069856405258179, "learning_rate": 2e-05, "loss": 0.03311434, "step": 7756 }, { "epoch": 15.514, "grad_norm": 1.086069941520691, "learning_rate": 2e-05, "loss": 0.03558622, "step": 7757 }, { "epoch": 15.516, "grad_norm": 0.9918950796127319, "learning_rate": 2e-05, "loss": 0.03249758, "step": 7758 }, { "epoch": 15.518, "grad_norm": 1.2385040521621704, "learning_rate": 2e-05, "loss": 0.04162885, "step": 7759 }, { "epoch": 15.52, "grad_norm": 1.160630226135254, "learning_rate": 2e-05, "loss": 0.05510244, "step": 7760 }, { "epoch": 15.522, "grad_norm": 1.53986394405365, "learning_rate": 2e-05, "loss": 0.04068907, "step": 7761 }, { "epoch": 15.524000000000001, "grad_norm": 1.2052150964736938, "learning_rate": 2e-05, "loss": 0.03750294, "step": 7762 }, { "epoch": 15.526, "grad_norm": 1.2036486864089966, "learning_rate": 2e-05, "loss": 0.04060003, "step": 7763 }, { "epoch": 15.528, "grad_norm": 1.5752509832382202, "learning_rate": 2e-05, "loss": 0.04484061, "step": 7764 }, { "epoch": 15.53, "grad_norm": 1.4314789772033691, "learning_rate": 2e-05, "loss": 0.04542669, "step": 7765 }, { "epoch": 15.532, "grad_norm": 1.3356164693832397, "learning_rate": 2e-05, "loss": 0.04883364, "step": 7766 }, { "epoch": 15.534, "grad_norm": 1.765552282333374, "learning_rate": 2e-05, "loss": 0.03921126, "step": 7767 }, { "epoch": 15.536, "grad_norm": 1.3386117219924927, "learning_rate": 2e-05, "loss": 0.03825998, "step": 7768 }, { "epoch": 15.538, "grad_norm": 1.4762791395187378, "learning_rate": 2e-05, "loss": 0.03431319, "step": 7769 }, { "epoch": 15.54, "grad_norm": 1.317010760307312, "learning_rate": 2e-05, "loss": 0.0385484, "step": 7770 }, { "epoch": 15.542, "grad_norm": 0.8216134309768677, "learning_rate": 2e-05, "loss": 0.02420896, "step": 7771 }, { "epoch": 15.544, "grad_norm": 1.2794997692108154, "learning_rate": 2e-05, "loss": 0.05096934, "step": 7772 }, { "epoch": 15.546, "grad_norm": 1.6122490167617798, "learning_rate": 2e-05, "loss": 0.06559946, "step": 7773 }, { "epoch": 15.548, "grad_norm": 2.614630937576294, "learning_rate": 2e-05, "loss": 0.05338411, "step": 7774 }, { "epoch": 15.55, "grad_norm": 1.1659183502197266, "learning_rate": 2e-05, "loss": 0.04944208, "step": 7775 }, { "epoch": 15.552, "grad_norm": 1.2592909336090088, "learning_rate": 2e-05, "loss": 0.03840531, "step": 7776 }, { "epoch": 15.554, "grad_norm": 1.0753098726272583, "learning_rate": 2e-05, "loss": 0.03585375, "step": 7777 }, { "epoch": 15.556000000000001, "grad_norm": 1.7121466398239136, "learning_rate": 2e-05, "loss": 0.04599754, "step": 7778 }, { "epoch": 15.558, "grad_norm": 1.4254724979400635, "learning_rate": 2e-05, "loss": 0.02906418, "step": 7779 }, { "epoch": 15.56, "grad_norm": 1.1564379930496216, "learning_rate": 2e-05, "loss": 0.06452915, "step": 7780 }, { "epoch": 15.562, "grad_norm": 1.134081482887268, "learning_rate": 2e-05, "loss": 0.04153468, "step": 7781 }, { "epoch": 15.564, "grad_norm": 0.8990920782089233, "learning_rate": 2e-05, "loss": 0.02347866, "step": 7782 }, { "epoch": 15.566, "grad_norm": 1.2137106657028198, "learning_rate": 2e-05, "loss": 0.04685804, "step": 7783 }, { "epoch": 15.568, "grad_norm": 1.5330703258514404, "learning_rate": 2e-05, "loss": 0.04677338, "step": 7784 }, { "epoch": 15.57, "grad_norm": 1.2445857524871826, "learning_rate": 2e-05, "loss": 0.04343664, "step": 7785 }, { "epoch": 15.572, "grad_norm": 0.970074474811554, "learning_rate": 2e-05, "loss": 0.0376314, "step": 7786 }, { "epoch": 15.574, "grad_norm": 1.5463167428970337, "learning_rate": 2e-05, "loss": 0.04986656, "step": 7787 }, { "epoch": 15.576, "grad_norm": 0.8951558470726013, "learning_rate": 2e-05, "loss": 0.02550288, "step": 7788 }, { "epoch": 15.578, "grad_norm": 1.0044795274734497, "learning_rate": 2e-05, "loss": 0.03512788, "step": 7789 }, { "epoch": 15.58, "grad_norm": 1.1778446435928345, "learning_rate": 2e-05, "loss": 0.03681631, "step": 7790 }, { "epoch": 15.582, "grad_norm": 1.3863009214401245, "learning_rate": 2e-05, "loss": 0.03972707, "step": 7791 }, { "epoch": 15.584, "grad_norm": 1.0235799551010132, "learning_rate": 2e-05, "loss": 0.03635886, "step": 7792 }, { "epoch": 15.586, "grad_norm": 1.3906949758529663, "learning_rate": 2e-05, "loss": 0.03971368, "step": 7793 }, { "epoch": 15.588, "grad_norm": 1.0922150611877441, "learning_rate": 2e-05, "loss": 0.03174236, "step": 7794 }, { "epoch": 15.59, "grad_norm": 1.214142918586731, "learning_rate": 2e-05, "loss": 0.05460586, "step": 7795 }, { "epoch": 15.592, "grad_norm": 1.5496350526809692, "learning_rate": 2e-05, "loss": 0.03093884, "step": 7796 }, { "epoch": 15.594, "grad_norm": 1.126539707183838, "learning_rate": 2e-05, "loss": 0.04682115, "step": 7797 }, { "epoch": 15.596, "grad_norm": 1.6726691722869873, "learning_rate": 2e-05, "loss": 0.0424552, "step": 7798 }, { "epoch": 15.598, "grad_norm": 1.787841796875, "learning_rate": 2e-05, "loss": 0.04241154, "step": 7799 }, { "epoch": 15.6, "grad_norm": 1.0305230617523193, "learning_rate": 2e-05, "loss": 0.02969407, "step": 7800 }, { "epoch": 15.602, "grad_norm": 1.0762102603912354, "learning_rate": 2e-05, "loss": 0.04477984, "step": 7801 }, { "epoch": 15.604, "grad_norm": 0.9955341219902039, "learning_rate": 2e-05, "loss": 0.03920032, "step": 7802 }, { "epoch": 15.606, "grad_norm": 1.0392699241638184, "learning_rate": 2e-05, "loss": 0.03647071, "step": 7803 }, { "epoch": 15.608, "grad_norm": 1.2719612121582031, "learning_rate": 2e-05, "loss": 0.0349829, "step": 7804 }, { "epoch": 15.61, "grad_norm": 1.1297434568405151, "learning_rate": 2e-05, "loss": 0.03863156, "step": 7805 }, { "epoch": 15.612, "grad_norm": 1.6058623790740967, "learning_rate": 2e-05, "loss": 0.03482516, "step": 7806 }, { "epoch": 15.614, "grad_norm": 2.3734793663024902, "learning_rate": 2e-05, "loss": 0.04142359, "step": 7807 }, { "epoch": 15.616, "grad_norm": 0.9767808318138123, "learning_rate": 2e-05, "loss": 0.0383327, "step": 7808 }, { "epoch": 15.618, "grad_norm": 1.6942983865737915, "learning_rate": 2e-05, "loss": 0.0472929, "step": 7809 }, { "epoch": 15.62, "grad_norm": 1.1018776893615723, "learning_rate": 2e-05, "loss": 0.03809138, "step": 7810 }, { "epoch": 15.622, "grad_norm": 0.9520467519760132, "learning_rate": 2e-05, "loss": 0.03386376, "step": 7811 }, { "epoch": 15.624, "grad_norm": 1.1687006950378418, "learning_rate": 2e-05, "loss": 0.04521569, "step": 7812 }, { "epoch": 15.626, "grad_norm": 1.074813961982727, "learning_rate": 2e-05, "loss": 0.03068561, "step": 7813 }, { "epoch": 15.628, "grad_norm": 1.6634571552276611, "learning_rate": 2e-05, "loss": 0.06002465, "step": 7814 }, { "epoch": 15.63, "grad_norm": 1.3967303037643433, "learning_rate": 2e-05, "loss": 0.04519261, "step": 7815 }, { "epoch": 15.632, "grad_norm": 1.5178707838058472, "learning_rate": 2e-05, "loss": 0.03018497, "step": 7816 }, { "epoch": 15.634, "grad_norm": 1.25950026512146, "learning_rate": 2e-05, "loss": 0.03467363, "step": 7817 }, { "epoch": 15.636, "grad_norm": 0.9524564146995544, "learning_rate": 2e-05, "loss": 0.04067912, "step": 7818 }, { "epoch": 15.638, "grad_norm": 0.869853138923645, "learning_rate": 2e-05, "loss": 0.03137665, "step": 7819 }, { "epoch": 15.64, "grad_norm": 1.2985329627990723, "learning_rate": 2e-05, "loss": 0.04355665, "step": 7820 }, { "epoch": 15.642, "grad_norm": 1.6391212940216064, "learning_rate": 2e-05, "loss": 0.06307023, "step": 7821 }, { "epoch": 15.644, "grad_norm": 1.1266062259674072, "learning_rate": 2e-05, "loss": 0.03864892, "step": 7822 }, { "epoch": 15.646, "grad_norm": 1.741550326347351, "learning_rate": 2e-05, "loss": 0.06039545, "step": 7823 }, { "epoch": 15.648, "grad_norm": 1.5423344373703003, "learning_rate": 2e-05, "loss": 0.04814845, "step": 7824 }, { "epoch": 15.65, "grad_norm": 1.0673892498016357, "learning_rate": 2e-05, "loss": 0.0428374, "step": 7825 }, { "epoch": 15.652, "grad_norm": 0.904364824295044, "learning_rate": 2e-05, "loss": 0.03052113, "step": 7826 }, { "epoch": 15.654, "grad_norm": 0.9480156302452087, "learning_rate": 2e-05, "loss": 0.03563298, "step": 7827 }, { "epoch": 15.656, "grad_norm": 1.5284701585769653, "learning_rate": 2e-05, "loss": 0.04021053, "step": 7828 }, { "epoch": 15.658, "grad_norm": 2.147240161895752, "learning_rate": 2e-05, "loss": 0.04992049, "step": 7829 }, { "epoch": 15.66, "grad_norm": 1.4739441871643066, "learning_rate": 2e-05, "loss": 0.04327197, "step": 7830 }, { "epoch": 15.662, "grad_norm": 0.9157878160476685, "learning_rate": 2e-05, "loss": 0.03195861, "step": 7831 }, { "epoch": 15.664, "grad_norm": 1.4952819347381592, "learning_rate": 2e-05, "loss": 0.04057154, "step": 7832 }, { "epoch": 15.666, "grad_norm": 1.425283432006836, "learning_rate": 2e-05, "loss": 0.03624725, "step": 7833 }, { "epoch": 15.668, "grad_norm": 1.0445493459701538, "learning_rate": 2e-05, "loss": 0.037708, "step": 7834 }, { "epoch": 15.67, "grad_norm": 1.6078704595565796, "learning_rate": 2e-05, "loss": 0.04818354, "step": 7835 }, { "epoch": 15.672, "grad_norm": 1.5199404954910278, "learning_rate": 2e-05, "loss": 0.02817157, "step": 7836 }, { "epoch": 15.674, "grad_norm": 1.0294718742370605, "learning_rate": 2e-05, "loss": 0.02661425, "step": 7837 }, { "epoch": 15.676, "grad_norm": 1.0648747682571411, "learning_rate": 2e-05, "loss": 0.03399039, "step": 7838 }, { "epoch": 15.678, "grad_norm": 0.7924081683158875, "learning_rate": 2e-05, "loss": 0.02621391, "step": 7839 }, { "epoch": 15.68, "grad_norm": 1.2964757680892944, "learning_rate": 2e-05, "loss": 0.0436234, "step": 7840 }, { "epoch": 15.682, "grad_norm": 1.3265763521194458, "learning_rate": 2e-05, "loss": 0.04007149, "step": 7841 }, { "epoch": 15.684, "grad_norm": 2.019439935684204, "learning_rate": 2e-05, "loss": 0.05867039, "step": 7842 }, { "epoch": 15.686, "grad_norm": 1.2825301885604858, "learning_rate": 2e-05, "loss": 0.04822504, "step": 7843 }, { "epoch": 15.688, "grad_norm": 1.897964358329773, "learning_rate": 2e-05, "loss": 0.03815127, "step": 7844 }, { "epoch": 15.69, "grad_norm": 0.8126182556152344, "learning_rate": 2e-05, "loss": 0.0339037, "step": 7845 }, { "epoch": 15.692, "grad_norm": 1.4083561897277832, "learning_rate": 2e-05, "loss": 0.05471381, "step": 7846 }, { "epoch": 15.693999999999999, "grad_norm": 0.8566709756851196, "learning_rate": 2e-05, "loss": 0.02002315, "step": 7847 }, { "epoch": 15.696, "grad_norm": 0.8835247159004211, "learning_rate": 2e-05, "loss": 0.03178211, "step": 7848 }, { "epoch": 15.698, "grad_norm": 0.9253415465354919, "learning_rate": 2e-05, "loss": 0.03928689, "step": 7849 }, { "epoch": 15.7, "grad_norm": 0.9615568518638611, "learning_rate": 2e-05, "loss": 0.04066771, "step": 7850 }, { "epoch": 15.702, "grad_norm": 1.516654372215271, "learning_rate": 2e-05, "loss": 0.03212117, "step": 7851 }, { "epoch": 15.704, "grad_norm": 1.6420469284057617, "learning_rate": 2e-05, "loss": 0.04172636, "step": 7852 }, { "epoch": 15.706, "grad_norm": 1.3478672504425049, "learning_rate": 2e-05, "loss": 0.04686579, "step": 7853 }, { "epoch": 15.708, "grad_norm": 1.3303403854370117, "learning_rate": 2e-05, "loss": 0.04044079, "step": 7854 }, { "epoch": 15.71, "grad_norm": 1.8347476720809937, "learning_rate": 2e-05, "loss": 0.05151783, "step": 7855 }, { "epoch": 15.712, "grad_norm": 1.1154577732086182, "learning_rate": 2e-05, "loss": 0.02915175, "step": 7856 }, { "epoch": 15.714, "grad_norm": 0.8913232088088989, "learning_rate": 2e-05, "loss": 0.03567526, "step": 7857 }, { "epoch": 15.716, "grad_norm": 1.7599512338638306, "learning_rate": 2e-05, "loss": 0.0343769, "step": 7858 }, { "epoch": 15.718, "grad_norm": 0.9211016297340393, "learning_rate": 2e-05, "loss": 0.02759037, "step": 7859 }, { "epoch": 15.72, "grad_norm": 1.878779649734497, "learning_rate": 2e-05, "loss": 0.04826758, "step": 7860 }, { "epoch": 15.722, "grad_norm": 1.4636439085006714, "learning_rate": 2e-05, "loss": 0.04096328, "step": 7861 }, { "epoch": 15.724, "grad_norm": 0.9986635446548462, "learning_rate": 2e-05, "loss": 0.03688172, "step": 7862 }, { "epoch": 15.725999999999999, "grad_norm": 0.9908128380775452, "learning_rate": 2e-05, "loss": 0.03275237, "step": 7863 }, { "epoch": 15.728, "grad_norm": 1.918545126914978, "learning_rate": 2e-05, "loss": 0.03817414, "step": 7864 }, { "epoch": 15.73, "grad_norm": 3.8011674880981445, "learning_rate": 2e-05, "loss": 0.05294338, "step": 7865 }, { "epoch": 15.732, "grad_norm": 1.147255301475525, "learning_rate": 2e-05, "loss": 0.03717214, "step": 7866 }, { "epoch": 15.734, "grad_norm": 0.9382572174072266, "learning_rate": 2e-05, "loss": 0.03351625, "step": 7867 }, { "epoch": 15.736, "grad_norm": 1.2691545486450195, "learning_rate": 2e-05, "loss": 0.04154573, "step": 7868 }, { "epoch": 15.738, "grad_norm": 2.889281988143921, "learning_rate": 2e-05, "loss": 0.03962121, "step": 7869 }, { "epoch": 15.74, "grad_norm": 1.1248325109481812, "learning_rate": 2e-05, "loss": 0.03695472, "step": 7870 }, { "epoch": 15.742, "grad_norm": 0.8429155349731445, "learning_rate": 2e-05, "loss": 0.02736717, "step": 7871 }, { "epoch": 15.744, "grad_norm": 1.1789798736572266, "learning_rate": 2e-05, "loss": 0.04046225, "step": 7872 }, { "epoch": 15.746, "grad_norm": 1.5250542163848877, "learning_rate": 2e-05, "loss": 0.04735745, "step": 7873 }, { "epoch": 15.748, "grad_norm": 1.8450084924697876, "learning_rate": 2e-05, "loss": 0.05485067, "step": 7874 }, { "epoch": 15.75, "grad_norm": 3.0329360961914062, "learning_rate": 2e-05, "loss": 0.06134909, "step": 7875 }, { "epoch": 15.752, "grad_norm": 1.1295710802078247, "learning_rate": 2e-05, "loss": 0.04350587, "step": 7876 }, { "epoch": 15.754, "grad_norm": 2.126436471939087, "learning_rate": 2e-05, "loss": 0.05210446, "step": 7877 }, { "epoch": 15.756, "grad_norm": 1.1575413942337036, "learning_rate": 2e-05, "loss": 0.03410969, "step": 7878 }, { "epoch": 15.758, "grad_norm": 0.9184319972991943, "learning_rate": 2e-05, "loss": 0.03023987, "step": 7879 }, { "epoch": 15.76, "grad_norm": 1.588777780532837, "learning_rate": 2e-05, "loss": 0.0422967, "step": 7880 }, { "epoch": 15.762, "grad_norm": 1.2701281309127808, "learning_rate": 2e-05, "loss": 0.04782207, "step": 7881 }, { "epoch": 15.764, "grad_norm": 1.8854856491088867, "learning_rate": 2e-05, "loss": 0.05017621, "step": 7882 }, { "epoch": 15.766, "grad_norm": 1.053436040878296, "learning_rate": 2e-05, "loss": 0.0406818, "step": 7883 }, { "epoch": 15.768, "grad_norm": 1.8011058568954468, "learning_rate": 2e-05, "loss": 0.06167689, "step": 7884 }, { "epoch": 15.77, "grad_norm": 1.2609344720840454, "learning_rate": 2e-05, "loss": 0.04231767, "step": 7885 }, { "epoch": 15.772, "grad_norm": 2.656273365020752, "learning_rate": 2e-05, "loss": 0.05135484, "step": 7886 }, { "epoch": 15.774000000000001, "grad_norm": 1.0233291387557983, "learning_rate": 2e-05, "loss": 0.03619993, "step": 7887 }, { "epoch": 15.776, "grad_norm": 1.1752357482910156, "learning_rate": 2e-05, "loss": 0.0331499, "step": 7888 }, { "epoch": 15.778, "grad_norm": 1.099245548248291, "learning_rate": 2e-05, "loss": 0.03570152, "step": 7889 }, { "epoch": 15.78, "grad_norm": 1.6190221309661865, "learning_rate": 2e-05, "loss": 0.04851193, "step": 7890 }, { "epoch": 15.782, "grad_norm": 5.613228797912598, "learning_rate": 2e-05, "loss": 0.03546342, "step": 7891 }, { "epoch": 15.784, "grad_norm": 1.6386754512786865, "learning_rate": 2e-05, "loss": 0.04636355, "step": 7892 }, { "epoch": 15.786, "grad_norm": 1.0663293600082397, "learning_rate": 2e-05, "loss": 0.04427087, "step": 7893 }, { "epoch": 15.788, "grad_norm": 4.527882099151611, "learning_rate": 2e-05, "loss": 0.03295729, "step": 7894 }, { "epoch": 15.79, "grad_norm": 1.9447163343429565, "learning_rate": 2e-05, "loss": 0.05903506, "step": 7895 }, { "epoch": 15.792, "grad_norm": 1.5174610614776611, "learning_rate": 2e-05, "loss": 0.04814832, "step": 7896 }, { "epoch": 15.794, "grad_norm": 1.3773410320281982, "learning_rate": 2e-05, "loss": 0.05088367, "step": 7897 }, { "epoch": 15.796, "grad_norm": 1.1215673685073853, "learning_rate": 2e-05, "loss": 0.02210806, "step": 7898 }, { "epoch": 15.798, "grad_norm": 1.004719853401184, "learning_rate": 2e-05, "loss": 0.03888254, "step": 7899 }, { "epoch": 15.8, "grad_norm": 1.546207070350647, "learning_rate": 2e-05, "loss": 0.041657, "step": 7900 }, { "epoch": 15.802, "grad_norm": 1.0606039762496948, "learning_rate": 2e-05, "loss": 0.0301213, "step": 7901 }, { "epoch": 15.804, "grad_norm": 1.0109505653381348, "learning_rate": 2e-05, "loss": 0.03798253, "step": 7902 }, { "epoch": 15.806000000000001, "grad_norm": 1.4582864046096802, "learning_rate": 2e-05, "loss": 0.04615562, "step": 7903 }, { "epoch": 15.808, "grad_norm": 1.4145830869674683, "learning_rate": 2e-05, "loss": 0.03849223, "step": 7904 }, { "epoch": 15.81, "grad_norm": 0.8847577571868896, "learning_rate": 2e-05, "loss": 0.03191437, "step": 7905 }, { "epoch": 15.812, "grad_norm": 1.6212092638015747, "learning_rate": 2e-05, "loss": 0.04598674, "step": 7906 }, { "epoch": 15.814, "grad_norm": 1.573914647102356, "learning_rate": 2e-05, "loss": 0.03930136, "step": 7907 }, { "epoch": 15.816, "grad_norm": 2.4983999729156494, "learning_rate": 2e-05, "loss": 0.03371926, "step": 7908 }, { "epoch": 15.818, "grad_norm": 1.5313496589660645, "learning_rate": 2e-05, "loss": 0.03282915, "step": 7909 }, { "epoch": 15.82, "grad_norm": 1.246812343597412, "learning_rate": 2e-05, "loss": 0.03869613, "step": 7910 }, { "epoch": 15.822, "grad_norm": 1.3490315675735474, "learning_rate": 2e-05, "loss": 0.04502165, "step": 7911 }, { "epoch": 15.824, "grad_norm": 1.5969985723495483, "learning_rate": 2e-05, "loss": 0.05496085, "step": 7912 }, { "epoch": 15.826, "grad_norm": 2.36088228225708, "learning_rate": 2e-05, "loss": 0.03794518, "step": 7913 }, { "epoch": 15.828, "grad_norm": 1.6516250371932983, "learning_rate": 2e-05, "loss": 0.04537345, "step": 7914 }, { "epoch": 15.83, "grad_norm": 1.5622444152832031, "learning_rate": 2e-05, "loss": 0.04535617, "step": 7915 }, { "epoch": 15.832, "grad_norm": 0.8911896347999573, "learning_rate": 2e-05, "loss": 0.03232848, "step": 7916 }, { "epoch": 15.834, "grad_norm": 1.075323224067688, "learning_rate": 2e-05, "loss": 0.03736228, "step": 7917 }, { "epoch": 15.836, "grad_norm": 1.2276790142059326, "learning_rate": 2e-05, "loss": 0.03339919, "step": 7918 }, { "epoch": 15.838, "grad_norm": 1.595897912979126, "learning_rate": 2e-05, "loss": 0.04447206, "step": 7919 }, { "epoch": 15.84, "grad_norm": 1.2562133073806763, "learning_rate": 2e-05, "loss": 0.04073772, "step": 7920 }, { "epoch": 15.842, "grad_norm": 1.1153814792633057, "learning_rate": 2e-05, "loss": 0.03090102, "step": 7921 }, { "epoch": 15.844, "grad_norm": 1.4374891519546509, "learning_rate": 2e-05, "loss": 0.03741089, "step": 7922 }, { "epoch": 15.846, "grad_norm": 0.8877723217010498, "learning_rate": 2e-05, "loss": 0.0321798, "step": 7923 }, { "epoch": 15.848, "grad_norm": 1.107550024986267, "learning_rate": 2e-05, "loss": 0.03546412, "step": 7924 }, { "epoch": 15.85, "grad_norm": 1.7710790634155273, "learning_rate": 2e-05, "loss": 0.03670359, "step": 7925 }, { "epoch": 15.852, "grad_norm": 1.2706103324890137, "learning_rate": 2e-05, "loss": 0.04913092, "step": 7926 }, { "epoch": 15.854, "grad_norm": 1.1824009418487549, "learning_rate": 2e-05, "loss": 0.03691457, "step": 7927 }, { "epoch": 15.856, "grad_norm": 1.2337533235549927, "learning_rate": 2e-05, "loss": 0.04146959, "step": 7928 }, { "epoch": 15.858, "grad_norm": 0.8419338464736938, "learning_rate": 2e-05, "loss": 0.03086981, "step": 7929 }, { "epoch": 15.86, "grad_norm": 1.4547673463821411, "learning_rate": 2e-05, "loss": 0.04086094, "step": 7930 }, { "epoch": 15.862, "grad_norm": 1.0149017572402954, "learning_rate": 2e-05, "loss": 0.03698645, "step": 7931 }, { "epoch": 15.864, "grad_norm": 1.2069945335388184, "learning_rate": 2e-05, "loss": 0.04774645, "step": 7932 }, { "epoch": 15.866, "grad_norm": 1.2200182676315308, "learning_rate": 2e-05, "loss": 0.03306384, "step": 7933 }, { "epoch": 15.868, "grad_norm": 0.8806574940681458, "learning_rate": 2e-05, "loss": 0.03241273, "step": 7934 }, { "epoch": 15.87, "grad_norm": 1.6967767477035522, "learning_rate": 2e-05, "loss": 0.04194974, "step": 7935 }, { "epoch": 15.872, "grad_norm": 1.4084641933441162, "learning_rate": 2e-05, "loss": 0.04042656, "step": 7936 }, { "epoch": 15.874, "grad_norm": 1.2383458614349365, "learning_rate": 2e-05, "loss": 0.04217947, "step": 7937 }, { "epoch": 15.876, "grad_norm": 3.609055995941162, "learning_rate": 2e-05, "loss": 0.03822013, "step": 7938 }, { "epoch": 15.878, "grad_norm": 1.2390830516815186, "learning_rate": 2e-05, "loss": 0.04104143, "step": 7939 }, { "epoch": 15.88, "grad_norm": 1.4262648820877075, "learning_rate": 2e-05, "loss": 0.04152397, "step": 7940 }, { "epoch": 15.882, "grad_norm": 1.27792227268219, "learning_rate": 2e-05, "loss": 0.04371581, "step": 7941 }, { "epoch": 15.884, "grad_norm": 0.9439547061920166, "learning_rate": 2e-05, "loss": 0.02535809, "step": 7942 }, { "epoch": 15.886, "grad_norm": 1.5940001010894775, "learning_rate": 2e-05, "loss": 0.05881022, "step": 7943 }, { "epoch": 15.888, "grad_norm": 1.0834542512893677, "learning_rate": 2e-05, "loss": 0.028259, "step": 7944 }, { "epoch": 15.89, "grad_norm": 2.248506784439087, "learning_rate": 2e-05, "loss": 0.06713785, "step": 7945 }, { "epoch": 15.892, "grad_norm": 1.2265493869781494, "learning_rate": 2e-05, "loss": 0.04777764, "step": 7946 }, { "epoch": 15.894, "grad_norm": 0.9338040351867676, "learning_rate": 2e-05, "loss": 0.03433283, "step": 7947 }, { "epoch": 15.896, "grad_norm": 1.1235183477401733, "learning_rate": 2e-05, "loss": 0.03330047, "step": 7948 }, { "epoch": 15.898, "grad_norm": 1.0070992708206177, "learning_rate": 2e-05, "loss": 0.03542108, "step": 7949 }, { "epoch": 15.9, "grad_norm": 0.8756537437438965, "learning_rate": 2e-05, "loss": 0.03316933, "step": 7950 }, { "epoch": 15.902, "grad_norm": 1.609824538230896, "learning_rate": 2e-05, "loss": 0.04423177, "step": 7951 }, { "epoch": 15.904, "grad_norm": 1.4646433591842651, "learning_rate": 2e-05, "loss": 0.05716739, "step": 7952 }, { "epoch": 15.906, "grad_norm": 1.1899229288101196, "learning_rate": 2e-05, "loss": 0.03317807, "step": 7953 }, { "epoch": 15.908, "grad_norm": 1.2386949062347412, "learning_rate": 2e-05, "loss": 0.03982987, "step": 7954 }, { "epoch": 15.91, "grad_norm": 1.570691704750061, "learning_rate": 2e-05, "loss": 0.04921625, "step": 7955 }, { "epoch": 15.912, "grad_norm": 0.6894180178642273, "learning_rate": 2e-05, "loss": 0.02395061, "step": 7956 }, { "epoch": 15.914, "grad_norm": 0.9492577314376831, "learning_rate": 2e-05, "loss": 0.03679375, "step": 7957 }, { "epoch": 15.916, "grad_norm": 1.4363038539886475, "learning_rate": 2e-05, "loss": 0.04188465, "step": 7958 }, { "epoch": 15.918, "grad_norm": 1.5397295951843262, "learning_rate": 2e-05, "loss": 0.06281649, "step": 7959 }, { "epoch": 15.92, "grad_norm": 2.3833301067352295, "learning_rate": 2e-05, "loss": 0.02980723, "step": 7960 }, { "epoch": 15.922, "grad_norm": 0.883203387260437, "learning_rate": 2e-05, "loss": 0.04569215, "step": 7961 }, { "epoch": 15.924, "grad_norm": 1.0678246021270752, "learning_rate": 2e-05, "loss": 0.03685597, "step": 7962 }, { "epoch": 15.926, "grad_norm": 1.7394555807113647, "learning_rate": 2e-05, "loss": 0.03072222, "step": 7963 }, { "epoch": 15.928, "grad_norm": 1.0772579908370972, "learning_rate": 2e-05, "loss": 0.0308566, "step": 7964 }, { "epoch": 15.93, "grad_norm": 1.9518120288848877, "learning_rate": 2e-05, "loss": 0.04014736, "step": 7965 }, { "epoch": 15.932, "grad_norm": 1.169349193572998, "learning_rate": 2e-05, "loss": 0.04369571, "step": 7966 }, { "epoch": 15.934, "grad_norm": 0.7779093384742737, "learning_rate": 2e-05, "loss": 0.02372968, "step": 7967 }, { "epoch": 15.936, "grad_norm": 1.4448097944259644, "learning_rate": 2e-05, "loss": 0.05406664, "step": 7968 }, { "epoch": 15.938, "grad_norm": 1.6183838844299316, "learning_rate": 2e-05, "loss": 0.04816063, "step": 7969 }, { "epoch": 15.94, "grad_norm": 1.1067332029342651, "learning_rate": 2e-05, "loss": 0.03822577, "step": 7970 }, { "epoch": 15.942, "grad_norm": 1.0273240804672241, "learning_rate": 2e-05, "loss": 0.03934061, "step": 7971 }, { "epoch": 15.943999999999999, "grad_norm": 1.1367223262786865, "learning_rate": 2e-05, "loss": 0.02649461, "step": 7972 }, { "epoch": 15.946, "grad_norm": 1.181577205657959, "learning_rate": 2e-05, "loss": 0.03041214, "step": 7973 }, { "epoch": 15.948, "grad_norm": 1.11647629737854, "learning_rate": 2e-05, "loss": 0.04945065, "step": 7974 }, { "epoch": 15.95, "grad_norm": 1.1161892414093018, "learning_rate": 2e-05, "loss": 0.0392701, "step": 7975 }, { "epoch": 15.952, "grad_norm": 1.4086583852767944, "learning_rate": 2e-05, "loss": 0.04204567, "step": 7976 }, { "epoch": 15.954, "grad_norm": 1.4107561111450195, "learning_rate": 2e-05, "loss": 0.02596482, "step": 7977 }, { "epoch": 15.956, "grad_norm": 0.8716321587562561, "learning_rate": 2e-05, "loss": 0.03079114, "step": 7978 }, { "epoch": 15.958, "grad_norm": 1.027134656906128, "learning_rate": 2e-05, "loss": 0.03699373, "step": 7979 }, { "epoch": 15.96, "grad_norm": 1.266416072845459, "learning_rate": 2e-05, "loss": 0.04633315, "step": 7980 }, { "epoch": 15.962, "grad_norm": 0.822369396686554, "learning_rate": 2e-05, "loss": 0.02923862, "step": 7981 }, { "epoch": 15.964, "grad_norm": 1.3281052112579346, "learning_rate": 2e-05, "loss": 0.04442987, "step": 7982 }, { "epoch": 15.966, "grad_norm": 6.2168474197387695, "learning_rate": 2e-05, "loss": 0.06364525, "step": 7983 }, { "epoch": 15.968, "grad_norm": 1.0501468181610107, "learning_rate": 2e-05, "loss": 0.04018227, "step": 7984 }, { "epoch": 15.97, "grad_norm": 1.1146767139434814, "learning_rate": 2e-05, "loss": 0.03635713, "step": 7985 }, { "epoch": 15.972, "grad_norm": 1.3646762371063232, "learning_rate": 2e-05, "loss": 0.04565104, "step": 7986 }, { "epoch": 15.974, "grad_norm": 1.211958646774292, "learning_rate": 2e-05, "loss": 0.04290543, "step": 7987 }, { "epoch": 15.975999999999999, "grad_norm": 0.8478017449378967, "learning_rate": 2e-05, "loss": 0.02894137, "step": 7988 }, { "epoch": 15.978, "grad_norm": 1.4163686037063599, "learning_rate": 2e-05, "loss": 0.05084491, "step": 7989 }, { "epoch": 15.98, "grad_norm": 1.583600401878357, "learning_rate": 2e-05, "loss": 0.04655541, "step": 7990 }, { "epoch": 15.982, "grad_norm": 1.109586477279663, "learning_rate": 2e-05, "loss": 0.03833565, "step": 7991 }, { "epoch": 15.984, "grad_norm": 1.1122475862503052, "learning_rate": 2e-05, "loss": 0.03135904, "step": 7992 }, { "epoch": 15.986, "grad_norm": 1.5063152313232422, "learning_rate": 2e-05, "loss": 0.04342312, "step": 7993 }, { "epoch": 15.988, "grad_norm": 1.3837445974349976, "learning_rate": 2e-05, "loss": 0.04920746, "step": 7994 }, { "epoch": 15.99, "grad_norm": 1.5701265335083008, "learning_rate": 2e-05, "loss": 0.04522835, "step": 7995 }, { "epoch": 15.992, "grad_norm": 0.8673783540725708, "learning_rate": 2e-05, "loss": 0.02733445, "step": 7996 }, { "epoch": 15.994, "grad_norm": 1.2412492036819458, "learning_rate": 2e-05, "loss": 0.04029534, "step": 7997 }, { "epoch": 15.996, "grad_norm": 1.0142319202423096, "learning_rate": 2e-05, "loss": 0.03173064, "step": 7998 }, { "epoch": 15.998, "grad_norm": 1.7063957452774048, "learning_rate": 2e-05, "loss": 0.04368523, "step": 7999 }, { "epoch": 16.0, "grad_norm": 0.9795099496841431, "learning_rate": 2e-05, "loss": 0.03788676, "step": 8000 }, { "epoch": 16.0, "eval_performance": { "AngleClassification_1": 0.99, "AngleClassification_2": 0.998, "AngleClassification_3": 0.9740518962075848, "Equal_1": 0.994, "Equal_2": 0.9660678642714571, "Equal_3": 0.9321357285429142, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.9940119760479041, "Parallel_1": 0.9859719438877755, "Parallel_2": 0.9939879759519038, "Parallel_3": 0.968, "Perpendicular_1": 0.994, "Perpendicular_2": 0.986, "Perpendicular_3": 0.751503006012024, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.996, "PointLiesOnCircle_3": 0.9928666666666667, "PointLiesOnLine_1": 0.9939879759519038, "PointLiesOnLine_2": 0.9939879759519038, "PointLiesOnLine_3": 0.9700598802395209 }, "eval_runtime": 226.4764, "eval_samples_per_second": 46.362, "eval_steps_per_second": 0.927, "step": 8000 }, { "epoch": 16.002, "grad_norm": 2.4531359672546387, "learning_rate": 2e-05, "loss": 0.03846169, "step": 8001 }, { "epoch": 16.004, "grad_norm": 1.5599803924560547, "learning_rate": 2e-05, "loss": 0.05297387, "step": 8002 }, { "epoch": 16.006, "grad_norm": 1.3853962421417236, "learning_rate": 2e-05, "loss": 0.03354284, "step": 8003 }, { "epoch": 16.008, "grad_norm": 1.2347185611724854, "learning_rate": 2e-05, "loss": 0.0346626, "step": 8004 }, { "epoch": 16.01, "grad_norm": 1.0327732563018799, "learning_rate": 2e-05, "loss": 0.02749027, "step": 8005 }, { "epoch": 16.012, "grad_norm": 1.6354575157165527, "learning_rate": 2e-05, "loss": 0.05818766, "step": 8006 }, { "epoch": 16.014, "grad_norm": 1.3607192039489746, "learning_rate": 2e-05, "loss": 0.04475515, "step": 8007 }, { "epoch": 16.016, "grad_norm": 0.9957086443901062, "learning_rate": 2e-05, "loss": 0.03047884, "step": 8008 }, { "epoch": 16.018, "grad_norm": 1.7068284749984741, "learning_rate": 2e-05, "loss": 0.03341687, "step": 8009 }, { "epoch": 16.02, "grad_norm": 1.6480611562728882, "learning_rate": 2e-05, "loss": 0.05826873, "step": 8010 }, { "epoch": 16.022, "grad_norm": 0.8659833073616028, "learning_rate": 2e-05, "loss": 0.02813331, "step": 8011 }, { "epoch": 16.024, "grad_norm": 1.4187767505645752, "learning_rate": 2e-05, "loss": 0.05168469, "step": 8012 }, { "epoch": 16.026, "grad_norm": 1.1414672136306763, "learning_rate": 2e-05, "loss": 0.04796594, "step": 8013 }, { "epoch": 16.028, "grad_norm": 1.459039330482483, "learning_rate": 2e-05, "loss": 0.04561671, "step": 8014 }, { "epoch": 16.03, "grad_norm": 1.4020471572875977, "learning_rate": 2e-05, "loss": 0.053342, "step": 8015 }, { "epoch": 16.032, "grad_norm": 1.8280372619628906, "learning_rate": 2e-05, "loss": 0.06271575, "step": 8016 }, { "epoch": 16.034, "grad_norm": 1.3474657535552979, "learning_rate": 2e-05, "loss": 0.05314874, "step": 8017 }, { "epoch": 16.036, "grad_norm": 1.7729127407073975, "learning_rate": 2e-05, "loss": 0.03907238, "step": 8018 }, { "epoch": 16.038, "grad_norm": 1.0745488405227661, "learning_rate": 2e-05, "loss": 0.03631027, "step": 8019 }, { "epoch": 16.04, "grad_norm": 1.0704498291015625, "learning_rate": 2e-05, "loss": 0.03062935, "step": 8020 }, { "epoch": 16.042, "grad_norm": 0.7624976634979248, "learning_rate": 2e-05, "loss": 0.02638179, "step": 8021 }, { "epoch": 16.044, "grad_norm": 0.8391230702400208, "learning_rate": 2e-05, "loss": 0.02958067, "step": 8022 }, { "epoch": 16.046, "grad_norm": 1.1238305568695068, "learning_rate": 2e-05, "loss": 0.0421728, "step": 8023 }, { "epoch": 16.048, "grad_norm": 1.3875292539596558, "learning_rate": 2e-05, "loss": 0.03346295, "step": 8024 }, { "epoch": 16.05, "grad_norm": 1.1994253396987915, "learning_rate": 2e-05, "loss": 0.03716969, "step": 8025 }, { "epoch": 16.052, "grad_norm": 1.160305380821228, "learning_rate": 2e-05, "loss": 0.04381779, "step": 8026 }, { "epoch": 16.054, "grad_norm": 1.3147432804107666, "learning_rate": 2e-05, "loss": 0.03322311, "step": 8027 }, { "epoch": 16.056, "grad_norm": 0.9377807974815369, "learning_rate": 2e-05, "loss": 0.03092899, "step": 8028 }, { "epoch": 16.058, "grad_norm": 1.005224347114563, "learning_rate": 2e-05, "loss": 0.03116657, "step": 8029 }, { "epoch": 16.06, "grad_norm": 2.128162384033203, "learning_rate": 2e-05, "loss": 0.06992479, "step": 8030 }, { "epoch": 16.062, "grad_norm": 1.6066184043884277, "learning_rate": 2e-05, "loss": 0.02634419, "step": 8031 }, { "epoch": 16.064, "grad_norm": 1.2034430503845215, "learning_rate": 2e-05, "loss": 0.04791553, "step": 8032 }, { "epoch": 16.066, "grad_norm": 0.6897351741790771, "learning_rate": 2e-05, "loss": 0.02200021, "step": 8033 }, { "epoch": 16.068, "grad_norm": 1.2148480415344238, "learning_rate": 2e-05, "loss": 0.03783594, "step": 8034 }, { "epoch": 16.07, "grad_norm": 0.9366627931594849, "learning_rate": 2e-05, "loss": 0.04056703, "step": 8035 }, { "epoch": 16.072, "grad_norm": 1.1472421884536743, "learning_rate": 2e-05, "loss": 0.03043021, "step": 8036 }, { "epoch": 16.074, "grad_norm": 1.163169264793396, "learning_rate": 2e-05, "loss": 0.03877512, "step": 8037 }, { "epoch": 16.076, "grad_norm": 1.3891630172729492, "learning_rate": 2e-05, "loss": 0.05260683, "step": 8038 }, { "epoch": 16.078, "grad_norm": 1.711998462677002, "learning_rate": 2e-05, "loss": 0.04449131, "step": 8039 }, { "epoch": 16.08, "grad_norm": 1.0719623565673828, "learning_rate": 2e-05, "loss": 0.04482851, "step": 8040 }, { "epoch": 16.082, "grad_norm": 0.9638761281967163, "learning_rate": 2e-05, "loss": 0.03683668, "step": 8041 }, { "epoch": 16.084, "grad_norm": 2.4437239170074463, "learning_rate": 2e-05, "loss": 0.04084236, "step": 8042 }, { "epoch": 16.086, "grad_norm": 0.8303345441818237, "learning_rate": 2e-05, "loss": 0.03043423, "step": 8043 }, { "epoch": 16.088, "grad_norm": 1.1467349529266357, "learning_rate": 2e-05, "loss": 0.04815848, "step": 8044 }, { "epoch": 16.09, "grad_norm": 1.6639044284820557, "learning_rate": 2e-05, "loss": 0.05255146, "step": 8045 }, { "epoch": 16.092, "grad_norm": 0.9227938652038574, "learning_rate": 2e-05, "loss": 0.0421053, "step": 8046 }, { "epoch": 16.094, "grad_norm": 1.4248172044754028, "learning_rate": 2e-05, "loss": 0.0557301, "step": 8047 }, { "epoch": 16.096, "grad_norm": 0.9946959018707275, "learning_rate": 2e-05, "loss": 0.03997692, "step": 8048 }, { "epoch": 16.098, "grad_norm": 1.8189125061035156, "learning_rate": 2e-05, "loss": 0.03714712, "step": 8049 }, { "epoch": 16.1, "grad_norm": 4.282730579376221, "learning_rate": 2e-05, "loss": 0.04182577, "step": 8050 }, { "epoch": 16.102, "grad_norm": 1.4168959856033325, "learning_rate": 2e-05, "loss": 0.05315524, "step": 8051 }, { "epoch": 16.104, "grad_norm": 1.3047174215316772, "learning_rate": 2e-05, "loss": 0.04313055, "step": 8052 }, { "epoch": 16.106, "grad_norm": 0.9676288962364197, "learning_rate": 2e-05, "loss": 0.02951162, "step": 8053 }, { "epoch": 16.108, "grad_norm": 1.0046892166137695, "learning_rate": 2e-05, "loss": 0.03878992, "step": 8054 }, { "epoch": 16.11, "grad_norm": 1.1754720211029053, "learning_rate": 2e-05, "loss": 0.03605639, "step": 8055 }, { "epoch": 16.112, "grad_norm": 1.002890706062317, "learning_rate": 2e-05, "loss": 0.03990389, "step": 8056 }, { "epoch": 16.114, "grad_norm": 1.0528466701507568, "learning_rate": 2e-05, "loss": 0.02833366, "step": 8057 }, { "epoch": 16.116, "grad_norm": 0.8901591897010803, "learning_rate": 2e-05, "loss": 0.0330559, "step": 8058 }, { "epoch": 16.118, "grad_norm": 1.368645191192627, "learning_rate": 2e-05, "loss": 0.04019631, "step": 8059 }, { "epoch": 16.12, "grad_norm": 1.10138738155365, "learning_rate": 2e-05, "loss": 0.04215175, "step": 8060 }, { "epoch": 16.122, "grad_norm": 1.7590361833572388, "learning_rate": 2e-05, "loss": 0.05253034, "step": 8061 }, { "epoch": 16.124, "grad_norm": 1.449039101600647, "learning_rate": 2e-05, "loss": 0.04077056, "step": 8062 }, { "epoch": 16.126, "grad_norm": 1.2556906938552856, "learning_rate": 2e-05, "loss": 0.05052727, "step": 8063 }, { "epoch": 16.128, "grad_norm": 1.4511512517929077, "learning_rate": 2e-05, "loss": 0.04626414, "step": 8064 }, { "epoch": 16.13, "grad_norm": 1.2287794351577759, "learning_rate": 2e-05, "loss": 0.03254979, "step": 8065 }, { "epoch": 16.132, "grad_norm": 1.107598066329956, "learning_rate": 2e-05, "loss": 0.0401614, "step": 8066 }, { "epoch": 16.134, "grad_norm": 1.0021055936813354, "learning_rate": 2e-05, "loss": 0.04019111, "step": 8067 }, { "epoch": 16.136, "grad_norm": 1.2217875719070435, "learning_rate": 2e-05, "loss": 0.04298358, "step": 8068 }, { "epoch": 16.138, "grad_norm": 2.357957124710083, "learning_rate": 2e-05, "loss": 0.06644191, "step": 8069 }, { "epoch": 16.14, "grad_norm": 2.3666937351226807, "learning_rate": 2e-05, "loss": 0.04335817, "step": 8070 }, { "epoch": 16.142, "grad_norm": 1.1027276515960693, "learning_rate": 2e-05, "loss": 0.04169575, "step": 8071 }, { "epoch": 16.144, "grad_norm": 1.9890915155410767, "learning_rate": 2e-05, "loss": 0.05330719, "step": 8072 }, { "epoch": 16.146, "grad_norm": 1.4620836973190308, "learning_rate": 2e-05, "loss": 0.05538007, "step": 8073 }, { "epoch": 16.148, "grad_norm": 0.9738489985466003, "learning_rate": 2e-05, "loss": 0.03835566, "step": 8074 }, { "epoch": 16.15, "grad_norm": 1.272730827331543, "learning_rate": 2e-05, "loss": 0.03010005, "step": 8075 }, { "epoch": 16.152, "grad_norm": 1.4121198654174805, "learning_rate": 2e-05, "loss": 0.04414219, "step": 8076 }, { "epoch": 16.154, "grad_norm": 1.3623344898223877, "learning_rate": 2e-05, "loss": 0.02887666, "step": 8077 }, { "epoch": 16.156, "grad_norm": 1.8589338064193726, "learning_rate": 2e-05, "loss": 0.04130456, "step": 8078 }, { "epoch": 16.158, "grad_norm": 1.3108867406845093, "learning_rate": 2e-05, "loss": 0.04593939, "step": 8079 }, { "epoch": 16.16, "grad_norm": 1.0434542894363403, "learning_rate": 2e-05, "loss": 0.03494214, "step": 8080 }, { "epoch": 16.162, "grad_norm": 1.2916126251220703, "learning_rate": 2e-05, "loss": 0.05425988, "step": 8081 }, { "epoch": 16.164, "grad_norm": 1.2712737321853638, "learning_rate": 2e-05, "loss": 0.02886685, "step": 8082 }, { "epoch": 16.166, "grad_norm": 1.682323694229126, "learning_rate": 2e-05, "loss": 0.04064405, "step": 8083 }, { "epoch": 16.168, "grad_norm": 1.1361340284347534, "learning_rate": 2e-05, "loss": 0.04164793, "step": 8084 }, { "epoch": 16.17, "grad_norm": 1.3078943490982056, "learning_rate": 2e-05, "loss": 0.04108073, "step": 8085 }, { "epoch": 16.172, "grad_norm": 1.7028069496154785, "learning_rate": 2e-05, "loss": 0.04357743, "step": 8086 }, { "epoch": 16.174, "grad_norm": 1.5365149974822998, "learning_rate": 2e-05, "loss": 0.04114883, "step": 8087 }, { "epoch": 16.176, "grad_norm": 1.0788302421569824, "learning_rate": 2e-05, "loss": 0.04243885, "step": 8088 }, { "epoch": 16.178, "grad_norm": 2.0135796070098877, "learning_rate": 2e-05, "loss": 0.05229139, "step": 8089 }, { "epoch": 16.18, "grad_norm": 1.1709972620010376, "learning_rate": 2e-05, "loss": 0.04144391, "step": 8090 }, { "epoch": 16.182, "grad_norm": 1.4391306638717651, "learning_rate": 2e-05, "loss": 0.0414433, "step": 8091 }, { "epoch": 16.184, "grad_norm": 1.6893138885498047, "learning_rate": 2e-05, "loss": 0.06052459, "step": 8092 }, { "epoch": 16.186, "grad_norm": 1.3466391563415527, "learning_rate": 2e-05, "loss": 0.04916067, "step": 8093 }, { "epoch": 16.188, "grad_norm": 1.8353379964828491, "learning_rate": 2e-05, "loss": 0.04348073, "step": 8094 }, { "epoch": 16.19, "grad_norm": 0.893278956413269, "learning_rate": 2e-05, "loss": 0.02653177, "step": 8095 }, { "epoch": 16.192, "grad_norm": 2.003261089324951, "learning_rate": 2e-05, "loss": 0.03514069, "step": 8096 }, { "epoch": 16.194, "grad_norm": 1.075136661529541, "learning_rate": 2e-05, "loss": 0.03994807, "step": 8097 }, { "epoch": 16.196, "grad_norm": 1.3511254787445068, "learning_rate": 2e-05, "loss": 0.03974268, "step": 8098 }, { "epoch": 16.198, "grad_norm": 1.2686835527420044, "learning_rate": 2e-05, "loss": 0.04421372, "step": 8099 }, { "epoch": 16.2, "grad_norm": 1.0966556072235107, "learning_rate": 2e-05, "loss": 0.02724809, "step": 8100 }, { "epoch": 16.202, "grad_norm": 0.9960463047027588, "learning_rate": 2e-05, "loss": 0.03282727, "step": 8101 }, { "epoch": 16.204, "grad_norm": 1.1563421487808228, "learning_rate": 2e-05, "loss": 0.03677552, "step": 8102 }, { "epoch": 16.206, "grad_norm": 1.0785412788391113, "learning_rate": 2e-05, "loss": 0.03223889, "step": 8103 }, { "epoch": 16.208, "grad_norm": 1.2728240489959717, "learning_rate": 2e-05, "loss": 0.05057978, "step": 8104 }, { "epoch": 16.21, "grad_norm": 1.575555682182312, "learning_rate": 2e-05, "loss": 0.03975232, "step": 8105 }, { "epoch": 16.212, "grad_norm": 1.4932183027267456, "learning_rate": 2e-05, "loss": 0.04676668, "step": 8106 }, { "epoch": 16.214, "grad_norm": 2.355098247528076, "learning_rate": 2e-05, "loss": 0.04006461, "step": 8107 }, { "epoch": 16.216, "grad_norm": 1.406161904335022, "learning_rate": 2e-05, "loss": 0.04551262, "step": 8108 }, { "epoch": 16.218, "grad_norm": 2.4774460792541504, "learning_rate": 2e-05, "loss": 0.0461893, "step": 8109 }, { "epoch": 16.22, "grad_norm": 1.3433986902236938, "learning_rate": 2e-05, "loss": 0.04020276, "step": 8110 }, { "epoch": 16.222, "grad_norm": 1.4756783246994019, "learning_rate": 2e-05, "loss": 0.03817509, "step": 8111 }, { "epoch": 16.224, "grad_norm": 0.9873545169830322, "learning_rate": 2e-05, "loss": 0.03135115, "step": 8112 }, { "epoch": 16.226, "grad_norm": 2.9627325534820557, "learning_rate": 2e-05, "loss": 0.05660213, "step": 8113 }, { "epoch": 16.228, "grad_norm": 1.9772872924804688, "learning_rate": 2e-05, "loss": 0.04761301, "step": 8114 }, { "epoch": 16.23, "grad_norm": 0.9085068106651306, "learning_rate": 2e-05, "loss": 0.03426716, "step": 8115 }, { "epoch": 16.232, "grad_norm": 1.0919116735458374, "learning_rate": 2e-05, "loss": 0.04498358, "step": 8116 }, { "epoch": 16.234, "grad_norm": 1.3590514659881592, "learning_rate": 2e-05, "loss": 0.05677811, "step": 8117 }, { "epoch": 16.236, "grad_norm": 0.8966934084892273, "learning_rate": 2e-05, "loss": 0.03355091, "step": 8118 }, { "epoch": 16.238, "grad_norm": 0.8038359880447388, "learning_rate": 2e-05, "loss": 0.0229108, "step": 8119 }, { "epoch": 16.24, "grad_norm": 1.1021723747253418, "learning_rate": 2e-05, "loss": 0.04134315, "step": 8120 }, { "epoch": 16.242, "grad_norm": 0.7463804483413696, "learning_rate": 2e-05, "loss": 0.02158192, "step": 8121 }, { "epoch": 16.244, "grad_norm": 1.2729253768920898, "learning_rate": 2e-05, "loss": 0.04784512, "step": 8122 }, { "epoch": 16.246, "grad_norm": 2.956761360168457, "learning_rate": 2e-05, "loss": 0.06182123, "step": 8123 }, { "epoch": 16.248, "grad_norm": 0.9418922066688538, "learning_rate": 2e-05, "loss": 0.03233754, "step": 8124 }, { "epoch": 16.25, "grad_norm": 1.123310923576355, "learning_rate": 2e-05, "loss": 0.03820478, "step": 8125 }, { "epoch": 16.252, "grad_norm": 1.2643516063690186, "learning_rate": 2e-05, "loss": 0.04000041, "step": 8126 }, { "epoch": 16.254, "grad_norm": 0.9963107109069824, "learning_rate": 2e-05, "loss": 0.04133003, "step": 8127 }, { "epoch": 16.256, "grad_norm": 1.303662657737732, "learning_rate": 2e-05, "loss": 0.04069284, "step": 8128 }, { "epoch": 16.258, "grad_norm": 1.4123518466949463, "learning_rate": 2e-05, "loss": 0.03306764, "step": 8129 }, { "epoch": 16.26, "grad_norm": 1.1522477865219116, "learning_rate": 2e-05, "loss": 0.03297203, "step": 8130 }, { "epoch": 16.262, "grad_norm": 1.6634291410446167, "learning_rate": 2e-05, "loss": 0.03758486, "step": 8131 }, { "epoch": 16.264, "grad_norm": 1.063207745552063, "learning_rate": 2e-05, "loss": 0.03368717, "step": 8132 }, { "epoch": 16.266, "grad_norm": 1.1534427404403687, "learning_rate": 2e-05, "loss": 0.03365128, "step": 8133 }, { "epoch": 16.268, "grad_norm": 0.8939757347106934, "learning_rate": 2e-05, "loss": 0.03255916, "step": 8134 }, { "epoch": 16.27, "grad_norm": 1.0208537578582764, "learning_rate": 2e-05, "loss": 0.03255802, "step": 8135 }, { "epoch": 16.272, "grad_norm": 1.0770570039749146, "learning_rate": 2e-05, "loss": 0.03199369, "step": 8136 }, { "epoch": 16.274, "grad_norm": 1.0182234048843384, "learning_rate": 2e-05, "loss": 0.03118423, "step": 8137 }, { "epoch": 16.276, "grad_norm": 0.9261863231658936, "learning_rate": 2e-05, "loss": 0.03142765, "step": 8138 }, { "epoch": 16.278, "grad_norm": 1.27197265625, "learning_rate": 2e-05, "loss": 0.03999099, "step": 8139 }, { "epoch": 16.28, "grad_norm": 1.6125009059906006, "learning_rate": 2e-05, "loss": 0.05592204, "step": 8140 }, { "epoch": 16.282, "grad_norm": 1.396366834640503, "learning_rate": 2e-05, "loss": 0.03519509, "step": 8141 }, { "epoch": 16.284, "grad_norm": 1.3368645906448364, "learning_rate": 2e-05, "loss": 0.0475312, "step": 8142 }, { "epoch": 16.286, "grad_norm": 1.3470531702041626, "learning_rate": 2e-05, "loss": 0.05122634, "step": 8143 }, { "epoch": 16.288, "grad_norm": 2.2661569118499756, "learning_rate": 2e-05, "loss": 0.0285572, "step": 8144 }, { "epoch": 16.29, "grad_norm": 1.0553605556488037, "learning_rate": 2e-05, "loss": 0.02672314, "step": 8145 }, { "epoch": 16.292, "grad_norm": 1.019231915473938, "learning_rate": 2e-05, "loss": 0.03206313, "step": 8146 }, { "epoch": 16.294, "grad_norm": 1.0026081800460815, "learning_rate": 2e-05, "loss": 0.03553811, "step": 8147 }, { "epoch": 16.296, "grad_norm": 1.1870548725128174, "learning_rate": 2e-05, "loss": 0.03814599, "step": 8148 }, { "epoch": 16.298, "grad_norm": 1.2874722480773926, "learning_rate": 2e-05, "loss": 0.02804936, "step": 8149 }, { "epoch": 16.3, "grad_norm": 1.5287872552871704, "learning_rate": 2e-05, "loss": 0.03355492, "step": 8150 }, { "epoch": 16.302, "grad_norm": 1.6607043743133545, "learning_rate": 2e-05, "loss": 0.04553591, "step": 8151 }, { "epoch": 16.304, "grad_norm": 0.9676879048347473, "learning_rate": 2e-05, "loss": 0.02531033, "step": 8152 }, { "epoch": 16.306, "grad_norm": 1.1550991535186768, "learning_rate": 2e-05, "loss": 0.04489979, "step": 8153 }, { "epoch": 16.308, "grad_norm": 1.054743766784668, "learning_rate": 2e-05, "loss": 0.03464444, "step": 8154 }, { "epoch": 16.31, "grad_norm": 1.1906744241714478, "learning_rate": 2e-05, "loss": 0.03882031, "step": 8155 }, { "epoch": 16.312, "grad_norm": 1.6370846033096313, "learning_rate": 2e-05, "loss": 0.04127342, "step": 8156 }, { "epoch": 16.314, "grad_norm": 1.2006635665893555, "learning_rate": 2e-05, "loss": 0.04366533, "step": 8157 }, { "epoch": 16.316, "grad_norm": 1.668593168258667, "learning_rate": 2e-05, "loss": 0.04618284, "step": 8158 }, { "epoch": 16.318, "grad_norm": 1.5959596633911133, "learning_rate": 2e-05, "loss": 0.03362972, "step": 8159 }, { "epoch": 16.32, "grad_norm": 1.0409663915634155, "learning_rate": 2e-05, "loss": 0.03753944, "step": 8160 }, { "epoch": 16.322, "grad_norm": 1.2022160291671753, "learning_rate": 2e-05, "loss": 0.02933392, "step": 8161 }, { "epoch": 16.324, "grad_norm": 1.2204575538635254, "learning_rate": 2e-05, "loss": 0.05045522, "step": 8162 }, { "epoch": 16.326, "grad_norm": 1.4850815534591675, "learning_rate": 2e-05, "loss": 0.02668772, "step": 8163 }, { "epoch": 16.328, "grad_norm": 0.9488015174865723, "learning_rate": 2e-05, "loss": 0.02544276, "step": 8164 }, { "epoch": 16.33, "grad_norm": 3.8645198345184326, "learning_rate": 2e-05, "loss": 0.04063994, "step": 8165 }, { "epoch": 16.332, "grad_norm": 1.9257711172103882, "learning_rate": 2e-05, "loss": 0.03667308, "step": 8166 }, { "epoch": 16.334, "grad_norm": 1.1849285364151, "learning_rate": 2e-05, "loss": 0.04390438, "step": 8167 }, { "epoch": 16.336, "grad_norm": 1.3050767183303833, "learning_rate": 2e-05, "loss": 0.03373501, "step": 8168 }, { "epoch": 16.338, "grad_norm": 1.5707025527954102, "learning_rate": 2e-05, "loss": 0.03310432, "step": 8169 }, { "epoch": 16.34, "grad_norm": 1.6710127592086792, "learning_rate": 2e-05, "loss": 0.04479493, "step": 8170 }, { "epoch": 16.342, "grad_norm": 1.692458152770996, "learning_rate": 2e-05, "loss": 0.03380566, "step": 8171 }, { "epoch": 16.344, "grad_norm": 1.1804896593093872, "learning_rate": 2e-05, "loss": 0.04354354, "step": 8172 }, { "epoch": 16.346, "grad_norm": 0.9135777354240417, "learning_rate": 2e-05, "loss": 0.02357985, "step": 8173 }, { "epoch": 16.348, "grad_norm": 1.103266954421997, "learning_rate": 2e-05, "loss": 0.03100296, "step": 8174 }, { "epoch": 16.35, "grad_norm": 1.4131602048873901, "learning_rate": 2e-05, "loss": 0.03494125, "step": 8175 }, { "epoch": 16.352, "grad_norm": 1.4223483800888062, "learning_rate": 2e-05, "loss": 0.0523124, "step": 8176 }, { "epoch": 16.354, "grad_norm": 1.1714587211608887, "learning_rate": 2e-05, "loss": 0.04069928, "step": 8177 }, { "epoch": 16.356, "grad_norm": 1.0462075471878052, "learning_rate": 2e-05, "loss": 0.03406634, "step": 8178 }, { "epoch": 16.358, "grad_norm": 0.8194587230682373, "learning_rate": 2e-05, "loss": 0.02468334, "step": 8179 }, { "epoch": 16.36, "grad_norm": 1.5818257331848145, "learning_rate": 2e-05, "loss": 0.04018777, "step": 8180 }, { "epoch": 16.362, "grad_norm": 1.05071222782135, "learning_rate": 2e-05, "loss": 0.02858377, "step": 8181 }, { "epoch": 16.364, "grad_norm": 1.3035218715667725, "learning_rate": 2e-05, "loss": 0.03922418, "step": 8182 }, { "epoch": 16.366, "grad_norm": 1.036807894706726, "learning_rate": 2e-05, "loss": 0.03278348, "step": 8183 }, { "epoch": 16.368, "grad_norm": 1.3568602800369263, "learning_rate": 2e-05, "loss": 0.04442092, "step": 8184 }, { "epoch": 16.37, "grad_norm": 1.306277871131897, "learning_rate": 2e-05, "loss": 0.0349811, "step": 8185 }, { "epoch": 16.372, "grad_norm": 1.3252403736114502, "learning_rate": 2e-05, "loss": 0.04058518, "step": 8186 }, { "epoch": 16.374, "grad_norm": 1.2216702699661255, "learning_rate": 2e-05, "loss": 0.04433048, "step": 8187 }, { "epoch": 16.376, "grad_norm": 1.4517982006072998, "learning_rate": 2e-05, "loss": 0.04836712, "step": 8188 }, { "epoch": 16.378, "grad_norm": 1.7431961297988892, "learning_rate": 2e-05, "loss": 0.04569249, "step": 8189 }, { "epoch": 16.38, "grad_norm": 0.9581757187843323, "learning_rate": 2e-05, "loss": 0.03607025, "step": 8190 }, { "epoch": 16.382, "grad_norm": 1.0679128170013428, "learning_rate": 2e-05, "loss": 0.03121858, "step": 8191 }, { "epoch": 16.384, "grad_norm": 1.2390178442001343, "learning_rate": 2e-05, "loss": 0.03776772, "step": 8192 }, { "epoch": 16.386, "grad_norm": 1.408728837966919, "learning_rate": 2e-05, "loss": 0.03908163, "step": 8193 }, { "epoch": 16.388, "grad_norm": 1.6214293241500854, "learning_rate": 2e-05, "loss": 0.04584841, "step": 8194 }, { "epoch": 16.39, "grad_norm": 1.5242630243301392, "learning_rate": 2e-05, "loss": 0.04677683, "step": 8195 }, { "epoch": 16.392, "grad_norm": 1.2808504104614258, "learning_rate": 2e-05, "loss": 0.03237236, "step": 8196 }, { "epoch": 16.394, "grad_norm": 2.8665573596954346, "learning_rate": 2e-05, "loss": 0.04940195, "step": 8197 }, { "epoch": 16.396, "grad_norm": 0.948776125907898, "learning_rate": 2e-05, "loss": 0.03788519, "step": 8198 }, { "epoch": 16.398, "grad_norm": 1.415470838546753, "learning_rate": 2e-05, "loss": 0.03823056, "step": 8199 }, { "epoch": 16.4, "grad_norm": 2.438213348388672, "learning_rate": 2e-05, "loss": 0.03671508, "step": 8200 }, { "epoch": 16.402, "grad_norm": 1.010650634765625, "learning_rate": 2e-05, "loss": 0.03287682, "step": 8201 }, { "epoch": 16.404, "grad_norm": 1.2788907289505005, "learning_rate": 2e-05, "loss": 0.0384167, "step": 8202 }, { "epoch": 16.406, "grad_norm": 1.083628535270691, "learning_rate": 2e-05, "loss": 0.03261992, "step": 8203 }, { "epoch": 16.408, "grad_norm": 1.8817229270935059, "learning_rate": 2e-05, "loss": 0.03744752, "step": 8204 }, { "epoch": 16.41, "grad_norm": 1.501495599746704, "learning_rate": 2e-05, "loss": 0.05343234, "step": 8205 }, { "epoch": 16.412, "grad_norm": 1.213742733001709, "learning_rate": 2e-05, "loss": 0.04467801, "step": 8206 }, { "epoch": 16.414, "grad_norm": 1.283093810081482, "learning_rate": 2e-05, "loss": 0.0359704, "step": 8207 }, { "epoch": 16.416, "grad_norm": 1.3685106039047241, "learning_rate": 2e-05, "loss": 0.04109442, "step": 8208 }, { "epoch": 16.418, "grad_norm": 1.746134638786316, "learning_rate": 2e-05, "loss": 0.04227894, "step": 8209 }, { "epoch": 16.42, "grad_norm": 4.150729656219482, "learning_rate": 2e-05, "loss": 0.06023823, "step": 8210 }, { "epoch": 16.422, "grad_norm": 0.8454696536064148, "learning_rate": 2e-05, "loss": 0.02863848, "step": 8211 }, { "epoch": 16.424, "grad_norm": 3.1747887134552, "learning_rate": 2e-05, "loss": 0.05570412, "step": 8212 }, { "epoch": 16.426, "grad_norm": 1.4352025985717773, "learning_rate": 2e-05, "loss": 0.0461599, "step": 8213 }, { "epoch": 16.428, "grad_norm": 1.3556586503982544, "learning_rate": 2e-05, "loss": 0.04771223, "step": 8214 }, { "epoch": 16.43, "grad_norm": 1.113775372505188, "learning_rate": 2e-05, "loss": 0.04320124, "step": 8215 }, { "epoch": 16.432, "grad_norm": 1.0021729469299316, "learning_rate": 2e-05, "loss": 0.0404552, "step": 8216 }, { "epoch": 16.434, "grad_norm": 1.487113118171692, "learning_rate": 2e-05, "loss": 0.03398036, "step": 8217 }, { "epoch": 16.436, "grad_norm": 1.7219562530517578, "learning_rate": 2e-05, "loss": 0.04297631, "step": 8218 }, { "epoch": 16.438, "grad_norm": 0.7566974759101868, "learning_rate": 2e-05, "loss": 0.02528067, "step": 8219 }, { "epoch": 16.44, "grad_norm": 1.2634199857711792, "learning_rate": 2e-05, "loss": 0.04048873, "step": 8220 }, { "epoch": 16.442, "grad_norm": 1.3635785579681396, "learning_rate": 2e-05, "loss": 0.03699053, "step": 8221 }, { "epoch": 16.444, "grad_norm": 1.213982105255127, "learning_rate": 2e-05, "loss": 0.04167101, "step": 8222 }, { "epoch": 16.446, "grad_norm": 1.082590937614441, "learning_rate": 2e-05, "loss": 0.03293147, "step": 8223 }, { "epoch": 16.448, "grad_norm": 2.118565082550049, "learning_rate": 2e-05, "loss": 0.04190797, "step": 8224 }, { "epoch": 16.45, "grad_norm": 1.3486461639404297, "learning_rate": 2e-05, "loss": 0.05936465, "step": 8225 }, { "epoch": 16.452, "grad_norm": 1.0028716325759888, "learning_rate": 2e-05, "loss": 0.02941772, "step": 8226 }, { "epoch": 16.454, "grad_norm": 0.8530328869819641, "learning_rate": 2e-05, "loss": 0.02614375, "step": 8227 }, { "epoch": 16.456, "grad_norm": 1.1763277053833008, "learning_rate": 2e-05, "loss": 0.03713852, "step": 8228 }, { "epoch": 16.458, "grad_norm": 1.2166781425476074, "learning_rate": 2e-05, "loss": 0.03911814, "step": 8229 }, { "epoch": 16.46, "grad_norm": 1.2310494184494019, "learning_rate": 2e-05, "loss": 0.04959798, "step": 8230 }, { "epoch": 16.462, "grad_norm": 1.427141547203064, "learning_rate": 2e-05, "loss": 0.03936853, "step": 8231 }, { "epoch": 16.464, "grad_norm": 2.973794937133789, "learning_rate": 2e-05, "loss": 0.04512501, "step": 8232 }, { "epoch": 16.466, "grad_norm": 1.291011095046997, "learning_rate": 2e-05, "loss": 0.04267914, "step": 8233 }, { "epoch": 16.468, "grad_norm": 1.3030160665512085, "learning_rate": 2e-05, "loss": 0.04386271, "step": 8234 }, { "epoch": 16.47, "grad_norm": 1.5548454523086548, "learning_rate": 2e-05, "loss": 0.05778488, "step": 8235 }, { "epoch": 16.472, "grad_norm": 1.1663544178009033, "learning_rate": 2e-05, "loss": 0.03603297, "step": 8236 }, { "epoch": 16.474, "grad_norm": 1.1389724016189575, "learning_rate": 2e-05, "loss": 0.04016333, "step": 8237 }, { "epoch": 16.476, "grad_norm": 1.081159234046936, "learning_rate": 2e-05, "loss": 0.0496039, "step": 8238 }, { "epoch": 16.478, "grad_norm": 1.6191846132278442, "learning_rate": 2e-05, "loss": 0.04313356, "step": 8239 }, { "epoch": 16.48, "grad_norm": 0.9814433455467224, "learning_rate": 2e-05, "loss": 0.03610291, "step": 8240 }, { "epoch": 16.482, "grad_norm": 1.1408717632293701, "learning_rate": 2e-05, "loss": 0.03619709, "step": 8241 }, { "epoch": 16.484, "grad_norm": 1.0582938194274902, "learning_rate": 2e-05, "loss": 0.03754045, "step": 8242 }, { "epoch": 16.486, "grad_norm": 0.8372255563735962, "learning_rate": 2e-05, "loss": 0.0261168, "step": 8243 }, { "epoch": 16.488, "grad_norm": 0.9422687888145447, "learning_rate": 2e-05, "loss": 0.03500605, "step": 8244 }, { "epoch": 16.49, "grad_norm": 0.9259281158447266, "learning_rate": 2e-05, "loss": 0.04100908, "step": 8245 }, { "epoch": 16.492, "grad_norm": 1.2322639226913452, "learning_rate": 2e-05, "loss": 0.03284498, "step": 8246 }, { "epoch": 16.494, "grad_norm": 1.6331202983856201, "learning_rate": 2e-05, "loss": 0.0532867, "step": 8247 }, { "epoch": 16.496, "grad_norm": 1.4512193202972412, "learning_rate": 2e-05, "loss": 0.03742641, "step": 8248 }, { "epoch": 16.498, "grad_norm": 1.4642248153686523, "learning_rate": 2e-05, "loss": 0.05566034, "step": 8249 }, { "epoch": 16.5, "grad_norm": 1.2779650688171387, "learning_rate": 2e-05, "loss": 0.03900528, "step": 8250 }, { "epoch": 16.502, "grad_norm": 1.5982625484466553, "learning_rate": 2e-05, "loss": 0.03873933, "step": 8251 }, { "epoch": 16.504, "grad_norm": 1.2612420320510864, "learning_rate": 2e-05, "loss": 0.03912558, "step": 8252 }, { "epoch": 16.506, "grad_norm": 1.089294672012329, "learning_rate": 2e-05, "loss": 0.02468765, "step": 8253 }, { "epoch": 16.508, "grad_norm": 1.309836745262146, "learning_rate": 2e-05, "loss": 0.04034061, "step": 8254 }, { "epoch": 16.51, "grad_norm": 1.0034233331680298, "learning_rate": 2e-05, "loss": 0.03271377, "step": 8255 }, { "epoch": 16.512, "grad_norm": 1.1404598951339722, "learning_rate": 2e-05, "loss": 0.04757511, "step": 8256 }, { "epoch": 16.514, "grad_norm": 1.2727479934692383, "learning_rate": 2e-05, "loss": 0.0519487, "step": 8257 }, { "epoch": 16.516, "grad_norm": 0.9131700992584229, "learning_rate": 2e-05, "loss": 0.03295713, "step": 8258 }, { "epoch": 16.518, "grad_norm": 1.8983780145645142, "learning_rate": 2e-05, "loss": 0.04880277, "step": 8259 }, { "epoch": 16.52, "grad_norm": 0.9519051909446716, "learning_rate": 2e-05, "loss": 0.03398883, "step": 8260 }, { "epoch": 16.522, "grad_norm": 1.6024519205093384, "learning_rate": 2e-05, "loss": 0.03934075, "step": 8261 }, { "epoch": 16.524, "grad_norm": 0.7955957651138306, "learning_rate": 2e-05, "loss": 0.03359678, "step": 8262 }, { "epoch": 16.526, "grad_norm": 1.609654188156128, "learning_rate": 2e-05, "loss": 0.04246666, "step": 8263 }, { "epoch": 16.528, "grad_norm": 0.9613010287284851, "learning_rate": 2e-05, "loss": 0.03813015, "step": 8264 }, { "epoch": 16.53, "grad_norm": 1.1907222270965576, "learning_rate": 2e-05, "loss": 0.04408014, "step": 8265 }, { "epoch": 16.532, "grad_norm": 1.9539070129394531, "learning_rate": 2e-05, "loss": 0.06747687, "step": 8266 }, { "epoch": 16.534, "grad_norm": 1.061668872833252, "learning_rate": 2e-05, "loss": 0.02662, "step": 8267 }, { "epoch": 16.536, "grad_norm": 0.9600314497947693, "learning_rate": 2e-05, "loss": 0.03638612, "step": 8268 }, { "epoch": 16.538, "grad_norm": 0.9233123064041138, "learning_rate": 2e-05, "loss": 0.03593893, "step": 8269 }, { "epoch": 16.54, "grad_norm": 1.479044795036316, "learning_rate": 2e-05, "loss": 0.05184358, "step": 8270 }, { "epoch": 16.542, "grad_norm": 1.5287649631500244, "learning_rate": 2e-05, "loss": 0.04466471, "step": 8271 }, { "epoch": 16.544, "grad_norm": 2.0069024562835693, "learning_rate": 2e-05, "loss": 0.04259269, "step": 8272 }, { "epoch": 16.546, "grad_norm": 0.9243464469909668, "learning_rate": 2e-05, "loss": 0.03510771, "step": 8273 }, { "epoch": 16.548000000000002, "grad_norm": 1.2056413888931274, "learning_rate": 2e-05, "loss": 0.03631591, "step": 8274 }, { "epoch": 16.55, "grad_norm": 1.0497905015945435, "learning_rate": 2e-05, "loss": 0.0385576, "step": 8275 }, { "epoch": 16.552, "grad_norm": 0.9925622344017029, "learning_rate": 2e-05, "loss": 0.03732425, "step": 8276 }, { "epoch": 16.554, "grad_norm": 2.6606976985931396, "learning_rate": 2e-05, "loss": 0.06217859, "step": 8277 }, { "epoch": 16.556, "grad_norm": 1.4675244092941284, "learning_rate": 2e-05, "loss": 0.04956838, "step": 8278 }, { "epoch": 16.558, "grad_norm": 1.1808679103851318, "learning_rate": 2e-05, "loss": 0.04509632, "step": 8279 }, { "epoch": 16.56, "grad_norm": 1.189639687538147, "learning_rate": 2e-05, "loss": 0.03516001, "step": 8280 }, { "epoch": 16.562, "grad_norm": 1.1224857568740845, "learning_rate": 2e-05, "loss": 0.03829812, "step": 8281 }, { "epoch": 16.564, "grad_norm": 1.6615842580795288, "learning_rate": 2e-05, "loss": 0.04117198, "step": 8282 }, { "epoch": 16.566, "grad_norm": 0.9788565039634705, "learning_rate": 2e-05, "loss": 0.03983273, "step": 8283 }, { "epoch": 16.568, "grad_norm": 1.1430004835128784, "learning_rate": 2e-05, "loss": 0.04645136, "step": 8284 }, { "epoch": 16.57, "grad_norm": 1.7217742204666138, "learning_rate": 2e-05, "loss": 0.03647877, "step": 8285 }, { "epoch": 16.572, "grad_norm": 2.5833990573883057, "learning_rate": 2e-05, "loss": 0.03389883, "step": 8286 }, { "epoch": 16.574, "grad_norm": 1.0924619436264038, "learning_rate": 2e-05, "loss": 0.03044964, "step": 8287 }, { "epoch": 16.576, "grad_norm": 1.111318588256836, "learning_rate": 2e-05, "loss": 0.03224605, "step": 8288 }, { "epoch": 16.578, "grad_norm": 0.9708213806152344, "learning_rate": 2e-05, "loss": 0.03481601, "step": 8289 }, { "epoch": 16.58, "grad_norm": 1.0016098022460938, "learning_rate": 2e-05, "loss": 0.0263949, "step": 8290 }, { "epoch": 16.582, "grad_norm": 1.9268604516983032, "learning_rate": 2e-05, "loss": 0.04963619, "step": 8291 }, { "epoch": 16.584, "grad_norm": 1.561201572418213, "learning_rate": 2e-05, "loss": 0.05287721, "step": 8292 }, { "epoch": 16.586, "grad_norm": 1.2149429321289062, "learning_rate": 2e-05, "loss": 0.0360918, "step": 8293 }, { "epoch": 16.588, "grad_norm": 1.1690983772277832, "learning_rate": 2e-05, "loss": 0.04147298, "step": 8294 }, { "epoch": 16.59, "grad_norm": 1.3706759214401245, "learning_rate": 2e-05, "loss": 0.03936959, "step": 8295 }, { "epoch": 16.592, "grad_norm": 1.2356643676757812, "learning_rate": 2e-05, "loss": 0.0466823, "step": 8296 }, { "epoch": 16.594, "grad_norm": 1.1706414222717285, "learning_rate": 2e-05, "loss": 0.04192802, "step": 8297 }, { "epoch": 16.596, "grad_norm": 0.9304798245429993, "learning_rate": 2e-05, "loss": 0.03396158, "step": 8298 }, { "epoch": 16.598, "grad_norm": 0.7715820670127869, "learning_rate": 2e-05, "loss": 0.02587889, "step": 8299 }, { "epoch": 16.6, "grad_norm": 1.1630661487579346, "learning_rate": 2e-05, "loss": 0.04147232, "step": 8300 }, { "epoch": 16.602, "grad_norm": 1.4500014781951904, "learning_rate": 2e-05, "loss": 0.05441855, "step": 8301 }, { "epoch": 16.604, "grad_norm": 1.2780057191848755, "learning_rate": 2e-05, "loss": 0.04715714, "step": 8302 }, { "epoch": 16.606, "grad_norm": 1.167553424835205, "learning_rate": 2e-05, "loss": 0.04453902, "step": 8303 }, { "epoch": 16.608, "grad_norm": 0.9578707218170166, "learning_rate": 2e-05, "loss": 0.03321385, "step": 8304 }, { "epoch": 16.61, "grad_norm": 0.9922477602958679, "learning_rate": 2e-05, "loss": 0.04943824, "step": 8305 }, { "epoch": 16.612, "grad_norm": 1.229866623878479, "learning_rate": 2e-05, "loss": 0.0397451, "step": 8306 }, { "epoch": 16.614, "grad_norm": 0.8996338844299316, "learning_rate": 2e-05, "loss": 0.03554664, "step": 8307 }, { "epoch": 16.616, "grad_norm": 1.6743134260177612, "learning_rate": 2e-05, "loss": 0.05404719, "step": 8308 }, { "epoch": 16.618, "grad_norm": 1.28831946849823, "learning_rate": 2e-05, "loss": 0.05462881, "step": 8309 }, { "epoch": 16.62, "grad_norm": 1.3159453868865967, "learning_rate": 2e-05, "loss": 0.04029004, "step": 8310 }, { "epoch": 16.622, "grad_norm": 1.7807199954986572, "learning_rate": 2e-05, "loss": 0.04414508, "step": 8311 }, { "epoch": 16.624, "grad_norm": 1.0302990674972534, "learning_rate": 2e-05, "loss": 0.03156264, "step": 8312 }, { "epoch": 16.626, "grad_norm": 1.0844486951828003, "learning_rate": 2e-05, "loss": 0.03466799, "step": 8313 }, { "epoch": 16.628, "grad_norm": 0.8506113886833191, "learning_rate": 2e-05, "loss": 0.02792093, "step": 8314 }, { "epoch": 16.63, "grad_norm": 1.1834053993225098, "learning_rate": 2e-05, "loss": 0.04926874, "step": 8315 }, { "epoch": 16.632, "grad_norm": 0.8524296879768372, "learning_rate": 2e-05, "loss": 0.02497356, "step": 8316 }, { "epoch": 16.634, "grad_norm": 1.2255483865737915, "learning_rate": 2e-05, "loss": 0.045866, "step": 8317 }, { "epoch": 16.636, "grad_norm": 1.087267279624939, "learning_rate": 2e-05, "loss": 0.03373387, "step": 8318 }, { "epoch": 16.638, "grad_norm": 1.1295750141143799, "learning_rate": 2e-05, "loss": 0.0321148, "step": 8319 }, { "epoch": 16.64, "grad_norm": 1.2895820140838623, "learning_rate": 2e-05, "loss": 0.04531387, "step": 8320 }, { "epoch": 16.642, "grad_norm": 1.021094799041748, "learning_rate": 2e-05, "loss": 0.04268889, "step": 8321 }, { "epoch": 16.644, "grad_norm": 0.9108835458755493, "learning_rate": 2e-05, "loss": 0.02671606, "step": 8322 }, { "epoch": 16.646, "grad_norm": 1.4626824855804443, "learning_rate": 2e-05, "loss": 0.0437445, "step": 8323 }, { "epoch": 16.648, "grad_norm": 1.0410956144332886, "learning_rate": 2e-05, "loss": 0.04023174, "step": 8324 }, { "epoch": 16.65, "grad_norm": 1.353083848953247, "learning_rate": 2e-05, "loss": 0.04248706, "step": 8325 }, { "epoch": 16.652, "grad_norm": 1.2439631223678589, "learning_rate": 2e-05, "loss": 0.04102281, "step": 8326 }, { "epoch": 16.654, "grad_norm": 1.4870727062225342, "learning_rate": 2e-05, "loss": 0.03603457, "step": 8327 }, { "epoch": 16.656, "grad_norm": 0.8128296732902527, "learning_rate": 2e-05, "loss": 0.02902194, "step": 8328 }, { "epoch": 16.658, "grad_norm": 1.1543548107147217, "learning_rate": 2e-05, "loss": 0.04488258, "step": 8329 }, { "epoch": 16.66, "grad_norm": 1.1490774154663086, "learning_rate": 2e-05, "loss": 0.04285713, "step": 8330 }, { "epoch": 16.662, "grad_norm": 0.8013486266136169, "learning_rate": 2e-05, "loss": 0.02479377, "step": 8331 }, { "epoch": 16.664, "grad_norm": 1.3497260808944702, "learning_rate": 2e-05, "loss": 0.03930143, "step": 8332 }, { "epoch": 16.666, "grad_norm": 1.2707629203796387, "learning_rate": 2e-05, "loss": 0.04483213, "step": 8333 }, { "epoch": 16.668, "grad_norm": 1.5103529691696167, "learning_rate": 2e-05, "loss": 0.0423223, "step": 8334 }, { "epoch": 16.67, "grad_norm": 1.1483421325683594, "learning_rate": 2e-05, "loss": 0.03447499, "step": 8335 }, { "epoch": 16.672, "grad_norm": 1.1671597957611084, "learning_rate": 2e-05, "loss": 0.03986046, "step": 8336 }, { "epoch": 16.674, "grad_norm": 2.0713305473327637, "learning_rate": 2e-05, "loss": 0.04583824, "step": 8337 }, { "epoch": 16.676, "grad_norm": 1.2488347291946411, "learning_rate": 2e-05, "loss": 0.05782833, "step": 8338 }, { "epoch": 16.678, "grad_norm": 1.380580186843872, "learning_rate": 2e-05, "loss": 0.05267163, "step": 8339 }, { "epoch": 16.68, "grad_norm": 0.9324699640274048, "learning_rate": 2e-05, "loss": 0.02859825, "step": 8340 }, { "epoch": 16.682, "grad_norm": 3.066561698913574, "learning_rate": 2e-05, "loss": 0.0461663, "step": 8341 }, { "epoch": 16.684, "grad_norm": 1.730197548866272, "learning_rate": 2e-05, "loss": 0.04444271, "step": 8342 }, { "epoch": 16.686, "grad_norm": 1.0998096466064453, "learning_rate": 2e-05, "loss": 0.03587677, "step": 8343 }, { "epoch": 16.688, "grad_norm": 1.8703289031982422, "learning_rate": 2e-05, "loss": 0.04714426, "step": 8344 }, { "epoch": 16.69, "grad_norm": 2.110673666000366, "learning_rate": 2e-05, "loss": 0.04522548, "step": 8345 }, { "epoch": 16.692, "grad_norm": 1.7301418781280518, "learning_rate": 2e-05, "loss": 0.03590683, "step": 8346 }, { "epoch": 16.694, "grad_norm": 1.2836073637008667, "learning_rate": 2e-05, "loss": 0.0443484, "step": 8347 }, { "epoch": 16.696, "grad_norm": 0.9881532192230225, "learning_rate": 2e-05, "loss": 0.03640392, "step": 8348 }, { "epoch": 16.698, "grad_norm": 1.4210853576660156, "learning_rate": 2e-05, "loss": 0.03733408, "step": 8349 }, { "epoch": 16.7, "grad_norm": 1.6676608324050903, "learning_rate": 2e-05, "loss": 0.03615467, "step": 8350 }, { "epoch": 16.701999999999998, "grad_norm": 1.3031706809997559, "learning_rate": 2e-05, "loss": 0.0485234, "step": 8351 }, { "epoch": 16.704, "grad_norm": 1.5736160278320312, "learning_rate": 2e-05, "loss": 0.05092331, "step": 8352 }, { "epoch": 16.706, "grad_norm": 1.2167357206344604, "learning_rate": 2e-05, "loss": 0.02873914, "step": 8353 }, { "epoch": 16.708, "grad_norm": 1.3404465913772583, "learning_rate": 2e-05, "loss": 0.03442968, "step": 8354 }, { "epoch": 16.71, "grad_norm": 1.2709263563156128, "learning_rate": 2e-05, "loss": 0.04121051, "step": 8355 }, { "epoch": 16.712, "grad_norm": 1.1980571746826172, "learning_rate": 2e-05, "loss": 0.03082032, "step": 8356 }, { "epoch": 16.714, "grad_norm": 1.4462573528289795, "learning_rate": 2e-05, "loss": 0.05036964, "step": 8357 }, { "epoch": 16.716, "grad_norm": 0.9910008311271667, "learning_rate": 2e-05, "loss": 0.03717036, "step": 8358 }, { "epoch": 16.718, "grad_norm": 2.1541004180908203, "learning_rate": 2e-05, "loss": 0.05308878, "step": 8359 }, { "epoch": 16.72, "grad_norm": 1.1498245000839233, "learning_rate": 2e-05, "loss": 0.04460468, "step": 8360 }, { "epoch": 16.722, "grad_norm": 1.0459766387939453, "learning_rate": 2e-05, "loss": 0.03412562, "step": 8361 }, { "epoch": 16.724, "grad_norm": 1.2054882049560547, "learning_rate": 2e-05, "loss": 0.04336284, "step": 8362 }, { "epoch": 16.726, "grad_norm": 2.585453510284424, "learning_rate": 2e-05, "loss": 0.04201549, "step": 8363 }, { "epoch": 16.728, "grad_norm": 1.1822905540466309, "learning_rate": 2e-05, "loss": 0.03065944, "step": 8364 }, { "epoch": 16.73, "grad_norm": 2.4063751697540283, "learning_rate": 2e-05, "loss": 0.06484241, "step": 8365 }, { "epoch": 16.732, "grad_norm": 0.910371720790863, "learning_rate": 2e-05, "loss": 0.02728943, "step": 8366 }, { "epoch": 16.734, "grad_norm": 1.1744091510772705, "learning_rate": 2e-05, "loss": 0.0449831, "step": 8367 }, { "epoch": 16.736, "grad_norm": 1.17961847782135, "learning_rate": 2e-05, "loss": 0.047885, "step": 8368 }, { "epoch": 16.738, "grad_norm": 1.7319247722625732, "learning_rate": 2e-05, "loss": 0.06877615, "step": 8369 }, { "epoch": 16.74, "grad_norm": 0.9071101546287537, "learning_rate": 2e-05, "loss": 0.03281096, "step": 8370 }, { "epoch": 16.742, "grad_norm": 0.9504287838935852, "learning_rate": 2e-05, "loss": 0.0342159, "step": 8371 }, { "epoch": 16.744, "grad_norm": 1.09835946559906, "learning_rate": 2e-05, "loss": 0.03946387, "step": 8372 }, { "epoch": 16.746, "grad_norm": 2.7252159118652344, "learning_rate": 2e-05, "loss": 0.05097622, "step": 8373 }, { "epoch": 16.748, "grad_norm": 1.072425127029419, "learning_rate": 2e-05, "loss": 0.03863755, "step": 8374 }, { "epoch": 16.75, "grad_norm": 1.460571527481079, "learning_rate": 2e-05, "loss": 0.0505866, "step": 8375 }, { "epoch": 16.752, "grad_norm": 1.1475695371627808, "learning_rate": 2e-05, "loss": 0.03575168, "step": 8376 }, { "epoch": 16.754, "grad_norm": 2.364583969116211, "learning_rate": 2e-05, "loss": 0.04848005, "step": 8377 }, { "epoch": 16.756, "grad_norm": 1.1081641912460327, "learning_rate": 2e-05, "loss": 0.02874547, "step": 8378 }, { "epoch": 16.758, "grad_norm": 1.2067151069641113, "learning_rate": 2e-05, "loss": 0.03459092, "step": 8379 }, { "epoch": 16.76, "grad_norm": 0.7548035383224487, "learning_rate": 2e-05, "loss": 0.02668865, "step": 8380 }, { "epoch": 16.762, "grad_norm": 1.3829071521759033, "learning_rate": 2e-05, "loss": 0.03597104, "step": 8381 }, { "epoch": 16.764, "grad_norm": 1.0241215229034424, "learning_rate": 2e-05, "loss": 0.02973223, "step": 8382 }, { "epoch": 16.766, "grad_norm": 1.523497462272644, "learning_rate": 2e-05, "loss": 0.03927022, "step": 8383 }, { "epoch": 16.768, "grad_norm": 1.2706387042999268, "learning_rate": 2e-05, "loss": 0.04402893, "step": 8384 }, { "epoch": 16.77, "grad_norm": 1.0218260288238525, "learning_rate": 2e-05, "loss": 0.03629261, "step": 8385 }, { "epoch": 16.772, "grad_norm": 1.168722152709961, "learning_rate": 2e-05, "loss": 0.03109026, "step": 8386 }, { "epoch": 16.774, "grad_norm": 1.254064679145813, "learning_rate": 2e-05, "loss": 0.04259754, "step": 8387 }, { "epoch": 16.776, "grad_norm": 1.1511726379394531, "learning_rate": 2e-05, "loss": 0.03143356, "step": 8388 }, { "epoch": 16.778, "grad_norm": 1.238359808921814, "learning_rate": 2e-05, "loss": 0.0454062, "step": 8389 }, { "epoch": 16.78, "grad_norm": 1.2323909997940063, "learning_rate": 2e-05, "loss": 0.0520661, "step": 8390 }, { "epoch": 16.782, "grad_norm": 0.8986350297927856, "learning_rate": 2e-05, "loss": 0.0306076, "step": 8391 }, { "epoch": 16.784, "grad_norm": 1.8212543725967407, "learning_rate": 2e-05, "loss": 0.03436889, "step": 8392 }, { "epoch": 16.786, "grad_norm": 0.8311665654182434, "learning_rate": 2e-05, "loss": 0.02341103, "step": 8393 }, { "epoch": 16.788, "grad_norm": 2.219353199005127, "learning_rate": 2e-05, "loss": 0.0640914, "step": 8394 }, { "epoch": 16.79, "grad_norm": 1.2635961771011353, "learning_rate": 2e-05, "loss": 0.02474283, "step": 8395 }, { "epoch": 16.792, "grad_norm": 1.305991768836975, "learning_rate": 2e-05, "loss": 0.03481719, "step": 8396 }, { "epoch": 16.794, "grad_norm": 1.7838835716247559, "learning_rate": 2e-05, "loss": 0.04399516, "step": 8397 }, { "epoch": 16.796, "grad_norm": 1.320725917816162, "learning_rate": 2e-05, "loss": 0.04420858, "step": 8398 }, { "epoch": 16.798000000000002, "grad_norm": 1.0815000534057617, "learning_rate": 2e-05, "loss": 0.0291509, "step": 8399 }, { "epoch": 16.8, "grad_norm": 1.4641239643096924, "learning_rate": 2e-05, "loss": 0.04957747, "step": 8400 }, { "epoch": 16.802, "grad_norm": 0.9528389573097229, "learning_rate": 2e-05, "loss": 0.03332908, "step": 8401 }, { "epoch": 16.804, "grad_norm": 1.1721091270446777, "learning_rate": 2e-05, "loss": 0.04258526, "step": 8402 }, { "epoch": 16.806, "grad_norm": 6.899510383605957, "learning_rate": 2e-05, "loss": 0.03881, "step": 8403 }, { "epoch": 16.808, "grad_norm": 0.9211033582687378, "learning_rate": 2e-05, "loss": 0.03433126, "step": 8404 }, { "epoch": 16.81, "grad_norm": 1.0985780954360962, "learning_rate": 2e-05, "loss": 0.02993702, "step": 8405 }, { "epoch": 16.812, "grad_norm": 1.7919085025787354, "learning_rate": 2e-05, "loss": 0.03525182, "step": 8406 }, { "epoch": 16.814, "grad_norm": 1.00211763381958, "learning_rate": 2e-05, "loss": 0.03410188, "step": 8407 }, { "epoch": 16.816, "grad_norm": 0.8632226586341858, "learning_rate": 2e-05, "loss": 0.02560495, "step": 8408 }, { "epoch": 16.818, "grad_norm": 1.3063429594039917, "learning_rate": 2e-05, "loss": 0.04504444, "step": 8409 }, { "epoch": 16.82, "grad_norm": 0.8369211554527283, "learning_rate": 2e-05, "loss": 0.02775983, "step": 8410 }, { "epoch": 16.822, "grad_norm": 1.2283343076705933, "learning_rate": 2e-05, "loss": 0.03955343, "step": 8411 }, { "epoch": 16.824, "grad_norm": 1.6909712553024292, "learning_rate": 2e-05, "loss": 0.03783029, "step": 8412 }, { "epoch": 16.826, "grad_norm": 1.023411512374878, "learning_rate": 2e-05, "loss": 0.03567769, "step": 8413 }, { "epoch": 16.828, "grad_norm": 0.9995966553688049, "learning_rate": 2e-05, "loss": 0.03603249, "step": 8414 }, { "epoch": 16.83, "grad_norm": 2.203381299972534, "learning_rate": 2e-05, "loss": 0.05770217, "step": 8415 }, { "epoch": 16.832, "grad_norm": 1.1371204853057861, "learning_rate": 2e-05, "loss": 0.04010132, "step": 8416 }, { "epoch": 16.834, "grad_norm": 1.400772213935852, "learning_rate": 2e-05, "loss": 0.04290841, "step": 8417 }, { "epoch": 16.836, "grad_norm": 1.9980731010437012, "learning_rate": 2e-05, "loss": 0.03641246, "step": 8418 }, { "epoch": 16.838, "grad_norm": 2.886061906814575, "learning_rate": 2e-05, "loss": 0.04051365, "step": 8419 }, { "epoch": 16.84, "grad_norm": 2.527926206588745, "learning_rate": 2e-05, "loss": 0.04616052, "step": 8420 }, { "epoch": 16.842, "grad_norm": 1.7495914697647095, "learning_rate": 2e-05, "loss": 0.03882325, "step": 8421 }, { "epoch": 16.844, "grad_norm": 1.1820464134216309, "learning_rate": 2e-05, "loss": 0.03474192, "step": 8422 }, { "epoch": 16.846, "grad_norm": 0.8082514405250549, "learning_rate": 2e-05, "loss": 0.02354405, "step": 8423 }, { "epoch": 16.848, "grad_norm": 1.6576683521270752, "learning_rate": 2e-05, "loss": 0.04927855, "step": 8424 }, { "epoch": 16.85, "grad_norm": 1.3754609823226929, "learning_rate": 2e-05, "loss": 0.03757167, "step": 8425 }, { "epoch": 16.852, "grad_norm": 1.379211187362671, "learning_rate": 2e-05, "loss": 0.04781748, "step": 8426 }, { "epoch": 16.854, "grad_norm": 1.8801674842834473, "learning_rate": 2e-05, "loss": 0.03705425, "step": 8427 }, { "epoch": 16.856, "grad_norm": 1.499222993850708, "learning_rate": 2e-05, "loss": 0.03637782, "step": 8428 }, { "epoch": 16.858, "grad_norm": 1.910390019416809, "learning_rate": 2e-05, "loss": 0.03735828, "step": 8429 }, { "epoch": 16.86, "grad_norm": 1.2268376350402832, "learning_rate": 2e-05, "loss": 0.03624657, "step": 8430 }, { "epoch": 16.862, "grad_norm": 1.40660560131073, "learning_rate": 2e-05, "loss": 0.03333897, "step": 8431 }, { "epoch": 16.864, "grad_norm": 1.095402717590332, "learning_rate": 2e-05, "loss": 0.02599231, "step": 8432 }, { "epoch": 16.866, "grad_norm": 2.134478807449341, "learning_rate": 2e-05, "loss": 0.04386649, "step": 8433 }, { "epoch": 16.868, "grad_norm": 1.20742666721344, "learning_rate": 2e-05, "loss": 0.04349579, "step": 8434 }, { "epoch": 16.87, "grad_norm": 1.7173153162002563, "learning_rate": 2e-05, "loss": 0.05224491, "step": 8435 }, { "epoch": 16.872, "grad_norm": 1.1482794284820557, "learning_rate": 2e-05, "loss": 0.03536678, "step": 8436 }, { "epoch": 16.874, "grad_norm": 1.2016712427139282, "learning_rate": 2e-05, "loss": 0.03922977, "step": 8437 }, { "epoch": 16.876, "grad_norm": 1.3894551992416382, "learning_rate": 2e-05, "loss": 0.03573563, "step": 8438 }, { "epoch": 16.878, "grad_norm": 1.3940871953964233, "learning_rate": 2e-05, "loss": 0.04417897, "step": 8439 }, { "epoch": 16.88, "grad_norm": 1.3938254117965698, "learning_rate": 2e-05, "loss": 0.04145826, "step": 8440 }, { "epoch": 16.882, "grad_norm": 0.8536736965179443, "learning_rate": 2e-05, "loss": 0.02872265, "step": 8441 }, { "epoch": 16.884, "grad_norm": 0.8990205526351929, "learning_rate": 2e-05, "loss": 0.02876249, "step": 8442 }, { "epoch": 16.886, "grad_norm": 1.3786355257034302, "learning_rate": 2e-05, "loss": 0.04285476, "step": 8443 }, { "epoch": 16.888, "grad_norm": 1.0812320709228516, "learning_rate": 2e-05, "loss": 0.04131185, "step": 8444 }, { "epoch": 16.89, "grad_norm": 2.2041189670562744, "learning_rate": 2e-05, "loss": 0.05553081, "step": 8445 }, { "epoch": 16.892, "grad_norm": 0.9135197997093201, "learning_rate": 2e-05, "loss": 0.03385389, "step": 8446 }, { "epoch": 16.894, "grad_norm": 1.6203449964523315, "learning_rate": 2e-05, "loss": 0.0512885, "step": 8447 }, { "epoch": 16.896, "grad_norm": 2.008570909500122, "learning_rate": 2e-05, "loss": 0.0456307, "step": 8448 }, { "epoch": 16.898, "grad_norm": 1.0906918048858643, "learning_rate": 2e-05, "loss": 0.03587713, "step": 8449 }, { "epoch": 16.9, "grad_norm": 1.2898845672607422, "learning_rate": 2e-05, "loss": 0.04015197, "step": 8450 }, { "epoch": 16.902, "grad_norm": 1.479416012763977, "learning_rate": 2e-05, "loss": 0.0416062, "step": 8451 }, { "epoch": 16.904, "grad_norm": 1.684714913368225, "learning_rate": 2e-05, "loss": 0.04837813, "step": 8452 }, { "epoch": 16.906, "grad_norm": 1.2235033512115479, "learning_rate": 2e-05, "loss": 0.0416484, "step": 8453 }, { "epoch": 16.908, "grad_norm": 2.3907930850982666, "learning_rate": 2e-05, "loss": 0.03819187, "step": 8454 }, { "epoch": 16.91, "grad_norm": 1.285926103591919, "learning_rate": 2e-05, "loss": 0.04122959, "step": 8455 }, { "epoch": 16.912, "grad_norm": 1.177568793296814, "learning_rate": 2e-05, "loss": 0.02856367, "step": 8456 }, { "epoch": 16.914, "grad_norm": 1.9537116289138794, "learning_rate": 2e-05, "loss": 0.05142844, "step": 8457 }, { "epoch": 16.916, "grad_norm": 1.0300110578536987, "learning_rate": 2e-05, "loss": 0.03042921, "step": 8458 }, { "epoch": 16.918, "grad_norm": 1.1383572816848755, "learning_rate": 2e-05, "loss": 0.04310068, "step": 8459 }, { "epoch": 16.92, "grad_norm": 1.0624971389770508, "learning_rate": 2e-05, "loss": 0.03597179, "step": 8460 }, { "epoch": 16.922, "grad_norm": 1.5233795642852783, "learning_rate": 2e-05, "loss": 0.03848537, "step": 8461 }, { "epoch": 16.924, "grad_norm": 1.3909246921539307, "learning_rate": 2e-05, "loss": 0.03891563, "step": 8462 }, { "epoch": 16.926, "grad_norm": 0.9473965764045715, "learning_rate": 2e-05, "loss": 0.03634793, "step": 8463 }, { "epoch": 16.928, "grad_norm": 1.0389978885650635, "learning_rate": 2e-05, "loss": 0.04411847, "step": 8464 }, { "epoch": 16.93, "grad_norm": 0.998310387134552, "learning_rate": 2e-05, "loss": 0.03203969, "step": 8465 }, { "epoch": 16.932, "grad_norm": 1.1375733613967896, "learning_rate": 2e-05, "loss": 0.02510815, "step": 8466 }, { "epoch": 16.934, "grad_norm": 0.9891758561134338, "learning_rate": 2e-05, "loss": 0.03282684, "step": 8467 }, { "epoch": 16.936, "grad_norm": 1.2913753986358643, "learning_rate": 2e-05, "loss": 0.03847658, "step": 8468 }, { "epoch": 16.938, "grad_norm": 1.1569929122924805, "learning_rate": 2e-05, "loss": 0.0367549, "step": 8469 }, { "epoch": 16.94, "grad_norm": 1.4667600393295288, "learning_rate": 2e-05, "loss": 0.04367553, "step": 8470 }, { "epoch": 16.942, "grad_norm": 1.4492965936660767, "learning_rate": 2e-05, "loss": 0.03391558, "step": 8471 }, { "epoch": 16.944, "grad_norm": 1.4032447338104248, "learning_rate": 2e-05, "loss": 0.0395061, "step": 8472 }, { "epoch": 16.946, "grad_norm": 1.0054399967193604, "learning_rate": 2e-05, "loss": 0.03755559, "step": 8473 }, { "epoch": 16.948, "grad_norm": 1.1688107252120972, "learning_rate": 2e-05, "loss": 0.03690596, "step": 8474 }, { "epoch": 16.95, "grad_norm": 1.5043078660964966, "learning_rate": 2e-05, "loss": 0.04541836, "step": 8475 }, { "epoch": 16.951999999999998, "grad_norm": 1.0883275270462036, "learning_rate": 2e-05, "loss": 0.02984718, "step": 8476 }, { "epoch": 16.954, "grad_norm": 1.0278596878051758, "learning_rate": 2e-05, "loss": 0.0365, "step": 8477 }, { "epoch": 16.956, "grad_norm": 1.2338298559188843, "learning_rate": 2e-05, "loss": 0.04969736, "step": 8478 }, { "epoch": 16.958, "grad_norm": 0.9998340606689453, "learning_rate": 2e-05, "loss": 0.02545092, "step": 8479 }, { "epoch": 16.96, "grad_norm": 1.6648941040039062, "learning_rate": 2e-05, "loss": 0.04324679, "step": 8480 }, { "epoch": 16.962, "grad_norm": 1.4643219709396362, "learning_rate": 2e-05, "loss": 0.04028212, "step": 8481 }, { "epoch": 16.964, "grad_norm": 0.9894847869873047, "learning_rate": 2e-05, "loss": 0.03162416, "step": 8482 }, { "epoch": 16.966, "grad_norm": 1.1647943258285522, "learning_rate": 2e-05, "loss": 0.03466156, "step": 8483 }, { "epoch": 16.968, "grad_norm": 2.117931604385376, "learning_rate": 2e-05, "loss": 0.05283754, "step": 8484 }, { "epoch": 16.97, "grad_norm": 1.1155112981796265, "learning_rate": 2e-05, "loss": 0.03596318, "step": 8485 }, { "epoch": 16.972, "grad_norm": 1.9771344661712646, "learning_rate": 2e-05, "loss": 0.04496825, "step": 8486 }, { "epoch": 16.974, "grad_norm": 3.534433126449585, "learning_rate": 2e-05, "loss": 0.04280071, "step": 8487 }, { "epoch": 16.976, "grad_norm": 1.0432052612304688, "learning_rate": 2e-05, "loss": 0.03826564, "step": 8488 }, { "epoch": 16.978, "grad_norm": 0.984004557132721, "learning_rate": 2e-05, "loss": 0.03916224, "step": 8489 }, { "epoch": 16.98, "grad_norm": 1.8574531078338623, "learning_rate": 2e-05, "loss": 0.03894024, "step": 8490 }, { "epoch": 16.982, "grad_norm": 1.009029507637024, "learning_rate": 2e-05, "loss": 0.03786684, "step": 8491 }, { "epoch": 16.984, "grad_norm": 1.1529853343963623, "learning_rate": 2e-05, "loss": 0.04299698, "step": 8492 }, { "epoch": 16.986, "grad_norm": 1.2341350317001343, "learning_rate": 2e-05, "loss": 0.04567091, "step": 8493 }, { "epoch": 16.988, "grad_norm": 1.42328679561615, "learning_rate": 2e-05, "loss": 0.03224155, "step": 8494 }, { "epoch": 16.99, "grad_norm": 1.0303561687469482, "learning_rate": 2e-05, "loss": 0.03220159, "step": 8495 }, { "epoch": 16.992, "grad_norm": 1.0415387153625488, "learning_rate": 2e-05, "loss": 0.03420278, "step": 8496 }, { "epoch": 16.994, "grad_norm": 1.2398531436920166, "learning_rate": 2e-05, "loss": 0.04353315, "step": 8497 }, { "epoch": 16.996, "grad_norm": 1.6628758907318115, "learning_rate": 2e-05, "loss": 0.03730056, "step": 8498 }, { "epoch": 16.998, "grad_norm": 1.648309588432312, "learning_rate": 2e-05, "loss": 0.03792779, "step": 8499 }, { "epoch": 17.0, "grad_norm": 0.9928292036056519, "learning_rate": 2e-05, "loss": 0.03296513, "step": 8500 }, { "epoch": 17.0, "eval_performance": { "AngleClassification_1": 0.992, "AngleClassification_2": 0.998, "AngleClassification_3": 0.9800399201596807, "Equal_1": 1.0, "Equal_2": 0.9720558882235529, "Equal_3": 0.9421157684630739, "LineComparison_1": 1.0, "LineComparison_2": 0.998003992015968, "LineComparison_3": 0.9900199600798403, "Parallel_1": 0.9879759519038076, "Parallel_2": 0.9959919839679359, "Parallel_3": 0.968, "Perpendicular_1": 0.998, "Perpendicular_2": 0.986, "Perpendicular_3": 0.8056112224448898, "PointLiesOnCircle_1": 1.0, "PointLiesOnCircle_2": 0.996, "PointLiesOnCircle_3": 0.9852000000000001, "PointLiesOnLine_1": 0.9939879759519038, "PointLiesOnLine_2": 0.9939879759519038, "PointLiesOnLine_3": 0.9780439121756487 }, "eval_runtime": 224.675, "eval_samples_per_second": 46.734, "eval_steps_per_second": 0.935, "step": 8500 }, { "epoch": 17.002, "grad_norm": 1.5359245538711548, "learning_rate": 2e-05, "loss": 0.05671161, "step": 8501 }, { "epoch": 17.004, "grad_norm": 1.0819002389907837, "learning_rate": 2e-05, "loss": 0.03144156, "step": 8502 }, { "epoch": 17.006, "grad_norm": 1.1577181816101074, "learning_rate": 2e-05, "loss": 0.0360764, "step": 8503 }, { "epoch": 17.008, "grad_norm": 1.027273178100586, "learning_rate": 2e-05, "loss": 0.03432962, "step": 8504 }, { "epoch": 17.01, "grad_norm": 1.0032328367233276, "learning_rate": 2e-05, "loss": 0.03250239, "step": 8505 }, { "epoch": 17.012, "grad_norm": 1.1186151504516602, "learning_rate": 2e-05, "loss": 0.03491692, "step": 8506 }, { "epoch": 17.014, "grad_norm": 1.2863689661026, "learning_rate": 2e-05, "loss": 0.04010284, "step": 8507 }, { "epoch": 17.016, "grad_norm": 1.4958665370941162, "learning_rate": 2e-05, "loss": 0.0394282, "step": 8508 }, { "epoch": 17.018, "grad_norm": 1.2980878353118896, "learning_rate": 2e-05, "loss": 0.03610181, "step": 8509 }, { "epoch": 17.02, "grad_norm": 0.9921215176582336, "learning_rate": 2e-05, "loss": 0.03617042, "step": 8510 }, { "epoch": 17.022, "grad_norm": 1.4588276147842407, "learning_rate": 2e-05, "loss": 0.03630994, "step": 8511 }, { "epoch": 17.024, "grad_norm": 0.8500035405158997, "learning_rate": 2e-05, "loss": 0.02948019, "step": 8512 }, { "epoch": 17.026, "grad_norm": 1.3919631242752075, "learning_rate": 2e-05, "loss": 0.0459616, "step": 8513 }, { "epoch": 17.028, "grad_norm": 0.9129427075386047, "learning_rate": 2e-05, "loss": 0.03199217, "step": 8514 }, { "epoch": 17.03, "grad_norm": 1.1024483442306519, "learning_rate": 2e-05, "loss": 0.03428885, "step": 8515 }, { "epoch": 17.032, "grad_norm": 2.4011003971099854, "learning_rate": 2e-05, "loss": 0.04503044, "step": 8516 }, { "epoch": 17.034, "grad_norm": 1.7610011100769043, "learning_rate": 2e-05, "loss": 0.04756741, "step": 8517 }, { "epoch": 17.036, "grad_norm": 0.8797142505645752, "learning_rate": 2e-05, "loss": 0.03244509, "step": 8518 }, { "epoch": 17.038, "grad_norm": 4.1719841957092285, "learning_rate": 2e-05, "loss": 0.0382235, "step": 8519 }, { "epoch": 17.04, "grad_norm": 1.2462174892425537, "learning_rate": 2e-05, "loss": 0.02870779, "step": 8520 }, { "epoch": 17.042, "grad_norm": 1.8371191024780273, "learning_rate": 2e-05, "loss": 0.04556612, "step": 8521 }, { "epoch": 17.044, "grad_norm": 1.719848394393921, "learning_rate": 2e-05, "loss": 0.04793326, "step": 8522 }, { "epoch": 17.046, "grad_norm": 1.2223604917526245, "learning_rate": 2e-05, "loss": 0.0482173, "step": 8523 }, { "epoch": 17.048, "grad_norm": 1.471356987953186, "learning_rate": 2e-05, "loss": 0.04068169, "step": 8524 }, { "epoch": 17.05, "grad_norm": 1.1412619352340698, "learning_rate": 2e-05, "loss": 0.03082416, "step": 8525 }, { "epoch": 17.052, "grad_norm": 1.2751214504241943, "learning_rate": 2e-05, "loss": 0.03872452, "step": 8526 }, { "epoch": 17.054, "grad_norm": 1.5205154418945312, "learning_rate": 2e-05, "loss": 0.04934125, "step": 8527 }, { "epoch": 17.056, "grad_norm": 0.7746216654777527, "learning_rate": 2e-05, "loss": 0.02595963, "step": 8528 }, { "epoch": 17.058, "grad_norm": 0.9941741824150085, "learning_rate": 2e-05, "loss": 0.03169616, "step": 8529 }, { "epoch": 17.06, "grad_norm": 1.0125973224639893, "learning_rate": 2e-05, "loss": 0.03136482, "step": 8530 }, { "epoch": 17.062, "grad_norm": 1.6693202257156372, "learning_rate": 2e-05, "loss": 0.05287129, "step": 8531 }, { "epoch": 17.064, "grad_norm": 1.0966691970825195, "learning_rate": 2e-05, "loss": 0.04766946, "step": 8532 }, { "epoch": 17.066, "grad_norm": 1.5236393213272095, "learning_rate": 2e-05, "loss": 0.0366102, "step": 8533 }, { "epoch": 17.068, "grad_norm": 1.4767037630081177, "learning_rate": 2e-05, "loss": 0.03293071, "step": 8534 }, { "epoch": 17.07, "grad_norm": 1.2356542348861694, "learning_rate": 2e-05, "loss": 0.04590891, "step": 8535 }, { "epoch": 17.072, "grad_norm": 1.7014425992965698, "learning_rate": 2e-05, "loss": 0.05307303, "step": 8536 }, { "epoch": 17.074, "grad_norm": 2.0473504066467285, "learning_rate": 2e-05, "loss": 0.05347956, "step": 8537 }, { "epoch": 17.076, "grad_norm": 1.637966275215149, "learning_rate": 2e-05, "loss": 0.04445482, "step": 8538 }, { "epoch": 17.078, "grad_norm": 1.6850576400756836, "learning_rate": 2e-05, "loss": 0.03858908, "step": 8539 }, { "epoch": 17.08, "grad_norm": 1.4971059560775757, "learning_rate": 2e-05, "loss": 0.04155015, "step": 8540 }, { "epoch": 17.082, "grad_norm": 1.4756637811660767, "learning_rate": 2e-05, "loss": 0.03561596, "step": 8541 }, { "epoch": 17.084, "grad_norm": 1.5668524503707886, "learning_rate": 2e-05, "loss": 0.03507215, "step": 8542 }, { "epoch": 17.086, "grad_norm": 1.2801655530929565, "learning_rate": 2e-05, "loss": 0.0293637, "step": 8543 }, { "epoch": 17.088, "grad_norm": 1.1081616878509521, "learning_rate": 2e-05, "loss": 0.03186993, "step": 8544 }, { "epoch": 17.09, "grad_norm": 1.3080055713653564, "learning_rate": 2e-05, "loss": 0.04235549, "step": 8545 }, { "epoch": 17.092, "grad_norm": 0.9470260739326477, "learning_rate": 2e-05, "loss": 0.03442793, "step": 8546 }, { "epoch": 17.094, "grad_norm": 1.1802936792373657, "learning_rate": 2e-05, "loss": 0.03714792, "step": 8547 }, { "epoch": 17.096, "grad_norm": 1.807731032371521, "learning_rate": 2e-05, "loss": 0.04230053, "step": 8548 }, { "epoch": 17.098, "grad_norm": 1.4350210428237915, "learning_rate": 2e-05, "loss": 0.04914776, "step": 8549 }, { "epoch": 17.1, "grad_norm": 0.9851990938186646, "learning_rate": 2e-05, "loss": 0.02804206, "step": 8550 }, { "epoch": 17.102, "grad_norm": 1.2642531394958496, "learning_rate": 2e-05, "loss": 0.04768068, "step": 8551 }, { "epoch": 17.104, "grad_norm": 1.1797047853469849, "learning_rate": 2e-05, "loss": 0.04758175, "step": 8552 }, { "epoch": 17.106, "grad_norm": 1.6947985887527466, "learning_rate": 2e-05, "loss": 0.04096333, "step": 8553 }, { "epoch": 17.108, "grad_norm": 1.112533450126648, "learning_rate": 2e-05, "loss": 0.04062056, "step": 8554 }, { "epoch": 17.11, "grad_norm": 1.6702601909637451, "learning_rate": 2e-05, "loss": 0.04908922, "step": 8555 }, { "epoch": 17.112, "grad_norm": 1.363795280456543, "learning_rate": 2e-05, "loss": 0.04686725, "step": 8556 }, { "epoch": 17.114, "grad_norm": 1.6973438262939453, "learning_rate": 2e-05, "loss": 0.04548879, "step": 8557 }, { "epoch": 17.116, "grad_norm": 1.5676871538162231, "learning_rate": 2e-05, "loss": 0.04950342, "step": 8558 }, { "epoch": 17.118, "grad_norm": 3.8536858558654785, "learning_rate": 2e-05, "loss": 0.03215913, "step": 8559 }, { "epoch": 17.12, "grad_norm": 1.0589704513549805, "learning_rate": 2e-05, "loss": 0.03114047, "step": 8560 }, { "epoch": 17.122, "grad_norm": 1.477299451828003, "learning_rate": 2e-05, "loss": 0.05461933, "step": 8561 }, { "epoch": 17.124, "grad_norm": 0.8978338837623596, "learning_rate": 2e-05, "loss": 0.03086586, "step": 8562 }, { "epoch": 17.126, "grad_norm": 1.019030213356018, "learning_rate": 2e-05, "loss": 0.0387928, "step": 8563 }, { "epoch": 17.128, "grad_norm": 1.5825538635253906, "learning_rate": 2e-05, "loss": 0.0439937, "step": 8564 }, { "epoch": 17.13, "grad_norm": 1.0221093893051147, "learning_rate": 2e-05, "loss": 0.03445491, "step": 8565 }, { "epoch": 17.132, "grad_norm": 1.4619035720825195, "learning_rate": 2e-05, "loss": 0.0406983, "step": 8566 }, { "epoch": 17.134, "grad_norm": 1.3383300304412842, "learning_rate": 2e-05, "loss": 0.04617282, "step": 8567 }, { "epoch": 17.136, "grad_norm": 1.0340158939361572, "learning_rate": 2e-05, "loss": 0.03375333, "step": 8568 }, { "epoch": 17.138, "grad_norm": 1.8310198783874512, "learning_rate": 2e-05, "loss": 0.05561754, "step": 8569 }, { "epoch": 17.14, "grad_norm": 1.751508116722107, "learning_rate": 2e-05, "loss": 0.04880706, "step": 8570 }, { "epoch": 17.142, "grad_norm": 0.790224015712738, "learning_rate": 2e-05, "loss": 0.02321922, "step": 8571 }, { "epoch": 17.144, "grad_norm": 0.9479782581329346, "learning_rate": 2e-05, "loss": 0.03536796, "step": 8572 }, { "epoch": 17.146, "grad_norm": 0.9049095511436462, "learning_rate": 2e-05, "loss": 0.03191496, "step": 8573 }, { "epoch": 17.148, "grad_norm": 0.8870818614959717, "learning_rate": 2e-05, "loss": 0.0257305, "step": 8574 }, { "epoch": 17.15, "grad_norm": 0.8961000442504883, "learning_rate": 2e-05, "loss": 0.03139597, "step": 8575 }, { "epoch": 17.152, "grad_norm": 1.450302004814148, "learning_rate": 2e-05, "loss": 0.0425199, "step": 8576 }, { "epoch": 17.154, "grad_norm": 0.8010420203208923, "learning_rate": 2e-05, "loss": 0.02336349, "step": 8577 }, { "epoch": 17.156, "grad_norm": 1.380361557006836, "learning_rate": 2e-05, "loss": 0.04118584, "step": 8578 }, { "epoch": 17.158, "grad_norm": 1.7118769884109497, "learning_rate": 2e-05, "loss": 0.04124471, "step": 8579 }, { "epoch": 17.16, "grad_norm": 1.4042177200317383, "learning_rate": 2e-05, "loss": 0.02495999, "step": 8580 }, { "epoch": 17.162, "grad_norm": 1.469964861869812, "learning_rate": 2e-05, "loss": 0.03213714, "step": 8581 }, { "epoch": 17.164, "grad_norm": 1.2877546548843384, "learning_rate": 2e-05, "loss": 0.04177179, "step": 8582 }, { "epoch": 17.166, "grad_norm": 1.1574366092681885, "learning_rate": 2e-05, "loss": 0.05359238, "step": 8583 }, { "epoch": 17.168, "grad_norm": 0.9044530987739563, "learning_rate": 2e-05, "loss": 0.03078216, "step": 8584 }, { "epoch": 17.17, "grad_norm": 1.222913384437561, "learning_rate": 2e-05, "loss": 0.03048852, "step": 8585 }, { "epoch": 17.172, "grad_norm": 1.726912260055542, "learning_rate": 2e-05, "loss": 0.03875314, "step": 8586 }, { "epoch": 17.174, "grad_norm": 1.3017159700393677, "learning_rate": 2e-05, "loss": 0.03192435, "step": 8587 }, { "epoch": 17.176, "grad_norm": 0.9921463131904602, "learning_rate": 2e-05, "loss": 0.04696386, "step": 8588 }, { "epoch": 17.178, "grad_norm": 1.2307310104370117, "learning_rate": 2e-05, "loss": 0.04169056, "step": 8589 }, { "epoch": 17.18, "grad_norm": 1.5270488262176514, "learning_rate": 2e-05, "loss": 0.04758389, "step": 8590 }, { "epoch": 17.182, "grad_norm": 1.4675136804580688, "learning_rate": 2e-05, "loss": 0.04742341, "step": 8591 }, { "epoch": 17.184, "grad_norm": 1.0483239889144897, "learning_rate": 2e-05, "loss": 0.0411918, "step": 8592 }, { "epoch": 17.186, "grad_norm": 2.392256021499634, "learning_rate": 2e-05, "loss": 0.04580018, "step": 8593 }, { "epoch": 17.188, "grad_norm": 1.1972886323928833, "learning_rate": 2e-05, "loss": 0.04364569, "step": 8594 }, { "epoch": 17.19, "grad_norm": 1.207067847251892, "learning_rate": 2e-05, "loss": 0.03662961, "step": 8595 }, { "epoch": 17.192, "grad_norm": 1.2904925346374512, "learning_rate": 2e-05, "loss": 0.04472976, "step": 8596 }, { "epoch": 17.194, "grad_norm": 1.3027197122573853, "learning_rate": 2e-05, "loss": 0.04618686, "step": 8597 }, { "epoch": 17.196, "grad_norm": 1.3695553541183472, "learning_rate": 2e-05, "loss": 0.03824482, "step": 8598 }, { "epoch": 17.198, "grad_norm": 1.132347583770752, "learning_rate": 2e-05, "loss": 0.03607019, "step": 8599 }, { "epoch": 17.2, "grad_norm": 1.3135677576065063, "learning_rate": 2e-05, "loss": 0.03511221, "step": 8600 }, { "epoch": 17.202, "grad_norm": 1.5953634977340698, "learning_rate": 2e-05, "loss": 0.04552853, "step": 8601 }, { "epoch": 17.204, "grad_norm": 1.1461580991744995, "learning_rate": 2e-05, "loss": 0.03521287, "step": 8602 }, { "epoch": 17.206, "grad_norm": 1.0003410577774048, "learning_rate": 2e-05, "loss": 0.03447621, "step": 8603 }, { "epoch": 17.208, "grad_norm": 1.228497862815857, "learning_rate": 2e-05, "loss": 0.03070836, "step": 8604 }, { "epoch": 17.21, "grad_norm": 1.393392562866211, "learning_rate": 2e-05, "loss": 0.03640123, "step": 8605 }, { "epoch": 17.212, "grad_norm": 2.2513628005981445, "learning_rate": 2e-05, "loss": 0.0693409, "step": 8606 }, { "epoch": 17.214, "grad_norm": 1.7178155183792114, "learning_rate": 2e-05, "loss": 0.04885394, "step": 8607 }, { "epoch": 17.216, "grad_norm": 1.4733831882476807, "learning_rate": 2e-05, "loss": 0.04129115, "step": 8608 }, { "epoch": 17.218, "grad_norm": 1.5989679098129272, "learning_rate": 2e-05, "loss": 0.04948647, "step": 8609 }, { "epoch": 17.22, "grad_norm": 2.34555983543396, "learning_rate": 2e-05, "loss": 0.05427173, "step": 8610 }, { "epoch": 17.222, "grad_norm": 1.772535800933838, "learning_rate": 2e-05, "loss": 0.03654601, "step": 8611 }, { "epoch": 17.224, "grad_norm": 1.5619287490844727, "learning_rate": 2e-05, "loss": 0.04149141, "step": 8612 }, { "epoch": 17.226, "grad_norm": 2.2589328289031982, "learning_rate": 2e-05, "loss": 0.04871174, "step": 8613 }, { "epoch": 17.228, "grad_norm": 1.3272687196731567, "learning_rate": 2e-05, "loss": 0.04861603, "step": 8614 }, { "epoch": 17.23, "grad_norm": 1.4670130014419556, "learning_rate": 2e-05, "loss": 0.04482318, "step": 8615 }, { "epoch": 17.232, "grad_norm": 0.79426109790802, "learning_rate": 2e-05, "loss": 0.03222965, "step": 8616 }, { "epoch": 17.234, "grad_norm": 1.3933720588684082, "learning_rate": 2e-05, "loss": 0.03937185, "step": 8617 }, { "epoch": 17.236, "grad_norm": 1.5846929550170898, "learning_rate": 2e-05, "loss": 0.04592674, "step": 8618 }, { "epoch": 17.238, "grad_norm": 3.0722763538360596, "learning_rate": 2e-05, "loss": 0.05167246, "step": 8619 }, { "epoch": 17.24, "grad_norm": 2.039022207260132, "learning_rate": 2e-05, "loss": 0.04853941, "step": 8620 }, { "epoch": 17.242, "grad_norm": 1.1206055879592896, "learning_rate": 2e-05, "loss": 0.03702849, "step": 8621 }, { "epoch": 17.244, "grad_norm": 0.754891037940979, "learning_rate": 2e-05, "loss": 0.02432076, "step": 8622 }, { "epoch": 17.246, "grad_norm": 1.6215736865997314, "learning_rate": 2e-05, "loss": 0.04649737, "step": 8623 }, { "epoch": 17.248, "grad_norm": 0.9462254643440247, "learning_rate": 2e-05, "loss": 0.02921049, "step": 8624 }, { "epoch": 17.25, "grad_norm": 1.0046939849853516, "learning_rate": 2e-05, "loss": 0.03344686, "step": 8625 }, { "epoch": 17.252, "grad_norm": 1.1843231916427612, "learning_rate": 2e-05, "loss": 0.03827581, "step": 8626 }, { "epoch": 17.254, "grad_norm": 1.1492016315460205, "learning_rate": 2e-05, "loss": 0.04559443, "step": 8627 }, { "epoch": 17.256, "grad_norm": 0.8264913558959961, "learning_rate": 2e-05, "loss": 0.02462912, "step": 8628 }, { "epoch": 17.258, "grad_norm": 0.999622642993927, "learning_rate": 2e-05, "loss": 0.03232847, "step": 8629 }, { "epoch": 17.26, "grad_norm": 1.0850486755371094, "learning_rate": 2e-05, "loss": 0.04395636, "step": 8630 }, { "epoch": 17.262, "grad_norm": 0.9636176228523254, "learning_rate": 2e-05, "loss": 0.03554864, "step": 8631 }, { "epoch": 17.264, "grad_norm": 1.4041770696640015, "learning_rate": 2e-05, "loss": 0.04400384, "step": 8632 }, { "epoch": 17.266, "grad_norm": 1.1981825828552246, "learning_rate": 2e-05, "loss": 0.03959544, "step": 8633 }, { "epoch": 17.268, "grad_norm": 0.9154717326164246, "learning_rate": 2e-05, "loss": 0.03228838, "step": 8634 }, { "epoch": 17.27, "grad_norm": 1.599541425704956, "learning_rate": 2e-05, "loss": 0.04345845, "step": 8635 }, { "epoch": 17.272, "grad_norm": 1.3427467346191406, "learning_rate": 2e-05, "loss": 0.04242849, "step": 8636 }, { "epoch": 17.274, "grad_norm": 1.09714937210083, "learning_rate": 2e-05, "loss": 0.03136939, "step": 8637 }, { "epoch": 17.276, "grad_norm": 0.9319362640380859, "learning_rate": 2e-05, "loss": 0.03076743, "step": 8638 }, { "epoch": 17.278, "grad_norm": 1.5905580520629883, "learning_rate": 2e-05, "loss": 0.0551797, "step": 8639 }, { "epoch": 17.28, "grad_norm": 1.0840466022491455, "learning_rate": 2e-05, "loss": 0.03980708, "step": 8640 }, { "epoch": 17.282, "grad_norm": 1.0239530801773071, "learning_rate": 2e-05, "loss": 0.02846963, "step": 8641 }, { "epoch": 17.284, "grad_norm": 2.967784881591797, "learning_rate": 2e-05, "loss": 0.04059572, "step": 8642 }, { "epoch": 17.286, "grad_norm": 0.8498859405517578, "learning_rate": 2e-05, "loss": 0.02425959, "step": 8643 }, { "epoch": 17.288, "grad_norm": 0.8297300338745117, "learning_rate": 2e-05, "loss": 0.02112471, "step": 8644 }, { "epoch": 17.29, "grad_norm": 1.2205857038497925, "learning_rate": 2e-05, "loss": 0.03408385, "step": 8645 }, { "epoch": 17.292, "grad_norm": 1.3790645599365234, "learning_rate": 2e-05, "loss": 0.03867252, "step": 8646 }, { "epoch": 17.294, "grad_norm": 1.2994039058685303, "learning_rate": 2e-05, "loss": 0.04984041, "step": 8647 }, { "epoch": 17.296, "grad_norm": 0.8615119457244873, "learning_rate": 2e-05, "loss": 0.03654513, "step": 8648 }, { "epoch": 17.298, "grad_norm": 0.9678096771240234, "learning_rate": 2e-05, "loss": 0.03164845, "step": 8649 }, { "epoch": 17.3, "grad_norm": 1.1525732278823853, "learning_rate": 2e-05, "loss": 0.03211594, "step": 8650 }, { "epoch": 17.302, "grad_norm": 1.0735673904418945, "learning_rate": 2e-05, "loss": 0.03395882, "step": 8651 }, { "epoch": 17.304, "grad_norm": 2.3253841400146484, "learning_rate": 2e-05, "loss": 0.05617633, "step": 8652 }, { "epoch": 17.306, "grad_norm": 0.997426450252533, "learning_rate": 2e-05, "loss": 0.04420609, "step": 8653 }, { "epoch": 17.308, "grad_norm": 1.2284607887268066, "learning_rate": 2e-05, "loss": 0.04732598, "step": 8654 }, { "epoch": 17.31, "grad_norm": 1.2906852960586548, "learning_rate": 2e-05, "loss": 0.0439896, "step": 8655 }, { "epoch": 17.312, "grad_norm": 1.703698754310608, "learning_rate": 2e-05, "loss": 0.05111379, "step": 8656 }, { "epoch": 17.314, "grad_norm": 1.282839298248291, "learning_rate": 2e-05, "loss": 0.03477667, "step": 8657 }, { "epoch": 17.316, "grad_norm": 1.2357392311096191, "learning_rate": 2e-05, "loss": 0.04413423, "step": 8658 }, { "epoch": 17.318, "grad_norm": 1.3249421119689941, "learning_rate": 2e-05, "loss": 0.03586092, "step": 8659 }, { "epoch": 17.32, "grad_norm": 4.587998867034912, "learning_rate": 2e-05, "loss": 0.0535292, "step": 8660 }, { "epoch": 17.322, "grad_norm": 1.1066759824752808, "learning_rate": 2e-05, "loss": 0.03603195, "step": 8661 }, { "epoch": 17.324, "grad_norm": 1.246421456336975, "learning_rate": 2e-05, "loss": 0.04933175, "step": 8662 }, { "epoch": 17.326, "grad_norm": 1.2832732200622559, "learning_rate": 2e-05, "loss": 0.04066588, "step": 8663 }, { "epoch": 17.328, "grad_norm": 1.3204326629638672, "learning_rate": 2e-05, "loss": 0.04957343, "step": 8664 }, { "epoch": 17.33, "grad_norm": 1.1169078350067139, "learning_rate": 2e-05, "loss": 0.02344235, "step": 8665 }, { "epoch": 17.332, "grad_norm": 1.4164308309555054, "learning_rate": 2e-05, "loss": 0.03812492, "step": 8666 }, { "epoch": 17.334, "grad_norm": 1.2576162815093994, "learning_rate": 2e-05, "loss": 0.04109219, "step": 8667 }, { "epoch": 17.336, "grad_norm": 2.6111488342285156, "learning_rate": 2e-05, "loss": 0.04867829, "step": 8668 }, { "epoch": 17.338, "grad_norm": 1.2373993396759033, "learning_rate": 2e-05, "loss": 0.04425323, "step": 8669 }, { "epoch": 17.34, "grad_norm": 1.303887128829956, "learning_rate": 2e-05, "loss": 0.04141787, "step": 8670 }, { "epoch": 17.342, "grad_norm": 0.7339890003204346, "learning_rate": 2e-05, "loss": 0.02217896, "step": 8671 }, { "epoch": 17.344, "grad_norm": 1.0050698518753052, "learning_rate": 2e-05, "loss": 0.03489015, "step": 8672 }, { "epoch": 17.346, "grad_norm": 1.595320224761963, "learning_rate": 2e-05, "loss": 0.04613564, "step": 8673 }, { "epoch": 17.348, "grad_norm": 2.3125736713409424, "learning_rate": 2e-05, "loss": 0.05707267, "step": 8674 }, { "epoch": 17.35, "grad_norm": 1.2593269348144531, "learning_rate": 2e-05, "loss": 0.04003227, "step": 8675 }, { "epoch": 17.352, "grad_norm": 1.4623792171478271, "learning_rate": 2e-05, "loss": 0.04562994, "step": 8676 }, { "epoch": 17.354, "grad_norm": 1.3954448699951172, "learning_rate": 2e-05, "loss": 0.04902184, "step": 8677 }, { "epoch": 17.356, "grad_norm": 1.0105029344558716, "learning_rate": 2e-05, "loss": 0.03931555, "step": 8678 }, { "epoch": 17.358, "grad_norm": 1.3040993213653564, "learning_rate": 2e-05, "loss": 0.04138822, "step": 8679 }, { "epoch": 17.36, "grad_norm": 1.165553331375122, "learning_rate": 2e-05, "loss": 0.04625338, "step": 8680 }, { "epoch": 17.362, "grad_norm": 1.3289366960525513, "learning_rate": 2e-05, "loss": 0.0535622, "step": 8681 }, { "epoch": 17.364, "grad_norm": 0.9976545572280884, "learning_rate": 2e-05, "loss": 0.03734158, "step": 8682 }, { "epoch": 17.366, "grad_norm": 1.734401822090149, "learning_rate": 2e-05, "loss": 0.05232745, "step": 8683 }, { "epoch": 17.368, "grad_norm": 0.8954886198043823, "learning_rate": 2e-05, "loss": 0.03906727, "step": 8684 }, { "epoch": 17.37, "grad_norm": 1.2380257844924927, "learning_rate": 2e-05, "loss": 0.043408, "step": 8685 }, { "epoch": 17.372, "grad_norm": 1.0980156660079956, "learning_rate": 2e-05, "loss": 0.0367986, "step": 8686 }, { "epoch": 17.374, "grad_norm": 1.0723457336425781, "learning_rate": 2e-05, "loss": 0.03390981, "step": 8687 }, { "epoch": 17.376, "grad_norm": 0.8375378847122192, "learning_rate": 2e-05, "loss": 0.02859962, "step": 8688 }, { "epoch": 17.378, "grad_norm": 0.9571086764335632, "learning_rate": 2e-05, "loss": 0.0336821, "step": 8689 }, { "epoch": 17.38, "grad_norm": 0.8915534019470215, "learning_rate": 2e-05, "loss": 0.03872114, "step": 8690 }, { "epoch": 17.382, "grad_norm": 0.9639217853546143, "learning_rate": 2e-05, "loss": 0.03235034, "step": 8691 }, { "epoch": 17.384, "grad_norm": 1.5173991918563843, "learning_rate": 2e-05, "loss": 0.04852716, "step": 8692 }, { "epoch": 17.386, "grad_norm": 1.1856600046157837, "learning_rate": 2e-05, "loss": 0.04101206, "step": 8693 }, { "epoch": 17.388, "grad_norm": 1.314608097076416, "learning_rate": 2e-05, "loss": 0.04026335, "step": 8694 }, { "epoch": 17.39, "grad_norm": 1.3086051940917969, "learning_rate": 2e-05, "loss": 0.04220086, "step": 8695 }, { "epoch": 17.392, "grad_norm": 3.2224113941192627, "learning_rate": 2e-05, "loss": 0.0493929, "step": 8696 }, { "epoch": 17.394, "grad_norm": 1.146926999092102, "learning_rate": 2e-05, "loss": 0.0448442, "step": 8697 }, { "epoch": 17.396, "grad_norm": 1.0564935207366943, "learning_rate": 2e-05, "loss": 0.03356594, "step": 8698 }, { "epoch": 17.398, "grad_norm": 1.4471290111541748, "learning_rate": 2e-05, "loss": 0.04279934, "step": 8699 }, { "epoch": 17.4, "grad_norm": 2.1449854373931885, "learning_rate": 2e-05, "loss": 0.05847483, "step": 8700 }, { "epoch": 17.402, "grad_norm": 1.1086057424545288, "learning_rate": 2e-05, "loss": 0.04579893, "step": 8701 }, { "epoch": 17.404, "grad_norm": 0.9492136240005493, "learning_rate": 2e-05, "loss": 0.03735118, "step": 8702 }, { "epoch": 17.406, "grad_norm": 1.0751152038574219, "learning_rate": 2e-05, "loss": 0.03839558, "step": 8703 }, { "epoch": 17.408, "grad_norm": 1.668332576751709, "learning_rate": 2e-05, "loss": 0.04653643, "step": 8704 }, { "epoch": 17.41, "grad_norm": 1.0757803916931152, "learning_rate": 2e-05, "loss": 0.02798032, "step": 8705 }, { "epoch": 17.412, "grad_norm": 1.6126792430877686, "learning_rate": 2e-05, "loss": 0.04464233, "step": 8706 }, { "epoch": 17.414, "grad_norm": 0.908248782157898, "learning_rate": 2e-05, "loss": 0.0273521, "step": 8707 }, { "epoch": 17.416, "grad_norm": 0.8748065829277039, "learning_rate": 2e-05, "loss": 0.03034199, "step": 8708 }, { "epoch": 17.418, "grad_norm": 1.784939169883728, "learning_rate": 2e-05, "loss": 0.03804438, "step": 8709 }, { "epoch": 17.42, "grad_norm": 1.6154533624649048, "learning_rate": 2e-05, "loss": 0.06213375, "step": 8710 }, { "epoch": 17.422, "grad_norm": 1.2361748218536377, "learning_rate": 2e-05, "loss": 0.04120607, "step": 8711 }, { "epoch": 17.424, "grad_norm": 1.214632511138916, "learning_rate": 2e-05, "loss": 0.03416174, "step": 8712 }, { "epoch": 17.426, "grad_norm": 1.820543885231018, "learning_rate": 2e-05, "loss": 0.05989226, "step": 8713 }, { "epoch": 17.428, "grad_norm": 2.1264379024505615, "learning_rate": 2e-05, "loss": 0.039062, "step": 8714 }, { "epoch": 17.43, "grad_norm": 1.3278511762619019, "learning_rate": 2e-05, "loss": 0.03967718, "step": 8715 }, { "epoch": 17.432, "grad_norm": 1.0769484043121338, "learning_rate": 2e-05, "loss": 0.02441366, "step": 8716 }, { "epoch": 17.434, "grad_norm": 1.1130462884902954, "learning_rate": 2e-05, "loss": 0.04102819, "step": 8717 }, { "epoch": 17.436, "grad_norm": 1.465743899345398, "learning_rate": 2e-05, "loss": 0.03753132, "step": 8718 }, { "epoch": 17.438, "grad_norm": 1.4395439624786377, "learning_rate": 2e-05, "loss": 0.03411968, "step": 8719 }, { "epoch": 17.44, "grad_norm": 1.0044280290603638, "learning_rate": 2e-05, "loss": 0.02785654, "step": 8720 }, { "epoch": 17.442, "grad_norm": 1.6734334230422974, "learning_rate": 2e-05, "loss": 0.04651678, "step": 8721 }, { "epoch": 17.444, "grad_norm": 1.4639875888824463, "learning_rate": 2e-05, "loss": 0.04295172, "step": 8722 }, { "epoch": 17.446, "grad_norm": 1.1858381032943726, "learning_rate": 2e-05, "loss": 0.04879812, "step": 8723 }, { "epoch": 17.448, "grad_norm": 1.0685664415359497, "learning_rate": 2e-05, "loss": 0.03533549, "step": 8724 }, { "epoch": 17.45, "grad_norm": 0.9514467716217041, "learning_rate": 2e-05, "loss": 0.03018641, "step": 8725 }, { "epoch": 17.452, "grad_norm": 1.306121826171875, "learning_rate": 2e-05, "loss": 0.04384874, "step": 8726 }, { "epoch": 17.454, "grad_norm": 1.8598406314849854, "learning_rate": 2e-05, "loss": 0.03335168, "step": 8727 }, { "epoch": 17.456, "grad_norm": 1.1732486486434937, "learning_rate": 2e-05, "loss": 0.04366779, "step": 8728 }, { "epoch": 17.458, "grad_norm": 1.665764570236206, "learning_rate": 2e-05, "loss": 0.04087559, "step": 8729 }, { "epoch": 17.46, "grad_norm": 1.0640555620193481, "learning_rate": 2e-05, "loss": 0.03340855, "step": 8730 }, { "epoch": 17.462, "grad_norm": 1.245887041091919, "learning_rate": 2e-05, "loss": 0.03726444, "step": 8731 }, { "epoch": 17.464, "grad_norm": 0.983849048614502, "learning_rate": 2e-05, "loss": 0.03766588, "step": 8732 }, { "epoch": 17.466, "grad_norm": 1.142432689666748, "learning_rate": 2e-05, "loss": 0.03255175, "step": 8733 }, { "epoch": 17.468, "grad_norm": 1.0495455265045166, "learning_rate": 2e-05, "loss": 0.04515807, "step": 8734 }, { "epoch": 17.47, "grad_norm": 0.7884633541107178, "learning_rate": 2e-05, "loss": 0.02538252, "step": 8735 }, { "epoch": 17.472, "grad_norm": 1.0561290979385376, "learning_rate": 2e-05, "loss": 0.02514949, "step": 8736 }, { "epoch": 17.474, "grad_norm": 0.990422248840332, "learning_rate": 2e-05, "loss": 0.03782775, "step": 8737 }, { "epoch": 17.476, "grad_norm": 1.1141197681427002, "learning_rate": 2e-05, "loss": 0.04302982, "step": 8738 }, { "epoch": 17.478, "grad_norm": 1.5302205085754395, "learning_rate": 2e-05, "loss": 0.04796479, "step": 8739 }, { "epoch": 17.48, "grad_norm": 1.6050845384597778, "learning_rate": 2e-05, "loss": 0.04221671, "step": 8740 }, { "epoch": 17.482, "grad_norm": 0.8999359011650085, "learning_rate": 2e-05, "loss": 0.03230982, "step": 8741 }, { "epoch": 17.484, "grad_norm": 1.0996882915496826, "learning_rate": 2e-05, "loss": 0.03524046, "step": 8742 }, { "epoch": 17.486, "grad_norm": 1.7287040948867798, "learning_rate": 2e-05, "loss": 0.04654096, "step": 8743 }, { "epoch": 17.488, "grad_norm": 1.0177775621414185, "learning_rate": 2e-05, "loss": 0.03912631, "step": 8744 }, { "epoch": 17.49, "grad_norm": 1.454356074333191, "learning_rate": 2e-05, "loss": 0.06515303, "step": 8745 }, { "epoch": 17.492, "grad_norm": 1.3505653142929077, "learning_rate": 2e-05, "loss": 0.03070353, "step": 8746 }, { "epoch": 17.494, "grad_norm": 1.6107195615768433, "learning_rate": 2e-05, "loss": 0.03938747, "step": 8747 }, { "epoch": 17.496, "grad_norm": 0.9125130772590637, "learning_rate": 2e-05, "loss": 0.03277614, "step": 8748 }, { "epoch": 17.498, "grad_norm": 1.1236929893493652, "learning_rate": 2e-05, "loss": 0.03174346, "step": 8749 }, { "epoch": 17.5, "grad_norm": 1.740049123764038, "learning_rate": 2e-05, "loss": 0.05141984, "step": 8750 }, { "epoch": 17.502, "grad_norm": 1.1514123678207397, "learning_rate": 2e-05, "loss": 0.04636499, "step": 8751 }, { "epoch": 17.504, "grad_norm": 1.0444940328598022, "learning_rate": 2e-05, "loss": 0.03405978, "step": 8752 }, { "epoch": 17.506, "grad_norm": 1.5515344142913818, "learning_rate": 2e-05, "loss": 0.04465108, "step": 8753 }, { "epoch": 17.508, "grad_norm": 1.082181453704834, "learning_rate": 2e-05, "loss": 0.04043542, "step": 8754 }, { "epoch": 17.51, "grad_norm": 1.2333120107650757, "learning_rate": 2e-05, "loss": 0.0289123, "step": 8755 }, { "epoch": 17.512, "grad_norm": 1.195300579071045, "learning_rate": 2e-05, "loss": 0.02863741, "step": 8756 }, { "epoch": 17.514, "grad_norm": 1.0455743074417114, "learning_rate": 2e-05, "loss": 0.02629495, "step": 8757 }, { "epoch": 17.516, "grad_norm": 1.3960273265838623, "learning_rate": 2e-05, "loss": 0.03684978, "step": 8758 }, { "epoch": 17.518, "grad_norm": 2.1864404678344727, "learning_rate": 2e-05, "loss": 0.04845744, "step": 8759 }, { "epoch": 17.52, "grad_norm": 1.1524195671081543, "learning_rate": 2e-05, "loss": 0.03482993, "step": 8760 }, { "epoch": 17.522, "grad_norm": 1.7668960094451904, "learning_rate": 2e-05, "loss": 0.05991198, "step": 8761 }, { "epoch": 17.524, "grad_norm": 1.4282852411270142, "learning_rate": 2e-05, "loss": 0.0387131, "step": 8762 }, { "epoch": 17.526, "grad_norm": 1.9121938943862915, "learning_rate": 2e-05, "loss": 0.03121714, "step": 8763 }, { "epoch": 17.528, "grad_norm": 1.2554548978805542, "learning_rate": 2e-05, "loss": 0.03467404, "step": 8764 }, { "epoch": 17.53, "grad_norm": 0.8771201968193054, "learning_rate": 2e-05, "loss": 0.02793094, "step": 8765 }, { "epoch": 17.532, "grad_norm": 1.9788553714752197, "learning_rate": 2e-05, "loss": 0.05596177, "step": 8766 }, { "epoch": 17.534, "grad_norm": 1.5069613456726074, "learning_rate": 2e-05, "loss": 0.03567219, "step": 8767 }, { "epoch": 17.536, "grad_norm": 0.9605765342712402, "learning_rate": 2e-05, "loss": 0.02830346, "step": 8768 }, { "epoch": 17.538, "grad_norm": 1.8907548189163208, "learning_rate": 2e-05, "loss": 0.03865782, "step": 8769 }, { "epoch": 17.54, "grad_norm": 1.1795321702957153, "learning_rate": 2e-05, "loss": 0.04310453, "step": 8770 }, { "epoch": 17.542, "grad_norm": 1.2544761896133423, "learning_rate": 2e-05, "loss": 0.04492037, "step": 8771 }, { "epoch": 17.544, "grad_norm": 1.2026770114898682, "learning_rate": 2e-05, "loss": 0.04398282, "step": 8772 }, { "epoch": 17.546, "grad_norm": 1.1460901498794556, "learning_rate": 2e-05, "loss": 0.04888318, "step": 8773 }, { "epoch": 17.548000000000002, "grad_norm": 1.1545543670654297, "learning_rate": 2e-05, "loss": 0.03680679, "step": 8774 }, { "epoch": 17.55, "grad_norm": 4.126521110534668, "learning_rate": 2e-05, "loss": 0.04829864, "step": 8775 }, { "epoch": 17.552, "grad_norm": 1.9707434177398682, "learning_rate": 2e-05, "loss": 0.04758622, "step": 8776 }, { "epoch": 17.554, "grad_norm": 0.9309414029121399, "learning_rate": 2e-05, "loss": 0.02845479, "step": 8777 }, { "epoch": 17.556, "grad_norm": 1.9799511432647705, "learning_rate": 2e-05, "loss": 0.03359676, "step": 8778 }, { "epoch": 17.558, "grad_norm": 1.0151963233947754, "learning_rate": 2e-05, "loss": 0.0310269, "step": 8779 }, { "epoch": 17.56, "grad_norm": 1.2106175422668457, "learning_rate": 2e-05, "loss": 0.03649351, "step": 8780 }, { "epoch": 17.562, "grad_norm": 1.3251441717147827, "learning_rate": 2e-05, "loss": 0.03624501, "step": 8781 }, { "epoch": 17.564, "grad_norm": 1.4198929071426392, "learning_rate": 2e-05, "loss": 0.04427239, "step": 8782 }, { "epoch": 17.566, "grad_norm": 1.4967962503433228, "learning_rate": 2e-05, "loss": 0.04211658, "step": 8783 }, { "epoch": 17.568, "grad_norm": 1.3227828741073608, "learning_rate": 2e-05, "loss": 0.03693622, "step": 8784 }, { "epoch": 17.57, "grad_norm": 1.9066166877746582, "learning_rate": 2e-05, "loss": 0.04153109, "step": 8785 }, { "epoch": 17.572, "grad_norm": 2.053757667541504, "learning_rate": 2e-05, "loss": 0.04684748, "step": 8786 }, { "epoch": 17.574, "grad_norm": 1.594438076019287, "learning_rate": 2e-05, "loss": 0.04171894, "step": 8787 }, { "epoch": 17.576, "grad_norm": 2.138349771499634, "learning_rate": 2e-05, "loss": 0.04888215, "step": 8788 }, { "epoch": 17.578, "grad_norm": 3.804786205291748, "learning_rate": 2e-05, "loss": 0.04971538, "step": 8789 }, { "epoch": 17.58, "grad_norm": 1.1046136617660522, "learning_rate": 2e-05, "loss": 0.03527421, "step": 8790 }, { "epoch": 17.582, "grad_norm": 0.9375891089439392, "learning_rate": 2e-05, "loss": 0.03168319, "step": 8791 }, { "epoch": 17.584, "grad_norm": 1.4513614177703857, "learning_rate": 2e-05, "loss": 0.04399638, "step": 8792 }, { "epoch": 17.586, "grad_norm": 1.7088370323181152, "learning_rate": 2e-05, "loss": 0.03902084, "step": 8793 }, { "epoch": 17.588, "grad_norm": 1.3598990440368652, "learning_rate": 2e-05, "loss": 0.04764039, "step": 8794 }, { "epoch": 17.59, "grad_norm": 0.8974660634994507, "learning_rate": 2e-05, "loss": 0.03144586, "step": 8795 }, { "epoch": 17.592, "grad_norm": 2.354409694671631, "learning_rate": 2e-05, "loss": 0.03874226, "step": 8796 }, { "epoch": 17.594, "grad_norm": 1.3667627573013306, "learning_rate": 2e-05, "loss": 0.03704256, "step": 8797 }, { "epoch": 17.596, "grad_norm": 1.2642638683319092, "learning_rate": 2e-05, "loss": 0.03443506, "step": 8798 }, { "epoch": 17.598, "grad_norm": 1.09198796749115, "learning_rate": 2e-05, "loss": 0.03795373, "step": 8799 }, { "epoch": 17.6, "grad_norm": 1.4099838733673096, "learning_rate": 2e-05, "loss": 0.04986624, "step": 8800 }, { "epoch": 17.602, "grad_norm": 1.045273780822754, "learning_rate": 2e-05, "loss": 0.03775528, "step": 8801 }, { "epoch": 17.604, "grad_norm": 1.1584659814834595, "learning_rate": 2e-05, "loss": 0.03400667, "step": 8802 }, { "epoch": 17.606, "grad_norm": 1.334259271621704, "learning_rate": 2e-05, "loss": 0.04448001, "step": 8803 }, { "epoch": 17.608, "grad_norm": 0.9515101909637451, "learning_rate": 2e-05, "loss": 0.03244823, "step": 8804 }, { "epoch": 17.61, "grad_norm": 1.0947153568267822, "learning_rate": 2e-05, "loss": 0.03794955, "step": 8805 }, { "epoch": 17.612, "grad_norm": 1.1249397993087769, "learning_rate": 2e-05, "loss": 0.03956688, "step": 8806 }, { "epoch": 17.614, "grad_norm": 2.4383716583251953, "learning_rate": 2e-05, "loss": 0.05163242, "step": 8807 }, { "epoch": 17.616, "grad_norm": 1.0712907314300537, "learning_rate": 2e-05, "loss": 0.03782569, "step": 8808 }, { "epoch": 17.618, "grad_norm": 1.4813928604125977, "learning_rate": 2e-05, "loss": 0.0450106, "step": 8809 }, { "epoch": 17.62, "grad_norm": 1.138479232788086, "learning_rate": 2e-05, "loss": 0.04436382, "step": 8810 }, { "epoch": 17.622, "grad_norm": 1.0437335968017578, "learning_rate": 2e-05, "loss": 0.03661916, "step": 8811 }, { "epoch": 17.624, "grad_norm": 1.5815696716308594, "learning_rate": 2e-05, "loss": 0.03470255, "step": 8812 }, { "epoch": 17.626, "grad_norm": 1.6087177991867065, "learning_rate": 2e-05, "loss": 0.03476683, "step": 8813 }, { "epoch": 17.628, "grad_norm": 1.9084067344665527, "learning_rate": 2e-05, "loss": 0.07009329, "step": 8814 }, { "epoch": 17.63, "grad_norm": 1.6235365867614746, "learning_rate": 2e-05, "loss": 0.05514959, "step": 8815 }, { "epoch": 17.632, "grad_norm": 0.9894453883171082, "learning_rate": 2e-05, "loss": 0.0359512, "step": 8816 }, { "epoch": 17.634, "grad_norm": 1.0228773355484009, "learning_rate": 2e-05, "loss": 0.03386589, "step": 8817 }, { "epoch": 17.636, "grad_norm": 1.1252025365829468, "learning_rate": 2e-05, "loss": 0.03185301, "step": 8818 }, { "epoch": 17.638, "grad_norm": 0.8710758090019226, "learning_rate": 2e-05, "loss": 0.03058339, "step": 8819 }, { "epoch": 17.64, "grad_norm": 0.9895837306976318, "learning_rate": 2e-05, "loss": 0.02903359, "step": 8820 }, { "epoch": 17.642, "grad_norm": 0.8992276191711426, "learning_rate": 2e-05, "loss": 0.03188637, "step": 8821 }, { "epoch": 17.644, "grad_norm": 1.0285167694091797, "learning_rate": 2e-05, "loss": 0.03457221, "step": 8822 }, { "epoch": 17.646, "grad_norm": 1.0728398561477661, "learning_rate": 2e-05, "loss": 0.03096753, "step": 8823 }, { "epoch": 17.648, "grad_norm": 1.6772762537002563, "learning_rate": 2e-05, "loss": 0.03523809, "step": 8824 }, { "epoch": 17.65, "grad_norm": 1.2626025676727295, "learning_rate": 2e-05, "loss": 0.04584585, "step": 8825 }, { "epoch": 17.652, "grad_norm": 1.759260654449463, "learning_rate": 2e-05, "loss": 0.04807704, "step": 8826 }, { "epoch": 17.654, "grad_norm": 1.131996750831604, "learning_rate": 2e-05, "loss": 0.04056395, "step": 8827 }, { "epoch": 17.656, "grad_norm": 1.1082013845443726, "learning_rate": 2e-05, "loss": 0.0314077, "step": 8828 }, { "epoch": 17.658, "grad_norm": 1.1205099821090698, "learning_rate": 2e-05, "loss": 0.03793276, "step": 8829 }, { "epoch": 17.66, "grad_norm": 1.1743974685668945, "learning_rate": 2e-05, "loss": 0.03634968, "step": 8830 }, { "epoch": 17.662, "grad_norm": 1.6560696363449097, "learning_rate": 2e-05, "loss": 0.03286514, "step": 8831 }, { "epoch": 17.664, "grad_norm": 1.1040749549865723, "learning_rate": 2e-05, "loss": 0.04252566, "step": 8832 }, { "epoch": 17.666, "grad_norm": 1.7130646705627441, "learning_rate": 2e-05, "loss": 0.03491319, "step": 8833 }, { "epoch": 17.668, "grad_norm": 1.0113508701324463, "learning_rate": 2e-05, "loss": 0.03599076, "step": 8834 }, { "epoch": 17.67, "grad_norm": 1.4188059568405151, "learning_rate": 2e-05, "loss": 0.04455789, "step": 8835 }, { "epoch": 17.672, "grad_norm": 1.0483813285827637, "learning_rate": 2e-05, "loss": 0.04276256, "step": 8836 }, { "epoch": 17.674, "grad_norm": 1.285906434059143, "learning_rate": 2e-05, "loss": 0.03929163, "step": 8837 }, { "epoch": 17.676, "grad_norm": 1.7655030488967896, "learning_rate": 2e-05, "loss": 0.04695624, "step": 8838 }, { "epoch": 17.678, "grad_norm": 1.8398094177246094, "learning_rate": 2e-05, "loss": 0.03223392, "step": 8839 }, { "epoch": 17.68, "grad_norm": 2.002708673477173, "learning_rate": 2e-05, "loss": 0.04849662, "step": 8840 }, { "epoch": 17.682, "grad_norm": 1.5900839567184448, "learning_rate": 2e-05, "loss": 0.03409413, "step": 8841 }, { "epoch": 17.684, "grad_norm": 1.126819133758545, "learning_rate": 2e-05, "loss": 0.03573578, "step": 8842 }, { "epoch": 17.686, "grad_norm": 2.102517604827881, "learning_rate": 2e-05, "loss": 0.04867955, "step": 8843 }, { "epoch": 17.688, "grad_norm": 1.2388790845870972, "learning_rate": 2e-05, "loss": 0.04104718, "step": 8844 }, { "epoch": 17.69, "grad_norm": 1.0995584726333618, "learning_rate": 2e-05, "loss": 0.02717126, "step": 8845 }, { "epoch": 17.692, "grad_norm": 1.248007893562317, "learning_rate": 2e-05, "loss": 0.03687617, "step": 8846 }, { "epoch": 17.694, "grad_norm": 0.9979821443557739, "learning_rate": 2e-05, "loss": 0.03173301, "step": 8847 }, { "epoch": 17.696, "grad_norm": 0.8810282945632935, "learning_rate": 2e-05, "loss": 0.0286958, "step": 8848 }, { "epoch": 17.698, "grad_norm": 0.9130051136016846, "learning_rate": 2e-05, "loss": 0.03557264, "step": 8849 }, { "epoch": 17.7, "grad_norm": 1.8055018186569214, "learning_rate": 2e-05, "loss": 0.04309881, "step": 8850 }, { "epoch": 17.701999999999998, "grad_norm": 1.4664124250411987, "learning_rate": 2e-05, "loss": 0.03576339, "step": 8851 }, { "epoch": 17.704, "grad_norm": 1.6170819997787476, "learning_rate": 2e-05, "loss": 0.04012603, "step": 8852 }, { "epoch": 17.706, "grad_norm": 0.9175069332122803, "learning_rate": 2e-05, "loss": 0.02509233, "step": 8853 }, { "epoch": 17.708, "grad_norm": 1.5121434926986694, "learning_rate": 2e-05, "loss": 0.04627547, "step": 8854 }, { "epoch": 17.71, "grad_norm": 0.8749890923500061, "learning_rate": 2e-05, "loss": 0.02864091, "step": 8855 }, { "epoch": 17.712, "grad_norm": 1.1682401895523071, "learning_rate": 2e-05, "loss": 0.05001675, "step": 8856 }, { "epoch": 17.714, "grad_norm": 3.2054643630981445, "learning_rate": 2e-05, "loss": 0.05487347, "step": 8857 }, { "epoch": 17.716, "grad_norm": 0.9378389120101929, "learning_rate": 2e-05, "loss": 0.03289169, "step": 8858 }, { "epoch": 17.718, "grad_norm": 0.9760103225708008, "learning_rate": 2e-05, "loss": 0.03878334, "step": 8859 }, { "epoch": 17.72, "grad_norm": 0.9112187623977661, "learning_rate": 2e-05, "loss": 0.03134559, "step": 8860 }, { "epoch": 17.722, "grad_norm": 1.4359509944915771, "learning_rate": 2e-05, "loss": 0.03744188, "step": 8861 }, { "epoch": 17.724, "grad_norm": 1.3996148109436035, "learning_rate": 2e-05, "loss": 0.0524065, "step": 8862 }, { "epoch": 17.726, "grad_norm": 2.3694539070129395, "learning_rate": 2e-05, "loss": 0.04601893, "step": 8863 }, { "epoch": 17.728, "grad_norm": 1.5944074392318726, "learning_rate": 2e-05, "loss": 0.04030184, "step": 8864 }, { "epoch": 17.73, "grad_norm": 1.122509241104126, "learning_rate": 2e-05, "loss": 0.04312155, "step": 8865 }, { "epoch": 17.732, "grad_norm": 0.9193485975265503, "learning_rate": 2e-05, "loss": 0.02679619, "step": 8866 }, { "epoch": 17.734, "grad_norm": 0.9387381076812744, "learning_rate": 2e-05, "loss": 0.03091367, "step": 8867 }, { "epoch": 17.736, "grad_norm": 1.2607978582382202, "learning_rate": 2e-05, "loss": 0.05118863, "step": 8868 }, { "epoch": 17.738, "grad_norm": 0.9573045969009399, "learning_rate": 2e-05, "loss": 0.03779061, "step": 8869 }, { "epoch": 17.74, "grad_norm": 0.6224287748336792, "learning_rate": 2e-05, "loss": 0.01465039, "step": 8870 }, { "epoch": 17.742, "grad_norm": 0.8895289897918701, "learning_rate": 2e-05, "loss": 0.02783668, "step": 8871 }, { "epoch": 17.744, "grad_norm": 1.273004174232483, "learning_rate": 2e-05, "loss": 0.0393869, "step": 8872 }, { "epoch": 17.746, "grad_norm": 1.316355586051941, "learning_rate": 2e-05, "loss": 0.03810573, "step": 8873 }, { "epoch": 17.748, "grad_norm": 1.432334542274475, "learning_rate": 2e-05, "loss": 0.03763995, "step": 8874 }, { "epoch": 17.75, "grad_norm": 1.5677868127822876, "learning_rate": 2e-05, "loss": 0.03377351, "step": 8875 }, { "epoch": 17.752, "grad_norm": 1.0479463338851929, "learning_rate": 2e-05, "loss": 0.03200875, "step": 8876 }, { "epoch": 17.754, "grad_norm": 1.2957429885864258, "learning_rate": 2e-05, "loss": 0.04381375, "step": 8877 }, { "epoch": 17.756, "grad_norm": 1.3754754066467285, "learning_rate": 2e-05, "loss": 0.03435459, "step": 8878 }, { "epoch": 17.758, "grad_norm": 1.1922913789749146, "learning_rate": 2e-05, "loss": 0.04529712, "step": 8879 }, { "epoch": 17.76, "grad_norm": 1.199591875076294, "learning_rate": 2e-05, "loss": 0.0458049, "step": 8880 }, { "epoch": 17.762, "grad_norm": 0.9154902696609497, "learning_rate": 2e-05, "loss": 0.02930012, "step": 8881 }, { "epoch": 17.764, "grad_norm": 1.2725895643234253, "learning_rate": 2e-05, "loss": 0.03426202, "step": 8882 }, { "epoch": 17.766, "grad_norm": 0.9326797127723694, "learning_rate": 2e-05, "loss": 0.03297822, "step": 8883 }, { "epoch": 17.768, "grad_norm": 0.8994696736335754, "learning_rate": 2e-05, "loss": 0.02493419, "step": 8884 }, { "epoch": 17.77, "grad_norm": 2.138491153717041, "learning_rate": 2e-05, "loss": 0.05079397, "step": 8885 }, { "epoch": 17.772, "grad_norm": 1.1900322437286377, "learning_rate": 2e-05, "loss": 0.02941693, "step": 8886 }, { "epoch": 17.774, "grad_norm": 1.9832277297973633, "learning_rate": 2e-05, "loss": 0.04463857, "step": 8887 }, { "epoch": 17.776, "grad_norm": 1.9642775058746338, "learning_rate": 2e-05, "loss": 0.03731795, "step": 8888 }, { "epoch": 17.778, "grad_norm": 0.9470435380935669, "learning_rate": 2e-05, "loss": 0.04286867, "step": 8889 }, { "epoch": 17.78, "grad_norm": 1.3439148664474487, "learning_rate": 2e-05, "loss": 0.03530122, "step": 8890 }, { "epoch": 17.782, "grad_norm": 1.1060848236083984, "learning_rate": 2e-05, "loss": 0.03386058, "step": 8891 }, { "epoch": 17.784, "grad_norm": 1.1476891040802002, "learning_rate": 2e-05, "loss": 0.02871957, "step": 8892 }, { "epoch": 17.786, "grad_norm": 1.2838307619094849, "learning_rate": 2e-05, "loss": 0.04055165, "step": 8893 }, { "epoch": 17.788, "grad_norm": 1.1484177112579346, "learning_rate": 2e-05, "loss": 0.04514886, "step": 8894 }, { "epoch": 17.79, "grad_norm": 0.9906123280525208, "learning_rate": 2e-05, "loss": 0.02874031, "step": 8895 }, { "epoch": 17.792, "grad_norm": 1.6335127353668213, "learning_rate": 2e-05, "loss": 0.03589419, "step": 8896 }, { "epoch": 17.794, "grad_norm": 1.1815122365951538, "learning_rate": 2e-05, "loss": 0.03519525, "step": 8897 }, { "epoch": 17.796, "grad_norm": 1.1306949853897095, "learning_rate": 2e-05, "loss": 0.0422332, "step": 8898 }, { "epoch": 17.798000000000002, "grad_norm": 1.1811727285385132, "learning_rate": 2e-05, "loss": 0.0236742, "step": 8899 }, { "epoch": 17.8, "grad_norm": 1.1139687299728394, "learning_rate": 2e-05, "loss": 0.03131263, "step": 8900 }, { "epoch": 17.802, "grad_norm": 1.0541051626205444, "learning_rate": 2e-05, "loss": 0.03613989, "step": 8901 }, { "epoch": 17.804, "grad_norm": 4.176294326782227, "learning_rate": 2e-05, "loss": 0.05417047, "step": 8902 }, { "epoch": 17.806, "grad_norm": 6.361077785491943, "learning_rate": 2e-05, "loss": 0.03812846, "step": 8903 }, { "epoch": 17.808, "grad_norm": 1.0918827056884766, "learning_rate": 2e-05, "loss": 0.03272487, "step": 8904 }, { "epoch": 17.81, "grad_norm": 1.6112608909606934, "learning_rate": 2e-05, "loss": 0.03804348, "step": 8905 }, { "epoch": 17.812, "grad_norm": 2.0723252296447754, "learning_rate": 2e-05, "loss": 0.06864107, "step": 8906 }, { "epoch": 17.814, "grad_norm": 1.529592752456665, "learning_rate": 2e-05, "loss": 0.04428712, "step": 8907 }, { "epoch": 17.816, "grad_norm": 1.1639082431793213, "learning_rate": 2e-05, "loss": 0.03550513, "step": 8908 }, { "epoch": 17.818, "grad_norm": 1.0168837308883667, "learning_rate": 2e-05, "loss": 0.03686506, "step": 8909 }, { "epoch": 17.82, "grad_norm": 2.2372448444366455, "learning_rate": 2e-05, "loss": 0.03935503, "step": 8910 }, { "epoch": 17.822, "grad_norm": 1.1555935144424438, "learning_rate": 2e-05, "loss": 0.03554429, "step": 8911 }, { "epoch": 17.824, "grad_norm": 1.2676202058792114, "learning_rate": 2e-05, "loss": 0.03534994, "step": 8912 }, { "epoch": 17.826, "grad_norm": 0.994365394115448, "learning_rate": 2e-05, "loss": 0.03948274, "step": 8913 }, { "epoch": 17.828, "grad_norm": 1.0153899192810059, "learning_rate": 2e-05, "loss": 0.04318513, "step": 8914 }, { "epoch": 17.83, "grad_norm": 1.1899369955062866, "learning_rate": 2e-05, "loss": 0.03803097, "step": 8915 }, { "epoch": 17.832, "grad_norm": 1.1281800270080566, "learning_rate": 2e-05, "loss": 0.04341338, "step": 8916 }, { "epoch": 17.834, "grad_norm": 1.215263843536377, "learning_rate": 2e-05, "loss": 0.03343781, "step": 8917 }, { "epoch": 17.836, "grad_norm": 1.0481007099151611, "learning_rate": 2e-05, "loss": 0.03632434, "step": 8918 }, { "epoch": 17.838, "grad_norm": 1.002598524093628, "learning_rate": 2e-05, "loss": 0.03273392, "step": 8919 }, { "epoch": 17.84, "grad_norm": 1.5318379402160645, "learning_rate": 2e-05, "loss": 0.0415705, "step": 8920 }, { "epoch": 17.842, "grad_norm": 1.0889846086502075, "learning_rate": 2e-05, "loss": 0.05010916, "step": 8921 }, { "epoch": 17.844, "grad_norm": 1.3431620597839355, "learning_rate": 2e-05, "loss": 0.0383195, "step": 8922 }, { "epoch": 17.846, "grad_norm": 1.0937150716781616, "learning_rate": 2e-05, "loss": 0.03587068, "step": 8923 }, { "epoch": 17.848, "grad_norm": 1.370288610458374, "learning_rate": 2e-05, "loss": 0.03967947, "step": 8924 }, { "epoch": 17.85, "grad_norm": 1.174694299697876, "learning_rate": 2e-05, "loss": 0.03189139, "step": 8925 }, { "epoch": 17.852, "grad_norm": 1.2493538856506348, "learning_rate": 2e-05, "loss": 0.04369488, "step": 8926 }, { "epoch": 17.854, "grad_norm": 0.869674563407898, "learning_rate": 2e-05, "loss": 0.02739144, "step": 8927 }, { "epoch": 17.856, "grad_norm": 1.0925018787384033, "learning_rate": 2e-05, "loss": 0.02988427, "step": 8928 }, { "epoch": 17.858, "grad_norm": 1.8499395847320557, "learning_rate": 2e-05, "loss": 0.04179956, "step": 8929 }, { "epoch": 17.86, "grad_norm": 1.565024971961975, "learning_rate": 2e-05, "loss": 0.04363959, "step": 8930 }, { "epoch": 17.862, "grad_norm": 2.0215606689453125, "learning_rate": 2e-05, "loss": 0.06115989, "step": 8931 }, { "epoch": 17.864, "grad_norm": 1.428415060043335, "learning_rate": 2e-05, "loss": 0.03929399, "step": 8932 }, { "epoch": 17.866, "grad_norm": 1.633080244064331, "learning_rate": 2e-05, "loss": 0.0476169, "step": 8933 }, { "epoch": 17.868, "grad_norm": 0.9545271992683411, "learning_rate": 2e-05, "loss": 0.03109122, "step": 8934 }, { "epoch": 17.87, "grad_norm": 0.9013693332672119, "learning_rate": 2e-05, "loss": 0.02906408, "step": 8935 }, { "epoch": 17.872, "grad_norm": 1.48403799533844, "learning_rate": 2e-05, "loss": 0.03733386, "step": 8936 }, { "epoch": 17.874, "grad_norm": 1.052964448928833, "learning_rate": 2e-05, "loss": 0.02890426, "step": 8937 }, { "epoch": 17.876, "grad_norm": 1.0533002614974976, "learning_rate": 2e-05, "loss": 0.03821634, "step": 8938 }, { "epoch": 17.878, "grad_norm": 1.5066550970077515, "learning_rate": 2e-05, "loss": 0.03312371, "step": 8939 }, { "epoch": 17.88, "grad_norm": 1.573814868927002, "learning_rate": 2e-05, "loss": 0.03799012, "step": 8940 }, { "epoch": 17.882, "grad_norm": 1.0559918880462646, "learning_rate": 2e-05, "loss": 0.03499437, "step": 8941 }, { "epoch": 17.884, "grad_norm": 1.9235373735427856, "learning_rate": 2e-05, "loss": 0.0413806, "step": 8942 }, { "epoch": 17.886, "grad_norm": 1.5015840530395508, "learning_rate": 2e-05, "loss": 0.04995912, "step": 8943 }, { "epoch": 17.888, "grad_norm": 1.776807188987732, "learning_rate": 2e-05, "loss": 0.05956412, "step": 8944 }, { "epoch": 17.89, "grad_norm": 1.410510540008545, "learning_rate": 2e-05, "loss": 0.03684714, "step": 8945 }, { "epoch": 17.892, "grad_norm": 1.7156952619552612, "learning_rate": 2e-05, "loss": 0.04199193, "step": 8946 }, { "epoch": 17.894, "grad_norm": 1.0581722259521484, "learning_rate": 2e-05, "loss": 0.04411618, "step": 8947 }, { "epoch": 17.896, "grad_norm": 0.9372844696044922, "learning_rate": 2e-05, "loss": 0.03011969, "step": 8948 }, { "epoch": 17.898, "grad_norm": 1.1055439710617065, "learning_rate": 2e-05, "loss": 0.03142232, "step": 8949 }, { "epoch": 17.9, "grad_norm": 1.7129147052764893, "learning_rate": 2e-05, "loss": 0.05224614, "step": 8950 }, { "epoch": 17.902, "grad_norm": 1.1259431838989258, "learning_rate": 2e-05, "loss": 0.02720915, "step": 8951 }, { "epoch": 17.904, "grad_norm": 1.270009160041809, "learning_rate": 2e-05, "loss": 0.03761644, "step": 8952 }, { "epoch": 17.906, "grad_norm": 2.4524595737457275, "learning_rate": 2e-05, "loss": 0.07125869, "step": 8953 }, { "epoch": 17.908, "grad_norm": 1.4844930171966553, "learning_rate": 2e-05, "loss": 0.04264005, "step": 8954 }, { "epoch": 17.91, "grad_norm": 1.1660057306289673, "learning_rate": 2e-05, "loss": 0.03573887, "step": 8955 }, { "epoch": 17.912, "grad_norm": 1.0840332508087158, "learning_rate": 2e-05, "loss": 0.03188574, "step": 8956 }, { "epoch": 17.914, "grad_norm": 1.0970531702041626, "learning_rate": 2e-05, "loss": 0.03758013, "step": 8957 }, { "epoch": 17.916, "grad_norm": 1.4083123207092285, "learning_rate": 2e-05, "loss": 0.05492579, "step": 8958 }, { "epoch": 17.918, "grad_norm": 0.977988600730896, "learning_rate": 2e-05, "loss": 0.04775369, "step": 8959 }, { "epoch": 17.92, "grad_norm": 1.1078952550888062, "learning_rate": 2e-05, "loss": 0.0334915, "step": 8960 }, { "epoch": 17.922, "grad_norm": 1.5872620344161987, "learning_rate": 2e-05, "loss": 0.04199402, "step": 8961 }, { "epoch": 17.924, "grad_norm": 1.0953086614608765, "learning_rate": 2e-05, "loss": 0.03520524, "step": 8962 }, { "epoch": 17.926, "grad_norm": 1.360888123512268, "learning_rate": 2e-05, "loss": 0.03609081, "step": 8963 }, { "epoch": 17.928, "grad_norm": 1.6418564319610596, "learning_rate": 2e-05, "loss": 0.05257796, "step": 8964 }, { "epoch": 17.93, "grad_norm": 1.8100069761276245, "learning_rate": 2e-05, "loss": 0.04600957, "step": 8965 }, { "epoch": 17.932, "grad_norm": 1.3393256664276123, "learning_rate": 2e-05, "loss": 0.04731317, "step": 8966 }, { "epoch": 17.934, "grad_norm": 1.3219692707061768, "learning_rate": 2e-05, "loss": 0.03483067, "step": 8967 }, { "epoch": 17.936, "grad_norm": 1.0681779384613037, "learning_rate": 2e-05, "loss": 0.03454648, "step": 8968 }, { "epoch": 17.938, "grad_norm": 1.652298092842102, "learning_rate": 2e-05, "loss": 0.04448248, "step": 8969 }, { "epoch": 17.94, "grad_norm": 0.922049343585968, "learning_rate": 2e-05, "loss": 0.03188713, "step": 8970 }, { "epoch": 17.942, "grad_norm": 1.0262778997421265, "learning_rate": 2e-05, "loss": 0.0290083, "step": 8971 }, { "epoch": 17.944, "grad_norm": 1.3358492851257324, "learning_rate": 2e-05, "loss": 0.03838519, "step": 8972 }, { "epoch": 17.946, "grad_norm": 1.0291062593460083, "learning_rate": 2e-05, "loss": 0.04672035, "step": 8973 }, { "epoch": 17.948, "grad_norm": 2.5560879707336426, "learning_rate": 2e-05, "loss": 0.05149681, "step": 8974 }, { "epoch": 17.95, "grad_norm": 1.6139315366744995, "learning_rate": 2e-05, "loss": 0.04165, "step": 8975 }, { "epoch": 17.951999999999998, "grad_norm": 6.4694671630859375, "learning_rate": 2e-05, "loss": 0.04160897, "step": 8976 }, { "epoch": 17.954, "grad_norm": 1.6222550868988037, "learning_rate": 2e-05, "loss": 0.04970656, "step": 8977 }, { "epoch": 17.956, "grad_norm": 2.5975213050842285, "learning_rate": 2e-05, "loss": 0.06618728, "step": 8978 }, { "epoch": 17.958, "grad_norm": 1.73178231716156, "learning_rate": 2e-05, "loss": 0.05411918, "step": 8979 }, { "epoch": 17.96, "grad_norm": 1.350254774093628, "learning_rate": 2e-05, "loss": 0.04233923, "step": 8980 }, { "epoch": 17.962, "grad_norm": 1.6815485954284668, "learning_rate": 2e-05, "loss": 0.03182229, "step": 8981 }, { "epoch": 17.964, "grad_norm": 1.3329178094863892, "learning_rate": 2e-05, "loss": 0.04567986, "step": 8982 }, { "epoch": 17.966, "grad_norm": 1.5888012647628784, "learning_rate": 2e-05, "loss": 0.03676793, "step": 8983 }, { "epoch": 17.968, "grad_norm": 1.7541040182113647, "learning_rate": 2e-05, "loss": 0.04250543, "step": 8984 }, { "epoch": 17.97, "grad_norm": 0.9295905828475952, "learning_rate": 2e-05, "loss": 0.02375791, "step": 8985 }, { "epoch": 17.972, "grad_norm": 1.495064616203308, "learning_rate": 2e-05, "loss": 0.05015454, "step": 8986 }, { "epoch": 17.974, "grad_norm": 1.4035049676895142, "learning_rate": 2e-05, "loss": 0.04667794, "step": 8987 }, { "epoch": 17.976, "grad_norm": 1.0482661724090576, "learning_rate": 2e-05, "loss": 0.03988696, "step": 8988 }, { "epoch": 17.978, "grad_norm": 1.0965052843093872, "learning_rate": 2e-05, "loss": 0.03282965, "step": 8989 }, { "epoch": 17.98, "grad_norm": 1.3380331993103027, "learning_rate": 2e-05, "loss": 0.03774549, "step": 8990 }, { "epoch": 17.982, "grad_norm": 1.6290559768676758, "learning_rate": 2e-05, "loss": 0.0511911, "step": 8991 }, { "epoch": 17.984, "grad_norm": 1.6857942342758179, "learning_rate": 2e-05, "loss": 0.04408575, "step": 8992 }, { "epoch": 17.986, "grad_norm": 1.830045223236084, "learning_rate": 2e-05, "loss": 0.04361068, "step": 8993 }, { "epoch": 17.988, "grad_norm": 0.9048553705215454, "learning_rate": 2e-05, "loss": 0.02919978, "step": 8994 }, { "epoch": 17.99, "grad_norm": 0.9750759601593018, "learning_rate": 2e-05, "loss": 0.0303827, "step": 8995 }, { "epoch": 17.992, "grad_norm": 1.0447287559509277, "learning_rate": 2e-05, "loss": 0.03247008, "step": 8996 }, { "epoch": 17.994, "grad_norm": 0.956243634223938, "learning_rate": 2e-05, "loss": 0.03645263, "step": 8997 }, { "epoch": 17.996, "grad_norm": 1.3518587350845337, "learning_rate": 2e-05, "loss": 0.03873083, "step": 8998 }, { "epoch": 17.998, "grad_norm": 1.2905421257019043, "learning_rate": 2e-05, "loss": 0.03926174, "step": 8999 }, { "epoch": 18.0, "grad_norm": 1.2127301692962646, "learning_rate": 2e-05, "loss": 0.03626023, "step": 9000 }, { "epoch": 18.0, "eval_performance": { "AngleClassification_1": 0.99, "AngleClassification_2": 0.996, "AngleClassification_3": 0.9780439121756487, "Equal_1": 0.986, "Equal_2": 0.9640718562874252, "Equal_3": 0.9620758483033932, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.998003992015968, "Parallel_1": 0.9859719438877755, "Parallel_2": 0.9979959919839679, "Parallel_3": 0.974, "Perpendicular_1": 0.998, "Perpendicular_2": 0.98, "Perpendicular_3": 0.7785571142284569, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.996, "PointLiesOnCircle_3": 0.9867999999999999, "PointLiesOnLine_1": 0.9879759519038076, "PointLiesOnLine_2": 0.9959919839679359, "PointLiesOnLine_3": 0.9720558882235529 }, "eval_runtime": 228.009, "eval_samples_per_second": 46.051, "eval_steps_per_second": 0.921, "step": 9000 }, { "epoch": 18.002, "grad_norm": 1.231685996055603, "learning_rate": 2e-05, "loss": 0.04187885, "step": 9001 }, { "epoch": 18.004, "grad_norm": 1.3986369371414185, "learning_rate": 2e-05, "loss": 0.04639814, "step": 9002 }, { "epoch": 18.006, "grad_norm": 1.5632754564285278, "learning_rate": 2e-05, "loss": 0.04010525, "step": 9003 }, { "epoch": 18.008, "grad_norm": 1.2588595151901245, "learning_rate": 2e-05, "loss": 0.03489036, "step": 9004 }, { "epoch": 18.01, "grad_norm": 1.3520110845565796, "learning_rate": 2e-05, "loss": 0.04751738, "step": 9005 }, { "epoch": 18.012, "grad_norm": 2.3300204277038574, "learning_rate": 2e-05, "loss": 0.0426918, "step": 9006 }, { "epoch": 18.014, "grad_norm": 2.744277000427246, "learning_rate": 2e-05, "loss": 0.04523922, "step": 9007 }, { "epoch": 18.016, "grad_norm": 2.2508177757263184, "learning_rate": 2e-05, "loss": 0.04182559, "step": 9008 }, { "epoch": 18.018, "grad_norm": 0.9154175519943237, "learning_rate": 2e-05, "loss": 0.03380223, "step": 9009 }, { "epoch": 18.02, "grad_norm": 1.9386759996414185, "learning_rate": 2e-05, "loss": 0.04360243, "step": 9010 }, { "epoch": 18.022, "grad_norm": 0.991741955280304, "learning_rate": 2e-05, "loss": 0.03203537, "step": 9011 }, { "epoch": 18.024, "grad_norm": 0.8281901478767395, "learning_rate": 2e-05, "loss": 0.02432592, "step": 9012 }, { "epoch": 18.026, "grad_norm": 1.4991620779037476, "learning_rate": 2e-05, "loss": 0.03889698, "step": 9013 }, { "epoch": 18.028, "grad_norm": 1.2863342761993408, "learning_rate": 2e-05, "loss": 0.03137337, "step": 9014 }, { "epoch": 18.03, "grad_norm": 1.2053855657577515, "learning_rate": 2e-05, "loss": 0.04222876, "step": 9015 }, { "epoch": 18.032, "grad_norm": 1.5866951942443848, "learning_rate": 2e-05, "loss": 0.03733943, "step": 9016 }, { "epoch": 18.034, "grad_norm": 1.1909058094024658, "learning_rate": 2e-05, "loss": 0.03765493, "step": 9017 }, { "epoch": 18.036, "grad_norm": 1.2826887369155884, "learning_rate": 2e-05, "loss": 0.04001563, "step": 9018 }, { "epoch": 18.038, "grad_norm": 1.197710394859314, "learning_rate": 2e-05, "loss": 0.04139885, "step": 9019 }, { "epoch": 18.04, "grad_norm": 1.2452356815338135, "learning_rate": 2e-05, "loss": 0.04198163, "step": 9020 }, { "epoch": 18.042, "grad_norm": 2.8156187534332275, "learning_rate": 2e-05, "loss": 0.05130291, "step": 9021 }, { "epoch": 18.044, "grad_norm": 2.5071191787719727, "learning_rate": 2e-05, "loss": 0.05090611, "step": 9022 }, { "epoch": 18.046, "grad_norm": 1.4233297109603882, "learning_rate": 2e-05, "loss": 0.04434834, "step": 9023 }, { "epoch": 18.048, "grad_norm": 1.2306054830551147, "learning_rate": 2e-05, "loss": 0.03618914, "step": 9024 }, { "epoch": 18.05, "grad_norm": 0.9205886721611023, "learning_rate": 2e-05, "loss": 0.03187226, "step": 9025 }, { "epoch": 18.052, "grad_norm": 0.9290618300437927, "learning_rate": 2e-05, "loss": 0.03350983, "step": 9026 }, { "epoch": 18.054, "grad_norm": 1.254515290260315, "learning_rate": 2e-05, "loss": 0.04691765, "step": 9027 }, { "epoch": 18.056, "grad_norm": 1.5553855895996094, "learning_rate": 2e-05, "loss": 0.03906859, "step": 9028 }, { "epoch": 18.058, "grad_norm": 0.97332364320755, "learning_rate": 2e-05, "loss": 0.03339428, "step": 9029 }, { "epoch": 18.06, "grad_norm": 1.1257230043411255, "learning_rate": 2e-05, "loss": 0.03837292, "step": 9030 }, { "epoch": 18.062, "grad_norm": 2.0017855167388916, "learning_rate": 2e-05, "loss": 0.06406546, "step": 9031 }, { "epoch": 18.064, "grad_norm": 1.08551824092865, "learning_rate": 2e-05, "loss": 0.04137385, "step": 9032 }, { "epoch": 18.066, "grad_norm": 1.4952075481414795, "learning_rate": 2e-05, "loss": 0.02714139, "step": 9033 }, { "epoch": 18.068, "grad_norm": 2.379415273666382, "learning_rate": 2e-05, "loss": 0.06173275, "step": 9034 }, { "epoch": 18.07, "grad_norm": 4.081813335418701, "learning_rate": 2e-05, "loss": 0.04542533, "step": 9035 }, { "epoch": 18.072, "grad_norm": 4.134639263153076, "learning_rate": 2e-05, "loss": 0.03946038, "step": 9036 }, { "epoch": 18.074, "grad_norm": 2.83439302444458, "learning_rate": 2e-05, "loss": 0.05039775, "step": 9037 }, { "epoch": 18.076, "grad_norm": 0.9736847877502441, "learning_rate": 2e-05, "loss": 0.03757753, "step": 9038 }, { "epoch": 18.078, "grad_norm": 1.2713407278060913, "learning_rate": 2e-05, "loss": 0.04058048, "step": 9039 }, { "epoch": 18.08, "grad_norm": 0.9621219038963318, "learning_rate": 2e-05, "loss": 0.0348967, "step": 9040 }, { "epoch": 18.082, "grad_norm": 1.281557321548462, "learning_rate": 2e-05, "loss": 0.04492697, "step": 9041 }, { "epoch": 18.084, "grad_norm": 0.9458388686180115, "learning_rate": 2e-05, "loss": 0.02952169, "step": 9042 }, { "epoch": 18.086, "grad_norm": 1.5327826738357544, "learning_rate": 2e-05, "loss": 0.04477659, "step": 9043 }, { "epoch": 18.088, "grad_norm": 1.005460500717163, "learning_rate": 2e-05, "loss": 0.03337438, "step": 9044 }, { "epoch": 18.09, "grad_norm": 1.441056728363037, "learning_rate": 2e-05, "loss": 0.03924822, "step": 9045 }, { "epoch": 18.092, "grad_norm": 1.4437836408615112, "learning_rate": 2e-05, "loss": 0.0402789, "step": 9046 }, { "epoch": 18.094, "grad_norm": 1.009237289428711, "learning_rate": 2e-05, "loss": 0.03744453, "step": 9047 }, { "epoch": 18.096, "grad_norm": 1.128477692604065, "learning_rate": 2e-05, "loss": 0.02704899, "step": 9048 }, { "epoch": 18.098, "grad_norm": 1.3519699573516846, "learning_rate": 2e-05, "loss": 0.04252512, "step": 9049 }, { "epoch": 18.1, "grad_norm": 1.2263469696044922, "learning_rate": 2e-05, "loss": 0.05155253, "step": 9050 }, { "epoch": 18.102, "grad_norm": 1.0096732378005981, "learning_rate": 2e-05, "loss": 0.03330822, "step": 9051 }, { "epoch": 18.104, "grad_norm": 0.9080517888069153, "learning_rate": 2e-05, "loss": 0.02538734, "step": 9052 }, { "epoch": 18.106, "grad_norm": 0.9808663725852966, "learning_rate": 2e-05, "loss": 0.03376862, "step": 9053 }, { "epoch": 18.108, "grad_norm": 1.1579385995864868, "learning_rate": 2e-05, "loss": 0.04645274, "step": 9054 }, { "epoch": 18.11, "grad_norm": 0.9846909046173096, "learning_rate": 2e-05, "loss": 0.03212841, "step": 9055 }, { "epoch": 18.112, "grad_norm": 1.7504842281341553, "learning_rate": 2e-05, "loss": 0.03778321, "step": 9056 }, { "epoch": 18.114, "grad_norm": 0.9329354763031006, "learning_rate": 2e-05, "loss": 0.03925298, "step": 9057 }, { "epoch": 18.116, "grad_norm": 1.1019492149353027, "learning_rate": 2e-05, "loss": 0.04632554, "step": 9058 }, { "epoch": 18.118, "grad_norm": 1.338378667831421, "learning_rate": 2e-05, "loss": 0.0489829, "step": 9059 }, { "epoch": 18.12, "grad_norm": 2.2307145595550537, "learning_rate": 2e-05, "loss": 0.05171408, "step": 9060 }, { "epoch": 18.122, "grad_norm": 0.8201557993888855, "learning_rate": 2e-05, "loss": 0.02635722, "step": 9061 }, { "epoch": 18.124, "grad_norm": 1.2300578355789185, "learning_rate": 2e-05, "loss": 0.04092699, "step": 9062 }, { "epoch": 18.126, "grad_norm": 1.0208004713058472, "learning_rate": 2e-05, "loss": 0.03296494, "step": 9063 }, { "epoch": 18.128, "grad_norm": 2.13179612159729, "learning_rate": 2e-05, "loss": 0.03377965, "step": 9064 }, { "epoch": 18.13, "grad_norm": 1.694725513458252, "learning_rate": 2e-05, "loss": 0.0344084, "step": 9065 }, { "epoch": 18.132, "grad_norm": 0.876532256603241, "learning_rate": 2e-05, "loss": 0.03326464, "step": 9066 }, { "epoch": 18.134, "grad_norm": 1.6769760847091675, "learning_rate": 2e-05, "loss": 0.04691413, "step": 9067 }, { "epoch": 18.136, "grad_norm": 1.3153525590896606, "learning_rate": 2e-05, "loss": 0.0393918, "step": 9068 }, { "epoch": 18.138, "grad_norm": 1.3879621028900146, "learning_rate": 2e-05, "loss": 0.05645397, "step": 9069 }, { "epoch": 18.14, "grad_norm": 1.1712987422943115, "learning_rate": 2e-05, "loss": 0.0332922, "step": 9070 }, { "epoch": 18.142, "grad_norm": 1.4377306699752808, "learning_rate": 2e-05, "loss": 0.0378057, "step": 9071 }, { "epoch": 18.144, "grad_norm": 1.4129458665847778, "learning_rate": 2e-05, "loss": 0.04543114, "step": 9072 }, { "epoch": 18.146, "grad_norm": 1.4906466007232666, "learning_rate": 2e-05, "loss": 0.04048578, "step": 9073 }, { "epoch": 18.148, "grad_norm": 2.6621620655059814, "learning_rate": 2e-05, "loss": 0.0373352, "step": 9074 }, { "epoch": 18.15, "grad_norm": 1.7208998203277588, "learning_rate": 2e-05, "loss": 0.03141182, "step": 9075 }, { "epoch": 18.152, "grad_norm": 0.9720871448516846, "learning_rate": 2e-05, "loss": 0.02910824, "step": 9076 }, { "epoch": 18.154, "grad_norm": 0.9811252355575562, "learning_rate": 2e-05, "loss": 0.02743096, "step": 9077 }, { "epoch": 18.156, "grad_norm": 1.1748898029327393, "learning_rate": 2e-05, "loss": 0.05181575, "step": 9078 }, { "epoch": 18.158, "grad_norm": 1.0329571962356567, "learning_rate": 2e-05, "loss": 0.0312444, "step": 9079 }, { "epoch": 18.16, "grad_norm": 0.837674081325531, "learning_rate": 2e-05, "loss": 0.03148135, "step": 9080 }, { "epoch": 18.162, "grad_norm": 0.8234748840332031, "learning_rate": 2e-05, "loss": 0.02464331, "step": 9081 }, { "epoch": 18.164, "grad_norm": 1.1602654457092285, "learning_rate": 2e-05, "loss": 0.03699915, "step": 9082 }, { "epoch": 18.166, "grad_norm": 1.1784570217132568, "learning_rate": 2e-05, "loss": 0.04048996, "step": 9083 }, { "epoch": 18.168, "grad_norm": 0.9544180631637573, "learning_rate": 2e-05, "loss": 0.04346775, "step": 9084 }, { "epoch": 18.17, "grad_norm": 1.2257094383239746, "learning_rate": 2e-05, "loss": 0.04198171, "step": 9085 }, { "epoch": 18.172, "grad_norm": 1.140378475189209, "learning_rate": 2e-05, "loss": 0.03696894, "step": 9086 }, { "epoch": 18.174, "grad_norm": 1.2596681118011475, "learning_rate": 2e-05, "loss": 0.04216701, "step": 9087 }, { "epoch": 18.176, "grad_norm": 0.9000008702278137, "learning_rate": 2e-05, "loss": 0.03101258, "step": 9088 }, { "epoch": 18.178, "grad_norm": 2.3488030433654785, "learning_rate": 2e-05, "loss": 0.04373006, "step": 9089 }, { "epoch": 18.18, "grad_norm": 1.4289541244506836, "learning_rate": 2e-05, "loss": 0.04205768, "step": 9090 }, { "epoch": 18.182, "grad_norm": 1.566737413406372, "learning_rate": 2e-05, "loss": 0.03449672, "step": 9091 }, { "epoch": 18.184, "grad_norm": 1.769487977027893, "learning_rate": 2e-05, "loss": 0.04758864, "step": 9092 }, { "epoch": 18.186, "grad_norm": 2.658430576324463, "learning_rate": 2e-05, "loss": 0.05280557, "step": 9093 }, { "epoch": 18.188, "grad_norm": 1.109715461730957, "learning_rate": 2e-05, "loss": 0.03698013, "step": 9094 }, { "epoch": 18.19, "grad_norm": 0.7642002701759338, "learning_rate": 2e-05, "loss": 0.02297545, "step": 9095 }, { "epoch": 18.192, "grad_norm": 1.0916920900344849, "learning_rate": 2e-05, "loss": 0.03552222, "step": 9096 }, { "epoch": 18.194, "grad_norm": 1.0630213022232056, "learning_rate": 2e-05, "loss": 0.04369684, "step": 9097 }, { "epoch": 18.196, "grad_norm": 2.2464945316314697, "learning_rate": 2e-05, "loss": 0.05468228, "step": 9098 }, { "epoch": 18.198, "grad_norm": 1.486708402633667, "learning_rate": 2e-05, "loss": 0.04212853, "step": 9099 }, { "epoch": 18.2, "grad_norm": 1.0412966012954712, "learning_rate": 2e-05, "loss": 0.02887579, "step": 9100 }, { "epoch": 18.202, "grad_norm": 1.2484759092330933, "learning_rate": 2e-05, "loss": 0.03135784, "step": 9101 }, { "epoch": 18.204, "grad_norm": 0.8449518084526062, "learning_rate": 2e-05, "loss": 0.02829957, "step": 9102 }, { "epoch": 18.206, "grad_norm": 0.952001690864563, "learning_rate": 2e-05, "loss": 0.03775496, "step": 9103 }, { "epoch": 18.208, "grad_norm": 2.8372695446014404, "learning_rate": 2e-05, "loss": 0.06035031, "step": 9104 }, { "epoch": 18.21, "grad_norm": 1.566945195198059, "learning_rate": 2e-05, "loss": 0.03821325, "step": 9105 }, { "epoch": 18.212, "grad_norm": 0.9700403213500977, "learning_rate": 2e-05, "loss": 0.03175277, "step": 9106 }, { "epoch": 18.214, "grad_norm": 0.7930232882499695, "learning_rate": 2e-05, "loss": 0.02877456, "step": 9107 }, { "epoch": 18.216, "grad_norm": 1.8784908056259155, "learning_rate": 2e-05, "loss": 0.03008546, "step": 9108 }, { "epoch": 18.218, "grad_norm": 0.8640495538711548, "learning_rate": 2e-05, "loss": 0.02784545, "step": 9109 }, { "epoch": 18.22, "grad_norm": 1.8679314851760864, "learning_rate": 2e-05, "loss": 0.04344907, "step": 9110 }, { "epoch": 18.222, "grad_norm": 0.9665970206260681, "learning_rate": 2e-05, "loss": 0.03159123, "step": 9111 }, { "epoch": 18.224, "grad_norm": 2.1841301918029785, "learning_rate": 2e-05, "loss": 0.04222661, "step": 9112 }, { "epoch": 18.226, "grad_norm": 1.0266964435577393, "learning_rate": 2e-05, "loss": 0.03269233, "step": 9113 }, { "epoch": 18.228, "grad_norm": 1.1559340953826904, "learning_rate": 2e-05, "loss": 0.03609435, "step": 9114 }, { "epoch": 18.23, "grad_norm": 1.799062728881836, "learning_rate": 2e-05, "loss": 0.06516895, "step": 9115 }, { "epoch": 18.232, "grad_norm": 1.1087626218795776, "learning_rate": 2e-05, "loss": 0.03570325, "step": 9116 }, { "epoch": 18.234, "grad_norm": 1.0736451148986816, "learning_rate": 2e-05, "loss": 0.04321848, "step": 9117 }, { "epoch": 18.236, "grad_norm": 0.8759839534759521, "learning_rate": 2e-05, "loss": 0.02739135, "step": 9118 }, { "epoch": 18.238, "grad_norm": 1.3641234636306763, "learning_rate": 2e-05, "loss": 0.03601893, "step": 9119 }, { "epoch": 18.24, "grad_norm": 1.033177137374878, "learning_rate": 2e-05, "loss": 0.03693587, "step": 9120 }, { "epoch": 18.242, "grad_norm": 1.4592231512069702, "learning_rate": 2e-05, "loss": 0.04819867, "step": 9121 }, { "epoch": 18.244, "grad_norm": 2.7049989700317383, "learning_rate": 2e-05, "loss": 0.06350993, "step": 9122 }, { "epoch": 18.246, "grad_norm": 2.129951238632202, "learning_rate": 2e-05, "loss": 0.03744026, "step": 9123 }, { "epoch": 18.248, "grad_norm": 3.555757761001587, "learning_rate": 2e-05, "loss": 0.0389844, "step": 9124 }, { "epoch": 18.25, "grad_norm": 1.77292001247406, "learning_rate": 2e-05, "loss": 0.03673134, "step": 9125 }, { "epoch": 18.252, "grad_norm": 1.491458535194397, "learning_rate": 2e-05, "loss": 0.04791275, "step": 9126 }, { "epoch": 18.254, "grad_norm": 1.0636353492736816, "learning_rate": 2e-05, "loss": 0.02857388, "step": 9127 }, { "epoch": 18.256, "grad_norm": 2.2157514095306396, "learning_rate": 2e-05, "loss": 0.03792644, "step": 9128 }, { "epoch": 18.258, "grad_norm": 1.6188396215438843, "learning_rate": 2e-05, "loss": 0.03909956, "step": 9129 }, { "epoch": 18.26, "grad_norm": 1.4402801990509033, "learning_rate": 2e-05, "loss": 0.03763185, "step": 9130 }, { "epoch": 18.262, "grad_norm": 0.8505237102508545, "learning_rate": 2e-05, "loss": 0.02659447, "step": 9131 }, { "epoch": 18.264, "grad_norm": 2.192814826965332, "learning_rate": 2e-05, "loss": 0.04314809, "step": 9132 }, { "epoch": 18.266, "grad_norm": 0.9546207189559937, "learning_rate": 2e-05, "loss": 0.02968393, "step": 9133 }, { "epoch": 18.268, "grad_norm": 0.9098508954048157, "learning_rate": 2e-05, "loss": 0.03072307, "step": 9134 }, { "epoch": 18.27, "grad_norm": 1.3232694864273071, "learning_rate": 2e-05, "loss": 0.04392505, "step": 9135 }, { "epoch": 18.272, "grad_norm": 1.270898461341858, "learning_rate": 2e-05, "loss": 0.0372013, "step": 9136 }, { "epoch": 18.274, "grad_norm": 1.075697422027588, "learning_rate": 2e-05, "loss": 0.02996073, "step": 9137 }, { "epoch": 18.276, "grad_norm": 1.9473919868469238, "learning_rate": 2e-05, "loss": 0.05669764, "step": 9138 }, { "epoch": 18.278, "grad_norm": 1.5390831232070923, "learning_rate": 2e-05, "loss": 0.05108123, "step": 9139 }, { "epoch": 18.28, "grad_norm": 2.1636602878570557, "learning_rate": 2e-05, "loss": 0.03187565, "step": 9140 }, { "epoch": 18.282, "grad_norm": 0.8288912177085876, "learning_rate": 2e-05, "loss": 0.02420423, "step": 9141 }, { "epoch": 18.284, "grad_norm": 1.1085426807403564, "learning_rate": 2e-05, "loss": 0.03734434, "step": 9142 }, { "epoch": 18.286, "grad_norm": 1.2875310182571411, "learning_rate": 2e-05, "loss": 0.0356874, "step": 9143 }, { "epoch": 18.288, "grad_norm": 1.8907451629638672, "learning_rate": 2e-05, "loss": 0.0487394, "step": 9144 }, { "epoch": 18.29, "grad_norm": 0.9681052565574646, "learning_rate": 2e-05, "loss": 0.03512295, "step": 9145 }, { "epoch": 18.292, "grad_norm": 1.4254331588745117, "learning_rate": 2e-05, "loss": 0.03582203, "step": 9146 }, { "epoch": 18.294, "grad_norm": 1.5225062370300293, "learning_rate": 2e-05, "loss": 0.03475947, "step": 9147 }, { "epoch": 18.296, "grad_norm": 1.242882490158081, "learning_rate": 2e-05, "loss": 0.03556247, "step": 9148 }, { "epoch": 18.298, "grad_norm": 1.0429311990737915, "learning_rate": 2e-05, "loss": 0.03461884, "step": 9149 }, { "epoch": 18.3, "grad_norm": 1.4364418983459473, "learning_rate": 2e-05, "loss": 0.03950094, "step": 9150 }, { "epoch": 18.302, "grad_norm": 1.8164830207824707, "learning_rate": 2e-05, "loss": 0.06410436, "step": 9151 }, { "epoch": 18.304, "grad_norm": 1.4118413925170898, "learning_rate": 2e-05, "loss": 0.04401948, "step": 9152 }, { "epoch": 18.306, "grad_norm": 1.2205784320831299, "learning_rate": 2e-05, "loss": 0.03355864, "step": 9153 }, { "epoch": 18.308, "grad_norm": 1.2198060750961304, "learning_rate": 2e-05, "loss": 0.0309828, "step": 9154 }, { "epoch": 18.31, "grad_norm": 1.1974701881408691, "learning_rate": 2e-05, "loss": 0.02763157, "step": 9155 }, { "epoch": 18.312, "grad_norm": 1.0077528953552246, "learning_rate": 2e-05, "loss": 0.03438156, "step": 9156 }, { "epoch": 18.314, "grad_norm": 0.8457460999488831, "learning_rate": 2e-05, "loss": 0.02893993, "step": 9157 }, { "epoch": 18.316, "grad_norm": 0.9720850586891174, "learning_rate": 2e-05, "loss": 0.04536493, "step": 9158 }, { "epoch": 18.318, "grad_norm": 1.0783201456069946, "learning_rate": 2e-05, "loss": 0.03790034, "step": 9159 }, { "epoch": 18.32, "grad_norm": 1.0271093845367432, "learning_rate": 2e-05, "loss": 0.03963686, "step": 9160 }, { "epoch": 18.322, "grad_norm": 1.4147483110427856, "learning_rate": 2e-05, "loss": 0.03861061, "step": 9161 }, { "epoch": 18.324, "grad_norm": 0.8326164484024048, "learning_rate": 2e-05, "loss": 0.02533394, "step": 9162 }, { "epoch": 18.326, "grad_norm": 1.0313715934753418, "learning_rate": 2e-05, "loss": 0.03305768, "step": 9163 }, { "epoch": 18.328, "grad_norm": 0.957078218460083, "learning_rate": 2e-05, "loss": 0.03420855, "step": 9164 }, { "epoch": 18.33, "grad_norm": 1.2537825107574463, "learning_rate": 2e-05, "loss": 0.04591241, "step": 9165 }, { "epoch": 18.332, "grad_norm": 1.0857492685317993, "learning_rate": 2e-05, "loss": 0.03592741, "step": 9166 }, { "epoch": 18.334, "grad_norm": 1.878787875175476, "learning_rate": 2e-05, "loss": 0.03608268, "step": 9167 }, { "epoch": 18.336, "grad_norm": 1.1953189373016357, "learning_rate": 2e-05, "loss": 0.03804512, "step": 9168 }, { "epoch": 18.338, "grad_norm": 1.5415542125701904, "learning_rate": 2e-05, "loss": 0.02820013, "step": 9169 }, { "epoch": 18.34, "grad_norm": 1.491436243057251, "learning_rate": 2e-05, "loss": 0.04925518, "step": 9170 }, { "epoch": 18.342, "grad_norm": 1.553156852722168, "learning_rate": 2e-05, "loss": 0.04156277, "step": 9171 }, { "epoch": 18.344, "grad_norm": 1.8503408432006836, "learning_rate": 2e-05, "loss": 0.05377498, "step": 9172 }, { "epoch": 18.346, "grad_norm": 1.2439097166061401, "learning_rate": 2e-05, "loss": 0.03265806, "step": 9173 }, { "epoch": 18.348, "grad_norm": 1.4481669664382935, "learning_rate": 2e-05, "loss": 0.02740476, "step": 9174 }, { "epoch": 18.35, "grad_norm": 1.2171218395233154, "learning_rate": 2e-05, "loss": 0.02944897, "step": 9175 }, { "epoch": 18.352, "grad_norm": 0.9065278768539429, "learning_rate": 2e-05, "loss": 0.02675935, "step": 9176 }, { "epoch": 18.354, "grad_norm": 1.3472291231155396, "learning_rate": 2e-05, "loss": 0.04434574, "step": 9177 }, { "epoch": 18.356, "grad_norm": 1.878563642501831, "learning_rate": 2e-05, "loss": 0.03182423, "step": 9178 }, { "epoch": 18.358, "grad_norm": 3.4069790840148926, "learning_rate": 2e-05, "loss": 0.05331387, "step": 9179 }, { "epoch": 18.36, "grad_norm": 1.0158302783966064, "learning_rate": 2e-05, "loss": 0.02548378, "step": 9180 }, { "epoch": 18.362, "grad_norm": 1.7241535186767578, "learning_rate": 2e-05, "loss": 0.05759217, "step": 9181 }, { "epoch": 18.364, "grad_norm": 1.2429004907608032, "learning_rate": 2e-05, "loss": 0.04820465, "step": 9182 }, { "epoch": 18.366, "grad_norm": 1.4486147165298462, "learning_rate": 2e-05, "loss": 0.04670038, "step": 9183 }, { "epoch": 18.368, "grad_norm": 1.1848948001861572, "learning_rate": 2e-05, "loss": 0.03758841, "step": 9184 }, { "epoch": 18.37, "grad_norm": 1.4840543270111084, "learning_rate": 2e-05, "loss": 0.03756346, "step": 9185 }, { "epoch": 18.372, "grad_norm": 1.2354716062545776, "learning_rate": 2e-05, "loss": 0.03380906, "step": 9186 }, { "epoch": 18.374, "grad_norm": 1.507066249847412, "learning_rate": 2e-05, "loss": 0.04540253, "step": 9187 }, { "epoch": 18.376, "grad_norm": 0.8342724442481995, "learning_rate": 2e-05, "loss": 0.02721803, "step": 9188 }, { "epoch": 18.378, "grad_norm": 1.0873569250106812, "learning_rate": 2e-05, "loss": 0.03739965, "step": 9189 }, { "epoch": 18.38, "grad_norm": 1.0909212827682495, "learning_rate": 2e-05, "loss": 0.04435694, "step": 9190 }, { "epoch": 18.382, "grad_norm": 0.9635040760040283, "learning_rate": 2e-05, "loss": 0.02974385, "step": 9191 }, { "epoch": 18.384, "grad_norm": 0.8621228933334351, "learning_rate": 2e-05, "loss": 0.03226075, "step": 9192 }, { "epoch": 18.386, "grad_norm": 0.9626473784446716, "learning_rate": 2e-05, "loss": 0.03997044, "step": 9193 }, { "epoch": 18.388, "grad_norm": 1.0275819301605225, "learning_rate": 2e-05, "loss": 0.04033227, "step": 9194 }, { "epoch": 18.39, "grad_norm": 0.9863879680633545, "learning_rate": 2e-05, "loss": 0.03954853, "step": 9195 }, { "epoch": 18.392, "grad_norm": 0.940483808517456, "learning_rate": 2e-05, "loss": 0.03857549, "step": 9196 }, { "epoch": 18.394, "grad_norm": 1.8732637166976929, "learning_rate": 2e-05, "loss": 0.06237056, "step": 9197 }, { "epoch": 18.396, "grad_norm": 1.4055670499801636, "learning_rate": 2e-05, "loss": 0.03926038, "step": 9198 }, { "epoch": 18.398, "grad_norm": 1.4744826555252075, "learning_rate": 2e-05, "loss": 0.04425963, "step": 9199 }, { "epoch": 18.4, "grad_norm": 1.3514916896820068, "learning_rate": 2e-05, "loss": 0.04462281, "step": 9200 }, { "epoch": 18.402, "grad_norm": 1.6344118118286133, "learning_rate": 2e-05, "loss": 0.04859058, "step": 9201 }, { "epoch": 18.404, "grad_norm": 1.1523475646972656, "learning_rate": 2e-05, "loss": 0.03055129, "step": 9202 }, { "epoch": 18.406, "grad_norm": 0.8542936444282532, "learning_rate": 2e-05, "loss": 0.02844739, "step": 9203 }, { "epoch": 18.408, "grad_norm": 1.0266939401626587, "learning_rate": 2e-05, "loss": 0.04875603, "step": 9204 }, { "epoch": 18.41, "grad_norm": 1.7657033205032349, "learning_rate": 2e-05, "loss": 0.05347644, "step": 9205 }, { "epoch": 18.412, "grad_norm": 0.6647788286209106, "learning_rate": 2e-05, "loss": 0.02002254, "step": 9206 }, { "epoch": 18.414, "grad_norm": 1.595078468322754, "learning_rate": 2e-05, "loss": 0.05859225, "step": 9207 }, { "epoch": 18.416, "grad_norm": 1.1593120098114014, "learning_rate": 2e-05, "loss": 0.03295641, "step": 9208 }, { "epoch": 18.418, "grad_norm": 1.1592780351638794, "learning_rate": 2e-05, "loss": 0.04936964, "step": 9209 }, { "epoch": 18.42, "grad_norm": 1.1033194065093994, "learning_rate": 2e-05, "loss": 0.02957364, "step": 9210 }, { "epoch": 18.422, "grad_norm": 1.9453579187393188, "learning_rate": 2e-05, "loss": 0.05261527, "step": 9211 }, { "epoch": 18.424, "grad_norm": 3.175036907196045, "learning_rate": 2e-05, "loss": 0.05822589, "step": 9212 }, { "epoch": 18.426, "grad_norm": 1.271564245223999, "learning_rate": 2e-05, "loss": 0.04584102, "step": 9213 }, { "epoch": 18.428, "grad_norm": 1.2030833959579468, "learning_rate": 2e-05, "loss": 0.03379309, "step": 9214 }, { "epoch": 18.43, "grad_norm": 1.1682411432266235, "learning_rate": 2e-05, "loss": 0.03003952, "step": 9215 }, { "epoch": 18.432, "grad_norm": 1.0300477743148804, "learning_rate": 2e-05, "loss": 0.032295, "step": 9216 }, { "epoch": 18.434, "grad_norm": 1.0270270109176636, "learning_rate": 2e-05, "loss": 0.03277268, "step": 9217 }, { "epoch": 18.436, "grad_norm": 1.1464117765426636, "learning_rate": 2e-05, "loss": 0.04051355, "step": 9218 }, { "epoch": 18.438, "grad_norm": 0.937652587890625, "learning_rate": 2e-05, "loss": 0.02602914, "step": 9219 }, { "epoch": 18.44, "grad_norm": 1.4438947439193726, "learning_rate": 2e-05, "loss": 0.04581926, "step": 9220 }, { "epoch": 18.442, "grad_norm": 0.9447879195213318, "learning_rate": 2e-05, "loss": 0.02829626, "step": 9221 }, { "epoch": 18.444, "grad_norm": 1.1383657455444336, "learning_rate": 2e-05, "loss": 0.03696441, "step": 9222 }, { "epoch": 18.446, "grad_norm": 1.361314296722412, "learning_rate": 2e-05, "loss": 0.03871637, "step": 9223 }, { "epoch": 18.448, "grad_norm": 1.1792978048324585, "learning_rate": 2e-05, "loss": 0.04289243, "step": 9224 }, { "epoch": 18.45, "grad_norm": 1.3357179164886475, "learning_rate": 2e-05, "loss": 0.05325472, "step": 9225 }, { "epoch": 18.452, "grad_norm": 0.9456852078437805, "learning_rate": 2e-05, "loss": 0.04340569, "step": 9226 }, { "epoch": 18.454, "grad_norm": 1.9040546417236328, "learning_rate": 2e-05, "loss": 0.04025992, "step": 9227 }, { "epoch": 18.456, "grad_norm": 1.2759653329849243, "learning_rate": 2e-05, "loss": 0.03087399, "step": 9228 }, { "epoch": 18.458, "grad_norm": 0.905541181564331, "learning_rate": 2e-05, "loss": 0.02499575, "step": 9229 }, { "epoch": 18.46, "grad_norm": 1.1002150774002075, "learning_rate": 2e-05, "loss": 0.03570306, "step": 9230 }, { "epoch": 18.462, "grad_norm": 1.0128521919250488, "learning_rate": 2e-05, "loss": 0.02525359, "step": 9231 }, { "epoch": 18.464, "grad_norm": 1.9370986223220825, "learning_rate": 2e-05, "loss": 0.05238746, "step": 9232 }, { "epoch": 18.466, "grad_norm": 1.3359951972961426, "learning_rate": 2e-05, "loss": 0.04406765, "step": 9233 }, { "epoch": 18.468, "grad_norm": 1.1703191995620728, "learning_rate": 2e-05, "loss": 0.01840465, "step": 9234 }, { "epoch": 18.47, "grad_norm": 1.311935544013977, "learning_rate": 2e-05, "loss": 0.0538663, "step": 9235 }, { "epoch": 18.472, "grad_norm": 1.1480039358139038, "learning_rate": 2e-05, "loss": 0.03377277, "step": 9236 }, { "epoch": 18.474, "grad_norm": 1.4493167400360107, "learning_rate": 2e-05, "loss": 0.04199662, "step": 9237 }, { "epoch": 18.476, "grad_norm": 1.4386976957321167, "learning_rate": 2e-05, "loss": 0.04461949, "step": 9238 }, { "epoch": 18.478, "grad_norm": 1.1485795974731445, "learning_rate": 2e-05, "loss": 0.0313971, "step": 9239 }, { "epoch": 18.48, "grad_norm": 1.1396548748016357, "learning_rate": 2e-05, "loss": 0.03531817, "step": 9240 }, { "epoch": 18.482, "grad_norm": 1.6581363677978516, "learning_rate": 2e-05, "loss": 0.04454478, "step": 9241 }, { "epoch": 18.484, "grad_norm": 1.7046040296554565, "learning_rate": 2e-05, "loss": 0.05875383, "step": 9242 }, { "epoch": 18.486, "grad_norm": 1.0793792009353638, "learning_rate": 2e-05, "loss": 0.03110993, "step": 9243 }, { "epoch": 18.488, "grad_norm": 1.7613756656646729, "learning_rate": 2e-05, "loss": 0.0570842, "step": 9244 }, { "epoch": 18.49, "grad_norm": 2.0235798358917236, "learning_rate": 2e-05, "loss": 0.04174618, "step": 9245 }, { "epoch": 18.492, "grad_norm": 1.0287659168243408, "learning_rate": 2e-05, "loss": 0.03822614, "step": 9246 }, { "epoch": 18.494, "grad_norm": 1.1788679361343384, "learning_rate": 2e-05, "loss": 0.04055542, "step": 9247 }, { "epoch": 18.496, "grad_norm": 1.1585736274719238, "learning_rate": 2e-05, "loss": 0.0352186, "step": 9248 }, { "epoch": 18.498, "grad_norm": 1.2201426029205322, "learning_rate": 2e-05, "loss": 0.04049143, "step": 9249 }, { "epoch": 18.5, "grad_norm": 1.282627820968628, "learning_rate": 2e-05, "loss": 0.04607493, "step": 9250 }, { "epoch": 18.502, "grad_norm": 0.7134596705436707, "learning_rate": 2e-05, "loss": 0.02491331, "step": 9251 }, { "epoch": 18.504, "grad_norm": 1.1807990074157715, "learning_rate": 2e-05, "loss": 0.04192899, "step": 9252 }, { "epoch": 18.506, "grad_norm": 1.088732123374939, "learning_rate": 2e-05, "loss": 0.03404564, "step": 9253 }, { "epoch": 18.508, "grad_norm": 1.6486504077911377, "learning_rate": 2e-05, "loss": 0.04478157, "step": 9254 }, { "epoch": 18.51, "grad_norm": 0.6750619411468506, "learning_rate": 2e-05, "loss": 0.0185426, "step": 9255 }, { "epoch": 18.512, "grad_norm": 1.6968318223953247, "learning_rate": 2e-05, "loss": 0.04348899, "step": 9256 }, { "epoch": 18.514, "grad_norm": 2.108614921569824, "learning_rate": 2e-05, "loss": 0.03654766, "step": 9257 }, { "epoch": 18.516, "grad_norm": 1.0347707271575928, "learning_rate": 2e-05, "loss": 0.03417545, "step": 9258 }, { "epoch": 18.518, "grad_norm": 1.4427738189697266, "learning_rate": 2e-05, "loss": 0.03940589, "step": 9259 }, { "epoch": 18.52, "grad_norm": 5.4756317138671875, "learning_rate": 2e-05, "loss": 0.0323865, "step": 9260 }, { "epoch": 18.522, "grad_norm": 1.1430174112319946, "learning_rate": 2e-05, "loss": 0.03802513, "step": 9261 }, { "epoch": 18.524, "grad_norm": 1.8345012664794922, "learning_rate": 2e-05, "loss": 0.03719575, "step": 9262 }, { "epoch": 18.526, "grad_norm": 1.0079413652420044, "learning_rate": 2e-05, "loss": 0.025075, "step": 9263 }, { "epoch": 18.528, "grad_norm": 1.0160233974456787, "learning_rate": 2e-05, "loss": 0.03572342, "step": 9264 }, { "epoch": 18.53, "grad_norm": 1.0282971858978271, "learning_rate": 2e-05, "loss": 0.03215218, "step": 9265 }, { "epoch": 18.532, "grad_norm": 1.0998384952545166, "learning_rate": 2e-05, "loss": 0.02282938, "step": 9266 }, { "epoch": 18.534, "grad_norm": 0.8812691569328308, "learning_rate": 2e-05, "loss": 0.02828015, "step": 9267 }, { "epoch": 18.536, "grad_norm": 0.9534347653388977, "learning_rate": 2e-05, "loss": 0.031918, "step": 9268 }, { "epoch": 18.538, "grad_norm": 1.725890874862671, "learning_rate": 2e-05, "loss": 0.03431566, "step": 9269 }, { "epoch": 18.54, "grad_norm": 1.115046501159668, "learning_rate": 2e-05, "loss": 0.03934949, "step": 9270 }, { "epoch": 18.542, "grad_norm": 1.8355759382247925, "learning_rate": 2e-05, "loss": 0.04451673, "step": 9271 }, { "epoch": 18.544, "grad_norm": 0.7978991270065308, "learning_rate": 2e-05, "loss": 0.02732242, "step": 9272 }, { "epoch": 18.546, "grad_norm": 0.9845297336578369, "learning_rate": 2e-05, "loss": 0.0370938, "step": 9273 }, { "epoch": 18.548000000000002, "grad_norm": 1.4372204542160034, "learning_rate": 2e-05, "loss": 0.04021625, "step": 9274 }, { "epoch": 18.55, "grad_norm": 1.2978121042251587, "learning_rate": 2e-05, "loss": 0.03926712, "step": 9275 }, { "epoch": 18.552, "grad_norm": 1.1228762865066528, "learning_rate": 2e-05, "loss": 0.03606683, "step": 9276 }, { "epoch": 18.554, "grad_norm": 1.7570714950561523, "learning_rate": 2e-05, "loss": 0.04846427, "step": 9277 }, { "epoch": 18.556, "grad_norm": 1.131566047668457, "learning_rate": 2e-05, "loss": 0.03306014, "step": 9278 }, { "epoch": 18.558, "grad_norm": 2.452005624771118, "learning_rate": 2e-05, "loss": 0.04492715, "step": 9279 }, { "epoch": 18.56, "grad_norm": 2.391669750213623, "learning_rate": 2e-05, "loss": 0.04984794, "step": 9280 }, { "epoch": 18.562, "grad_norm": 1.720434308052063, "learning_rate": 2e-05, "loss": 0.04323271, "step": 9281 }, { "epoch": 18.564, "grad_norm": 0.944955050945282, "learning_rate": 2e-05, "loss": 0.03370727, "step": 9282 }, { "epoch": 18.566, "grad_norm": 1.2728393077850342, "learning_rate": 2e-05, "loss": 0.05384246, "step": 9283 }, { "epoch": 18.568, "grad_norm": 0.9309799075126648, "learning_rate": 2e-05, "loss": 0.03208206, "step": 9284 }, { "epoch": 18.57, "grad_norm": 1.5698517560958862, "learning_rate": 2e-05, "loss": 0.03387081, "step": 9285 }, { "epoch": 18.572, "grad_norm": 1.2453644275665283, "learning_rate": 2e-05, "loss": 0.04798931, "step": 9286 }, { "epoch": 18.574, "grad_norm": 1.1332305669784546, "learning_rate": 2e-05, "loss": 0.03295435, "step": 9287 }, { "epoch": 18.576, "grad_norm": 1.0198686122894287, "learning_rate": 2e-05, "loss": 0.03462212, "step": 9288 }, { "epoch": 18.578, "grad_norm": 1.215707778930664, "learning_rate": 2e-05, "loss": 0.03556623, "step": 9289 }, { "epoch": 18.58, "grad_norm": 1.027419924736023, "learning_rate": 2e-05, "loss": 0.03423331, "step": 9290 }, { "epoch": 18.582, "grad_norm": 0.90480637550354, "learning_rate": 2e-05, "loss": 0.03087179, "step": 9291 }, { "epoch": 18.584, "grad_norm": 1.7026139497756958, "learning_rate": 2e-05, "loss": 0.0349278, "step": 9292 }, { "epoch": 18.586, "grad_norm": 0.9826644659042358, "learning_rate": 2e-05, "loss": 0.03831825, "step": 9293 }, { "epoch": 18.588, "grad_norm": 1.0782471895217896, "learning_rate": 2e-05, "loss": 0.0376793, "step": 9294 }, { "epoch": 18.59, "grad_norm": 1.2753046751022339, "learning_rate": 2e-05, "loss": 0.03034364, "step": 9295 }, { "epoch": 18.592, "grad_norm": 1.1512980461120605, "learning_rate": 2e-05, "loss": 0.0408176, "step": 9296 }, { "epoch": 18.594, "grad_norm": 1.0865904092788696, "learning_rate": 2e-05, "loss": 0.03869336, "step": 9297 }, { "epoch": 18.596, "grad_norm": 4.640380859375, "learning_rate": 2e-05, "loss": 0.04819636, "step": 9298 }, { "epoch": 18.598, "grad_norm": 1.5081309080123901, "learning_rate": 2e-05, "loss": 0.0344197, "step": 9299 }, { "epoch": 18.6, "grad_norm": 1.4169105291366577, "learning_rate": 2e-05, "loss": 0.03239644, "step": 9300 }, { "epoch": 18.602, "grad_norm": 0.9452114105224609, "learning_rate": 2e-05, "loss": 0.03383157, "step": 9301 }, { "epoch": 18.604, "grad_norm": 1.0745069980621338, "learning_rate": 2e-05, "loss": 0.03384296, "step": 9302 }, { "epoch": 18.606, "grad_norm": 1.4148032665252686, "learning_rate": 2e-05, "loss": 0.03585404, "step": 9303 }, { "epoch": 18.608, "grad_norm": 1.9133672714233398, "learning_rate": 2e-05, "loss": 0.0532914, "step": 9304 }, { "epoch": 18.61, "grad_norm": 1.5987123250961304, "learning_rate": 2e-05, "loss": 0.04575809, "step": 9305 }, { "epoch": 18.612, "grad_norm": 1.4490927457809448, "learning_rate": 2e-05, "loss": 0.0261304, "step": 9306 }, { "epoch": 18.614, "grad_norm": 1.3110289573669434, "learning_rate": 2e-05, "loss": 0.04317017, "step": 9307 }, { "epoch": 18.616, "grad_norm": 1.7450647354125977, "learning_rate": 2e-05, "loss": 0.03704756, "step": 9308 }, { "epoch": 18.618, "grad_norm": 1.1800872087478638, "learning_rate": 2e-05, "loss": 0.04154502, "step": 9309 }, { "epoch": 18.62, "grad_norm": 1.0921176671981812, "learning_rate": 2e-05, "loss": 0.03790358, "step": 9310 }, { "epoch": 18.622, "grad_norm": 0.9460362792015076, "learning_rate": 2e-05, "loss": 0.03480369, "step": 9311 }, { "epoch": 18.624, "grad_norm": 1.9112956523895264, "learning_rate": 2e-05, "loss": 0.0567015, "step": 9312 }, { "epoch": 18.626, "grad_norm": 0.8658326268196106, "learning_rate": 2e-05, "loss": 0.02694524, "step": 9313 }, { "epoch": 18.628, "grad_norm": 1.45808744430542, "learning_rate": 2e-05, "loss": 0.04023227, "step": 9314 }, { "epoch": 18.63, "grad_norm": 1.2802855968475342, "learning_rate": 2e-05, "loss": 0.04633082, "step": 9315 }, { "epoch": 18.632, "grad_norm": 1.4368757009506226, "learning_rate": 2e-05, "loss": 0.04028036, "step": 9316 }, { "epoch": 18.634, "grad_norm": 1.315123200416565, "learning_rate": 2e-05, "loss": 0.04020244, "step": 9317 }, { "epoch": 18.636, "grad_norm": 1.2213021516799927, "learning_rate": 2e-05, "loss": 0.04257789, "step": 9318 }, { "epoch": 18.638, "grad_norm": 0.9989522695541382, "learning_rate": 2e-05, "loss": 0.03090833, "step": 9319 }, { "epoch": 18.64, "grad_norm": 1.4623515605926514, "learning_rate": 2e-05, "loss": 0.03066295, "step": 9320 }, { "epoch": 18.642, "grad_norm": 0.994607150554657, "learning_rate": 2e-05, "loss": 0.03546558, "step": 9321 }, { "epoch": 18.644, "grad_norm": 1.0749642848968506, "learning_rate": 2e-05, "loss": 0.03004635, "step": 9322 }, { "epoch": 18.646, "grad_norm": 1.7713054418563843, "learning_rate": 2e-05, "loss": 0.04165046, "step": 9323 }, { "epoch": 18.648, "grad_norm": 0.8551549315452576, "learning_rate": 2e-05, "loss": 0.02890996, "step": 9324 }, { "epoch": 18.65, "grad_norm": 1.097883701324463, "learning_rate": 2e-05, "loss": 0.02861174, "step": 9325 }, { "epoch": 18.652, "grad_norm": 1.7840732336044312, "learning_rate": 2e-05, "loss": 0.05357152, "step": 9326 }, { "epoch": 18.654, "grad_norm": 4.349701404571533, "learning_rate": 2e-05, "loss": 0.03939576, "step": 9327 }, { "epoch": 18.656, "grad_norm": 1.1393200159072876, "learning_rate": 2e-05, "loss": 0.04131395, "step": 9328 }, { "epoch": 18.658, "grad_norm": 1.0567841529846191, "learning_rate": 2e-05, "loss": 0.03854817, "step": 9329 }, { "epoch": 18.66, "grad_norm": 1.458124041557312, "learning_rate": 2e-05, "loss": 0.05177096, "step": 9330 }, { "epoch": 18.662, "grad_norm": 1.882347822189331, "learning_rate": 2e-05, "loss": 0.04030109, "step": 9331 }, { "epoch": 18.664, "grad_norm": 2.170494318008423, "learning_rate": 2e-05, "loss": 0.03523333, "step": 9332 }, { "epoch": 18.666, "grad_norm": 1.9181419610977173, "learning_rate": 2e-05, "loss": 0.05300501, "step": 9333 }, { "epoch": 18.668, "grad_norm": 1.2080146074295044, "learning_rate": 2e-05, "loss": 0.04903158, "step": 9334 }, { "epoch": 18.67, "grad_norm": 0.7711287140846252, "learning_rate": 2e-05, "loss": 0.02456314, "step": 9335 }, { "epoch": 18.672, "grad_norm": 1.5548487901687622, "learning_rate": 2e-05, "loss": 0.04005419, "step": 9336 }, { "epoch": 18.674, "grad_norm": 1.1410880088806152, "learning_rate": 2e-05, "loss": 0.03590655, "step": 9337 }, { "epoch": 18.676, "grad_norm": 0.7009702920913696, "learning_rate": 2e-05, "loss": 0.02051269, "step": 9338 }, { "epoch": 18.678, "grad_norm": 1.5990911722183228, "learning_rate": 2e-05, "loss": 0.0434902, "step": 9339 }, { "epoch": 18.68, "grad_norm": 0.9780513048171997, "learning_rate": 2e-05, "loss": 0.03651678, "step": 9340 }, { "epoch": 18.682, "grad_norm": 1.6021627187728882, "learning_rate": 2e-05, "loss": 0.06335925, "step": 9341 }, { "epoch": 18.684, "grad_norm": 1.3357975482940674, "learning_rate": 2e-05, "loss": 0.04495957, "step": 9342 }, { "epoch": 18.686, "grad_norm": 0.9908218383789062, "learning_rate": 2e-05, "loss": 0.03332717, "step": 9343 }, { "epoch": 18.688, "grad_norm": 1.097652792930603, "learning_rate": 2e-05, "loss": 0.0356284, "step": 9344 }, { "epoch": 18.69, "grad_norm": 1.0429366827011108, "learning_rate": 2e-05, "loss": 0.03380845, "step": 9345 }, { "epoch": 18.692, "grad_norm": 1.1586343050003052, "learning_rate": 2e-05, "loss": 0.04361542, "step": 9346 }, { "epoch": 18.694, "grad_norm": 3.3480849266052246, "learning_rate": 2e-05, "loss": 0.0333776, "step": 9347 }, { "epoch": 18.696, "grad_norm": 1.281451940536499, "learning_rate": 2e-05, "loss": 0.04028681, "step": 9348 }, { "epoch": 18.698, "grad_norm": 1.371768593788147, "learning_rate": 2e-05, "loss": 0.05507726, "step": 9349 }, { "epoch": 18.7, "grad_norm": 2.3903164863586426, "learning_rate": 2e-05, "loss": 0.03871591, "step": 9350 }, { "epoch": 18.701999999999998, "grad_norm": 1.772568941116333, "learning_rate": 2e-05, "loss": 0.03984157, "step": 9351 }, { "epoch": 18.704, "grad_norm": 0.9230865836143494, "learning_rate": 2e-05, "loss": 0.03329772, "step": 9352 }, { "epoch": 18.706, "grad_norm": 1.103115200996399, "learning_rate": 2e-05, "loss": 0.03324923, "step": 9353 }, { "epoch": 18.708, "grad_norm": 1.8577684164047241, "learning_rate": 2e-05, "loss": 0.04031277, "step": 9354 }, { "epoch": 18.71, "grad_norm": 0.9403966665267944, "learning_rate": 2e-05, "loss": 0.03368139, "step": 9355 }, { "epoch": 18.712, "grad_norm": 1.1100298166275024, "learning_rate": 2e-05, "loss": 0.02893784, "step": 9356 }, { "epoch": 18.714, "grad_norm": 1.5329560041427612, "learning_rate": 2e-05, "loss": 0.04282334, "step": 9357 }, { "epoch": 18.716, "grad_norm": 1.3389240503311157, "learning_rate": 2e-05, "loss": 0.05290358, "step": 9358 }, { "epoch": 18.718, "grad_norm": 1.237346887588501, "learning_rate": 2e-05, "loss": 0.04667988, "step": 9359 }, { "epoch": 18.72, "grad_norm": 1.5586694478988647, "learning_rate": 2e-05, "loss": 0.05208658, "step": 9360 }, { "epoch": 18.722, "grad_norm": 1.8342750072479248, "learning_rate": 2e-05, "loss": 0.06136811, "step": 9361 }, { "epoch": 18.724, "grad_norm": 1.214396357536316, "learning_rate": 2e-05, "loss": 0.04260029, "step": 9362 }, { "epoch": 18.726, "grad_norm": 1.107094407081604, "learning_rate": 2e-05, "loss": 0.03348082, "step": 9363 }, { "epoch": 18.728, "grad_norm": 2.1899776458740234, "learning_rate": 2e-05, "loss": 0.04633787, "step": 9364 }, { "epoch": 18.73, "grad_norm": 2.223517417907715, "learning_rate": 2e-05, "loss": 0.0558925, "step": 9365 }, { "epoch": 18.732, "grad_norm": 1.228452444076538, "learning_rate": 2e-05, "loss": 0.03945302, "step": 9366 }, { "epoch": 18.734, "grad_norm": 1.3798872232437134, "learning_rate": 2e-05, "loss": 0.04050661, "step": 9367 }, { "epoch": 18.736, "grad_norm": 1.221638798713684, "learning_rate": 2e-05, "loss": 0.04136247, "step": 9368 }, { "epoch": 18.738, "grad_norm": 1.5336992740631104, "learning_rate": 2e-05, "loss": 0.04288931, "step": 9369 }, { "epoch": 18.74, "grad_norm": 1.2660870552062988, "learning_rate": 2e-05, "loss": 0.03788426, "step": 9370 }, { "epoch": 18.742, "grad_norm": 1.0618778467178345, "learning_rate": 2e-05, "loss": 0.03776949, "step": 9371 }, { "epoch": 18.744, "grad_norm": 1.454463005065918, "learning_rate": 2e-05, "loss": 0.04477366, "step": 9372 }, { "epoch": 18.746, "grad_norm": 0.9141420125961304, "learning_rate": 2e-05, "loss": 0.0320573, "step": 9373 }, { "epoch": 18.748, "grad_norm": 0.9383849501609802, "learning_rate": 2e-05, "loss": 0.03445226, "step": 9374 }, { "epoch": 18.75, "grad_norm": 0.9249364733695984, "learning_rate": 2e-05, "loss": 0.0267669, "step": 9375 }, { "epoch": 18.752, "grad_norm": 1.1429213285446167, "learning_rate": 2e-05, "loss": 0.03949725, "step": 9376 }, { "epoch": 18.754, "grad_norm": 1.1244200468063354, "learning_rate": 2e-05, "loss": 0.04906584, "step": 9377 }, { "epoch": 18.756, "grad_norm": 1.230803370475769, "learning_rate": 2e-05, "loss": 0.04461672, "step": 9378 }, { "epoch": 18.758, "grad_norm": 1.2853035926818848, "learning_rate": 2e-05, "loss": 0.03215648, "step": 9379 }, { "epoch": 18.76, "grad_norm": 0.9794400930404663, "learning_rate": 2e-05, "loss": 0.03488969, "step": 9380 }, { "epoch": 18.762, "grad_norm": 1.4227925539016724, "learning_rate": 2e-05, "loss": 0.03503297, "step": 9381 }, { "epoch": 18.764, "grad_norm": 1.0466334819793701, "learning_rate": 2e-05, "loss": 0.03702603, "step": 9382 }, { "epoch": 18.766, "grad_norm": 1.34725022315979, "learning_rate": 2e-05, "loss": 0.03953287, "step": 9383 }, { "epoch": 18.768, "grad_norm": 1.198978304862976, "learning_rate": 2e-05, "loss": 0.04976796, "step": 9384 }, { "epoch": 18.77, "grad_norm": 1.504201889038086, "learning_rate": 2e-05, "loss": 0.0327661, "step": 9385 }, { "epoch": 18.772, "grad_norm": 1.165317416191101, "learning_rate": 2e-05, "loss": 0.03382391, "step": 9386 }, { "epoch": 18.774, "grad_norm": 1.3452415466308594, "learning_rate": 2e-05, "loss": 0.04332644, "step": 9387 }, { "epoch": 18.776, "grad_norm": 1.2175707817077637, "learning_rate": 2e-05, "loss": 0.04044385, "step": 9388 }, { "epoch": 18.778, "grad_norm": 1.3986777067184448, "learning_rate": 2e-05, "loss": 0.03819382, "step": 9389 }, { "epoch": 18.78, "grad_norm": 1.0882761478424072, "learning_rate": 2e-05, "loss": 0.03767996, "step": 9390 }, { "epoch": 18.782, "grad_norm": 1.5877962112426758, "learning_rate": 2e-05, "loss": 0.04321814, "step": 9391 }, { "epoch": 18.784, "grad_norm": 1.101238489151001, "learning_rate": 2e-05, "loss": 0.0354044, "step": 9392 }, { "epoch": 18.786, "grad_norm": 1.2377501726150513, "learning_rate": 2e-05, "loss": 0.04031648, "step": 9393 }, { "epoch": 18.788, "grad_norm": 0.9291300177574158, "learning_rate": 2e-05, "loss": 0.02476031, "step": 9394 }, { "epoch": 18.79, "grad_norm": 1.2788015604019165, "learning_rate": 2e-05, "loss": 0.0391355, "step": 9395 }, { "epoch": 18.792, "grad_norm": 1.7409638166427612, "learning_rate": 2e-05, "loss": 0.03689357, "step": 9396 }, { "epoch": 18.794, "grad_norm": 1.0328192710876465, "learning_rate": 2e-05, "loss": 0.04310239, "step": 9397 }, { "epoch": 18.796, "grad_norm": 1.011396884918213, "learning_rate": 2e-05, "loss": 0.03288874, "step": 9398 }, { "epoch": 18.798000000000002, "grad_norm": 0.7435385584831238, "learning_rate": 2e-05, "loss": 0.02558596, "step": 9399 }, { "epoch": 18.8, "grad_norm": 4.462393760681152, "learning_rate": 2e-05, "loss": 0.04563508, "step": 9400 }, { "epoch": 18.802, "grad_norm": 1.0921659469604492, "learning_rate": 2e-05, "loss": 0.03758308, "step": 9401 }, { "epoch": 18.804, "grad_norm": 0.8717844486236572, "learning_rate": 2e-05, "loss": 0.02702617, "step": 9402 }, { "epoch": 18.806, "grad_norm": 1.1632307767868042, "learning_rate": 2e-05, "loss": 0.0234844, "step": 9403 }, { "epoch": 18.808, "grad_norm": 1.5535489320755005, "learning_rate": 2e-05, "loss": 0.05584404, "step": 9404 }, { "epoch": 18.81, "grad_norm": 0.9174478054046631, "learning_rate": 2e-05, "loss": 0.03324962, "step": 9405 }, { "epoch": 18.812, "grad_norm": 0.8994336128234863, "learning_rate": 2e-05, "loss": 0.027875, "step": 9406 }, { "epoch": 18.814, "grad_norm": 1.3199756145477295, "learning_rate": 2e-05, "loss": 0.03234796, "step": 9407 }, { "epoch": 18.816, "grad_norm": 1.08950674533844, "learning_rate": 2e-05, "loss": 0.03516907, "step": 9408 }, { "epoch": 18.818, "grad_norm": 1.1115375757217407, "learning_rate": 2e-05, "loss": 0.04053742, "step": 9409 }, { "epoch": 18.82, "grad_norm": 1.1536626815795898, "learning_rate": 2e-05, "loss": 0.03484582, "step": 9410 }, { "epoch": 18.822, "grad_norm": 0.8211438059806824, "learning_rate": 2e-05, "loss": 0.02448115, "step": 9411 }, { "epoch": 18.824, "grad_norm": 1.1042817831039429, "learning_rate": 2e-05, "loss": 0.04530789, "step": 9412 }, { "epoch": 18.826, "grad_norm": 0.8658460378646851, "learning_rate": 2e-05, "loss": 0.02777041, "step": 9413 }, { "epoch": 18.828, "grad_norm": 2.023766279220581, "learning_rate": 2e-05, "loss": 0.04357454, "step": 9414 }, { "epoch": 18.83, "grad_norm": 0.9699124693870544, "learning_rate": 2e-05, "loss": 0.03296517, "step": 9415 }, { "epoch": 18.832, "grad_norm": 1.7311854362487793, "learning_rate": 2e-05, "loss": 0.03739531, "step": 9416 }, { "epoch": 18.834, "grad_norm": 1.1026251316070557, "learning_rate": 2e-05, "loss": 0.03349984, "step": 9417 }, { "epoch": 18.836, "grad_norm": 2.2641592025756836, "learning_rate": 2e-05, "loss": 0.04603563, "step": 9418 }, { "epoch": 18.838, "grad_norm": 1.2863683700561523, "learning_rate": 2e-05, "loss": 0.03495658, "step": 9419 }, { "epoch": 18.84, "grad_norm": 1.184340476989746, "learning_rate": 2e-05, "loss": 0.03394157, "step": 9420 }, { "epoch": 18.842, "grad_norm": 1.1718181371688843, "learning_rate": 2e-05, "loss": 0.04804385, "step": 9421 }, { "epoch": 18.844, "grad_norm": 1.673646330833435, "learning_rate": 2e-05, "loss": 0.04263519, "step": 9422 }, { "epoch": 18.846, "grad_norm": 2.055541753768921, "learning_rate": 2e-05, "loss": 0.05165798, "step": 9423 }, { "epoch": 18.848, "grad_norm": 1.409141182899475, "learning_rate": 2e-05, "loss": 0.03210691, "step": 9424 }, { "epoch": 18.85, "grad_norm": 1.0464624166488647, "learning_rate": 2e-05, "loss": 0.03134453, "step": 9425 }, { "epoch": 18.852, "grad_norm": 1.4115263223648071, "learning_rate": 2e-05, "loss": 0.04165501, "step": 9426 }, { "epoch": 18.854, "grad_norm": 1.4404549598693848, "learning_rate": 2e-05, "loss": 0.04252353, "step": 9427 }, { "epoch": 18.856, "grad_norm": 1.6682082414627075, "learning_rate": 2e-05, "loss": 0.04965983, "step": 9428 }, { "epoch": 18.858, "grad_norm": 2.1932332515716553, "learning_rate": 2e-05, "loss": 0.04318756, "step": 9429 }, { "epoch": 18.86, "grad_norm": 1.5169520378112793, "learning_rate": 2e-05, "loss": 0.05041992, "step": 9430 }, { "epoch": 18.862, "grad_norm": 1.1857248544692993, "learning_rate": 2e-05, "loss": 0.02926999, "step": 9431 }, { "epoch": 18.864, "grad_norm": 1.5535048246383667, "learning_rate": 2e-05, "loss": 0.03655865, "step": 9432 }, { "epoch": 18.866, "grad_norm": 1.4972198009490967, "learning_rate": 2e-05, "loss": 0.05736169, "step": 9433 }, { "epoch": 18.868, "grad_norm": 3.452970027923584, "learning_rate": 2e-05, "loss": 0.05056961, "step": 9434 }, { "epoch": 18.87, "grad_norm": 0.9347188472747803, "learning_rate": 2e-05, "loss": 0.03612703, "step": 9435 }, { "epoch": 18.872, "grad_norm": 1.1150261163711548, "learning_rate": 2e-05, "loss": 0.0411825, "step": 9436 }, { "epoch": 18.874, "grad_norm": 1.1914849281311035, "learning_rate": 2e-05, "loss": 0.04563664, "step": 9437 }, { "epoch": 18.876, "grad_norm": 2.185731887817383, "learning_rate": 2e-05, "loss": 0.06565648, "step": 9438 }, { "epoch": 18.878, "grad_norm": 1.1941205263137817, "learning_rate": 2e-05, "loss": 0.03810947, "step": 9439 }, { "epoch": 18.88, "grad_norm": 1.6133930683135986, "learning_rate": 2e-05, "loss": 0.04206388, "step": 9440 }, { "epoch": 18.882, "grad_norm": 1.831911563873291, "learning_rate": 2e-05, "loss": 0.03500404, "step": 9441 }, { "epoch": 18.884, "grad_norm": 1.5689963102340698, "learning_rate": 2e-05, "loss": 0.04609049, "step": 9442 }, { "epoch": 18.886, "grad_norm": 1.4582114219665527, "learning_rate": 2e-05, "loss": 0.03695561, "step": 9443 }, { "epoch": 18.888, "grad_norm": 1.6033324003219604, "learning_rate": 2e-05, "loss": 0.04263844, "step": 9444 }, { "epoch": 18.89, "grad_norm": 1.3246691226959229, "learning_rate": 2e-05, "loss": 0.03826751, "step": 9445 }, { "epoch": 18.892, "grad_norm": 0.83452969789505, "learning_rate": 2e-05, "loss": 0.02271818, "step": 9446 }, { "epoch": 18.894, "grad_norm": 0.9485161900520325, "learning_rate": 2e-05, "loss": 0.02958164, "step": 9447 }, { "epoch": 18.896, "grad_norm": 1.047574520111084, "learning_rate": 2e-05, "loss": 0.03668105, "step": 9448 }, { "epoch": 18.898, "grad_norm": 0.9807855486869812, "learning_rate": 2e-05, "loss": 0.02736486, "step": 9449 }, { "epoch": 18.9, "grad_norm": 0.961555540561676, "learning_rate": 2e-05, "loss": 0.03825683, "step": 9450 }, { "epoch": 18.902, "grad_norm": 1.6609978675842285, "learning_rate": 2e-05, "loss": 0.04328287, "step": 9451 }, { "epoch": 18.904, "grad_norm": 1.5748347043991089, "learning_rate": 2e-05, "loss": 0.05321528, "step": 9452 }, { "epoch": 18.906, "grad_norm": 1.1782357692718506, "learning_rate": 2e-05, "loss": 0.03493937, "step": 9453 }, { "epoch": 18.908, "grad_norm": 0.9678424596786499, "learning_rate": 2e-05, "loss": 0.03539448, "step": 9454 }, { "epoch": 18.91, "grad_norm": 1.318500280380249, "learning_rate": 2e-05, "loss": 0.04112168, "step": 9455 }, { "epoch": 18.912, "grad_norm": 1.0962268114089966, "learning_rate": 2e-05, "loss": 0.04137162, "step": 9456 }, { "epoch": 18.914, "grad_norm": 2.6498241424560547, "learning_rate": 2e-05, "loss": 0.04910755, "step": 9457 }, { "epoch": 18.916, "grad_norm": 0.8723467588424683, "learning_rate": 2e-05, "loss": 0.02558673, "step": 9458 }, { "epoch": 18.918, "grad_norm": 4.092097282409668, "learning_rate": 2e-05, "loss": 0.07681785, "step": 9459 }, { "epoch": 18.92, "grad_norm": 1.6774117946624756, "learning_rate": 2e-05, "loss": 0.05558608, "step": 9460 }, { "epoch": 18.922, "grad_norm": 1.1624091863632202, "learning_rate": 2e-05, "loss": 0.0366333, "step": 9461 }, { "epoch": 18.924, "grad_norm": 2.775557518005371, "learning_rate": 2e-05, "loss": 0.03752064, "step": 9462 }, { "epoch": 18.926, "grad_norm": 1.2753337621688843, "learning_rate": 2e-05, "loss": 0.0280635, "step": 9463 }, { "epoch": 18.928, "grad_norm": 1.4751827716827393, "learning_rate": 2e-05, "loss": 0.03453179, "step": 9464 }, { "epoch": 18.93, "grad_norm": 1.8795740604400635, "learning_rate": 2e-05, "loss": 0.05508083, "step": 9465 }, { "epoch": 18.932, "grad_norm": 0.8160423040390015, "learning_rate": 2e-05, "loss": 0.02429902, "step": 9466 }, { "epoch": 18.934, "grad_norm": 1.3197660446166992, "learning_rate": 2e-05, "loss": 0.04274568, "step": 9467 }, { "epoch": 18.936, "grad_norm": 1.277567744255066, "learning_rate": 2e-05, "loss": 0.03160505, "step": 9468 }, { "epoch": 18.938, "grad_norm": 1.089657187461853, "learning_rate": 2e-05, "loss": 0.03029203, "step": 9469 }, { "epoch": 18.94, "grad_norm": 0.9514510631561279, "learning_rate": 2e-05, "loss": 0.02322984, "step": 9470 }, { "epoch": 18.942, "grad_norm": 1.5049282312393188, "learning_rate": 2e-05, "loss": 0.05135813, "step": 9471 }, { "epoch": 18.944, "grad_norm": 2.5291552543640137, "learning_rate": 2e-05, "loss": 0.04994528, "step": 9472 }, { "epoch": 18.946, "grad_norm": 1.2019156217575073, "learning_rate": 2e-05, "loss": 0.03954164, "step": 9473 }, { "epoch": 18.948, "grad_norm": 1.2438514232635498, "learning_rate": 2e-05, "loss": 0.0393982, "step": 9474 }, { "epoch": 18.95, "grad_norm": 1.67550790309906, "learning_rate": 2e-05, "loss": 0.03951187, "step": 9475 }, { "epoch": 18.951999999999998, "grad_norm": 1.288069486618042, "learning_rate": 2e-05, "loss": 0.04033024, "step": 9476 }, { "epoch": 18.954, "grad_norm": 0.9242081642150879, "learning_rate": 2e-05, "loss": 0.02453855, "step": 9477 }, { "epoch": 18.956, "grad_norm": 1.2630972862243652, "learning_rate": 2e-05, "loss": 0.02997711, "step": 9478 }, { "epoch": 18.958, "grad_norm": 1.551659107208252, "learning_rate": 2e-05, "loss": 0.04051644, "step": 9479 }, { "epoch": 18.96, "grad_norm": 1.3620100021362305, "learning_rate": 2e-05, "loss": 0.04093819, "step": 9480 }, { "epoch": 18.962, "grad_norm": 1.232282280921936, "learning_rate": 2e-05, "loss": 0.03754396, "step": 9481 }, { "epoch": 18.964, "grad_norm": 2.0125794410705566, "learning_rate": 2e-05, "loss": 0.03548459, "step": 9482 }, { "epoch": 18.966, "grad_norm": 1.5373892784118652, "learning_rate": 2e-05, "loss": 0.03135991, "step": 9483 }, { "epoch": 18.968, "grad_norm": 1.0258489847183228, "learning_rate": 2e-05, "loss": 0.04001627, "step": 9484 }, { "epoch": 18.97, "grad_norm": 1.2281891107559204, "learning_rate": 2e-05, "loss": 0.04556176, "step": 9485 }, { "epoch": 18.972, "grad_norm": 1.3304164409637451, "learning_rate": 2e-05, "loss": 0.03649063, "step": 9486 }, { "epoch": 18.974, "grad_norm": 1.2232104539871216, "learning_rate": 2e-05, "loss": 0.03224166, "step": 9487 }, { "epoch": 18.976, "grad_norm": 1.0277901887893677, "learning_rate": 2e-05, "loss": 0.03058126, "step": 9488 }, { "epoch": 18.978, "grad_norm": 1.2064499855041504, "learning_rate": 2e-05, "loss": 0.04848855, "step": 9489 }, { "epoch": 18.98, "grad_norm": 1.3543627262115479, "learning_rate": 2e-05, "loss": 0.05095208, "step": 9490 }, { "epoch": 18.982, "grad_norm": 1.169352650642395, "learning_rate": 2e-05, "loss": 0.03938319, "step": 9491 }, { "epoch": 18.984, "grad_norm": 1.1936968564987183, "learning_rate": 2e-05, "loss": 0.04840044, "step": 9492 }, { "epoch": 18.986, "grad_norm": 1.0446419715881348, "learning_rate": 2e-05, "loss": 0.02854344, "step": 9493 }, { "epoch": 18.988, "grad_norm": 1.0690535306930542, "learning_rate": 2e-05, "loss": 0.04077658, "step": 9494 }, { "epoch": 18.99, "grad_norm": 0.9659092426300049, "learning_rate": 2e-05, "loss": 0.0352863, "step": 9495 }, { "epoch": 18.992, "grad_norm": 1.171740174293518, "learning_rate": 2e-05, "loss": 0.04648896, "step": 9496 }, { "epoch": 18.994, "grad_norm": 1.2358547449111938, "learning_rate": 2e-05, "loss": 0.04814133, "step": 9497 }, { "epoch": 18.996, "grad_norm": 1.2690925598144531, "learning_rate": 2e-05, "loss": 0.03684695, "step": 9498 }, { "epoch": 18.998, "grad_norm": 0.8553169965744019, "learning_rate": 2e-05, "loss": 0.03203268, "step": 9499 }, { "epoch": 19.0, "grad_norm": 1.3000123500823975, "learning_rate": 2e-05, "loss": 0.04224008, "step": 9500 }, { "epoch": 19.0, "eval_performance": { "AngleClassification_1": 0.994, "AngleClassification_2": 0.994, "AngleClassification_3": 0.9720558882235529, "Equal_1": 0.998, "Equal_2": 0.9720558882235529, "Equal_3": 0.9680638722554891, "LineComparison_1": 1.0, "LineComparison_2": 0.998003992015968, "LineComparison_3": 0.9960079840319361, "Parallel_1": 0.9879759519038076, "Parallel_2": 0.9939879759519038, "Parallel_3": 0.984, "Perpendicular_1": 0.996, "Perpendicular_2": 0.992, "Perpendicular_3": 0.7785571142284569, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.992, "PointLiesOnCircle_3": 0.9912000000000001, "PointLiesOnLine_1": 0.9939879759519038, "PointLiesOnLine_2": 0.9939879759519038, "PointLiesOnLine_3": 0.9820359281437125 }, "eval_runtime": 226.2832, "eval_samples_per_second": 46.402, "eval_steps_per_second": 0.928, "step": 9500 }, { "epoch": 19.002, "grad_norm": 2.068063497543335, "learning_rate": 2e-05, "loss": 0.05514351, "step": 9501 }, { "epoch": 19.004, "grad_norm": 1.4949918985366821, "learning_rate": 2e-05, "loss": 0.04859954, "step": 9502 }, { "epoch": 19.006, "grad_norm": 1.593234658241272, "learning_rate": 2e-05, "loss": 0.0594999, "step": 9503 }, { "epoch": 19.008, "grad_norm": 1.7630842924118042, "learning_rate": 2e-05, "loss": 0.0547442, "step": 9504 }, { "epoch": 19.01, "grad_norm": 1.3359355926513672, "learning_rate": 2e-05, "loss": 0.0435757, "step": 9505 }, { "epoch": 19.012, "grad_norm": 1.1348426342010498, "learning_rate": 2e-05, "loss": 0.0498636, "step": 9506 }, { "epoch": 19.014, "grad_norm": 0.9702634215354919, "learning_rate": 2e-05, "loss": 0.03833943, "step": 9507 }, { "epoch": 19.016, "grad_norm": 1.3303167819976807, "learning_rate": 2e-05, "loss": 0.05735467, "step": 9508 }, { "epoch": 19.018, "grad_norm": 1.2726213932037354, "learning_rate": 2e-05, "loss": 0.04152704, "step": 9509 }, { "epoch": 19.02, "grad_norm": 1.4237123727798462, "learning_rate": 2e-05, "loss": 0.04325964, "step": 9510 }, { "epoch": 19.022, "grad_norm": 2.0979814529418945, "learning_rate": 2e-05, "loss": 0.04794328, "step": 9511 }, { "epoch": 19.024, "grad_norm": 1.6185158491134644, "learning_rate": 2e-05, "loss": 0.0419357, "step": 9512 }, { "epoch": 19.026, "grad_norm": 1.0660626888275146, "learning_rate": 2e-05, "loss": 0.04816101, "step": 9513 }, { "epoch": 19.028, "grad_norm": 2.115166425704956, "learning_rate": 2e-05, "loss": 0.0368342, "step": 9514 }, { "epoch": 19.03, "grad_norm": 1.0870682001113892, "learning_rate": 2e-05, "loss": 0.03372486, "step": 9515 }, { "epoch": 19.032, "grad_norm": 1.2472476959228516, "learning_rate": 2e-05, "loss": 0.03647649, "step": 9516 }, { "epoch": 19.034, "grad_norm": 1.6778788566589355, "learning_rate": 2e-05, "loss": 0.05027078, "step": 9517 }, { "epoch": 19.036, "grad_norm": 1.1583776473999023, "learning_rate": 2e-05, "loss": 0.05213567, "step": 9518 }, { "epoch": 19.038, "grad_norm": 0.8478331565856934, "learning_rate": 2e-05, "loss": 0.03096831, "step": 9519 }, { "epoch": 19.04, "grad_norm": 2.253941774368286, "learning_rate": 2e-05, "loss": 0.06452757, "step": 9520 }, { "epoch": 19.042, "grad_norm": 0.9793718457221985, "learning_rate": 2e-05, "loss": 0.03285653, "step": 9521 }, { "epoch": 19.044, "grad_norm": 1.1670494079589844, "learning_rate": 2e-05, "loss": 0.04431014, "step": 9522 }, { "epoch": 19.046, "grad_norm": 1.1969513893127441, "learning_rate": 2e-05, "loss": 0.04522659, "step": 9523 }, { "epoch": 19.048, "grad_norm": 2.3027029037475586, "learning_rate": 2e-05, "loss": 0.06609085, "step": 9524 }, { "epoch": 19.05, "grad_norm": 2.106353521347046, "learning_rate": 2e-05, "loss": 0.06551776, "step": 9525 }, { "epoch": 19.052, "grad_norm": 1.131520390510559, "learning_rate": 2e-05, "loss": 0.0388155, "step": 9526 }, { "epoch": 19.054, "grad_norm": 1.5376803874969482, "learning_rate": 2e-05, "loss": 0.05878596, "step": 9527 }, { "epoch": 19.056, "grad_norm": 1.1558544635772705, "learning_rate": 2e-05, "loss": 0.04668014, "step": 9528 }, { "epoch": 19.058, "grad_norm": 2.322650194168091, "learning_rate": 2e-05, "loss": 0.04452207, "step": 9529 }, { "epoch": 19.06, "grad_norm": 1.124748706817627, "learning_rate": 2e-05, "loss": 0.03521612, "step": 9530 }, { "epoch": 19.062, "grad_norm": 1.4117465019226074, "learning_rate": 2e-05, "loss": 0.06336311, "step": 9531 }, { "epoch": 19.064, "grad_norm": 1.4536550045013428, "learning_rate": 2e-05, "loss": 0.06200144, "step": 9532 }, { "epoch": 19.066, "grad_norm": 1.2861456871032715, "learning_rate": 2e-05, "loss": 0.0470906, "step": 9533 }, { "epoch": 19.068, "grad_norm": 1.1793369054794312, "learning_rate": 2e-05, "loss": 0.05701993, "step": 9534 }, { "epoch": 19.07, "grad_norm": 1.456478476524353, "learning_rate": 2e-05, "loss": 0.03865527, "step": 9535 }, { "epoch": 19.072, "grad_norm": 0.9656800627708435, "learning_rate": 2e-05, "loss": 0.03674142, "step": 9536 }, { "epoch": 19.074, "grad_norm": 1.1307231187820435, "learning_rate": 2e-05, "loss": 0.05579684, "step": 9537 }, { "epoch": 19.076, "grad_norm": 0.9977941513061523, "learning_rate": 2e-05, "loss": 0.03835602, "step": 9538 }, { "epoch": 19.078, "grad_norm": 1.2633696794509888, "learning_rate": 2e-05, "loss": 0.04786922, "step": 9539 }, { "epoch": 19.08, "grad_norm": 1.0871366262435913, "learning_rate": 2e-05, "loss": 0.04613236, "step": 9540 }, { "epoch": 19.082, "grad_norm": 1.9468365907669067, "learning_rate": 2e-05, "loss": 0.05565763, "step": 9541 }, { "epoch": 19.084, "grad_norm": 2.074259042739868, "learning_rate": 2e-05, "loss": 0.0362253, "step": 9542 }, { "epoch": 19.086, "grad_norm": 0.9686467051506042, "learning_rate": 2e-05, "loss": 0.03293646, "step": 9543 }, { "epoch": 19.088, "grad_norm": 2.2230942249298096, "learning_rate": 2e-05, "loss": 0.06106763, "step": 9544 }, { "epoch": 19.09, "grad_norm": 1.3701294660568237, "learning_rate": 2e-05, "loss": 0.04030365, "step": 9545 }, { "epoch": 19.092, "grad_norm": 1.6550284624099731, "learning_rate": 2e-05, "loss": 0.04168969, "step": 9546 }, { "epoch": 19.094, "grad_norm": 0.9674344062805176, "learning_rate": 2e-05, "loss": 0.03010088, "step": 9547 }, { "epoch": 19.096, "grad_norm": 1.1236790418624878, "learning_rate": 2e-05, "loss": 0.04962952, "step": 9548 }, { "epoch": 19.098, "grad_norm": 1.367441177368164, "learning_rate": 2e-05, "loss": 0.05980235, "step": 9549 }, { "epoch": 19.1, "grad_norm": 1.3515336513519287, "learning_rate": 2e-05, "loss": 0.05576843, "step": 9550 }, { "epoch": 19.102, "grad_norm": 1.0636272430419922, "learning_rate": 2e-05, "loss": 0.03672649, "step": 9551 }, { "epoch": 19.104, "grad_norm": 1.3951401710510254, "learning_rate": 2e-05, "loss": 0.04574697, "step": 9552 }, { "epoch": 19.106, "grad_norm": 1.4239749908447266, "learning_rate": 2e-05, "loss": 0.04333872, "step": 9553 }, { "epoch": 19.108, "grad_norm": 1.410109043121338, "learning_rate": 2e-05, "loss": 0.04741354, "step": 9554 }, { "epoch": 19.11, "grad_norm": 2.2005419731140137, "learning_rate": 2e-05, "loss": 0.05171037, "step": 9555 }, { "epoch": 19.112, "grad_norm": 2.0647928714752197, "learning_rate": 2e-05, "loss": 0.05073348, "step": 9556 }, { "epoch": 19.114, "grad_norm": 4.837023735046387, "learning_rate": 2e-05, "loss": 0.0426005, "step": 9557 }, { "epoch": 19.116, "grad_norm": 1.4993720054626465, "learning_rate": 2e-05, "loss": 0.04596305, "step": 9558 }, { "epoch": 19.118, "grad_norm": 1.5079679489135742, "learning_rate": 2e-05, "loss": 0.05423679, "step": 9559 }, { "epoch": 19.12, "grad_norm": 2.0939688682556152, "learning_rate": 2e-05, "loss": 0.08043244, "step": 9560 }, { "epoch": 19.122, "grad_norm": 0.9353753924369812, "learning_rate": 2e-05, "loss": 0.02612016, "step": 9561 }, { "epoch": 19.124, "grad_norm": 1.5738691091537476, "learning_rate": 2e-05, "loss": 0.04554492, "step": 9562 }, { "epoch": 19.126, "grad_norm": 1.2481563091278076, "learning_rate": 2e-05, "loss": 0.04297471, "step": 9563 }, { "epoch": 19.128, "grad_norm": 2.0447239875793457, "learning_rate": 2e-05, "loss": 0.05938052, "step": 9564 }, { "epoch": 19.13, "grad_norm": 1.5070217847824097, "learning_rate": 2e-05, "loss": 0.0532247, "step": 9565 }, { "epoch": 19.132, "grad_norm": 1.241140365600586, "learning_rate": 2e-05, "loss": 0.03251466, "step": 9566 }, { "epoch": 19.134, "grad_norm": 1.7029706239700317, "learning_rate": 2e-05, "loss": 0.04438997, "step": 9567 }, { "epoch": 19.136, "grad_norm": 1.1682099103927612, "learning_rate": 2e-05, "loss": 0.03803521, "step": 9568 }, { "epoch": 19.138, "grad_norm": 2.038193464279175, "learning_rate": 2e-05, "loss": 0.06189165, "step": 9569 }, { "epoch": 19.14, "grad_norm": 1.0770591497421265, "learning_rate": 2e-05, "loss": 0.03082434, "step": 9570 }, { "epoch": 19.142, "grad_norm": 1.4201340675354004, "learning_rate": 2e-05, "loss": 0.04988094, "step": 9571 }, { "epoch": 19.144, "grad_norm": 1.4843192100524902, "learning_rate": 2e-05, "loss": 0.05186343, "step": 9572 }, { "epoch": 19.146, "grad_norm": 1.1361877918243408, "learning_rate": 2e-05, "loss": 0.04214589, "step": 9573 }, { "epoch": 19.148, "grad_norm": 1.536643385887146, "learning_rate": 2e-05, "loss": 0.04893829, "step": 9574 }, { "epoch": 19.15, "grad_norm": 1.5760071277618408, "learning_rate": 2e-05, "loss": 0.05837336, "step": 9575 }, { "epoch": 19.152, "grad_norm": 1.5717682838439941, "learning_rate": 2e-05, "loss": 0.05036075, "step": 9576 }, { "epoch": 19.154, "grad_norm": 1.6515288352966309, "learning_rate": 2e-05, "loss": 0.05457538, "step": 9577 }, { "epoch": 19.156, "grad_norm": 0.8262234330177307, "learning_rate": 2e-05, "loss": 0.03069056, "step": 9578 }, { "epoch": 19.158, "grad_norm": 1.1026842594146729, "learning_rate": 2e-05, "loss": 0.03231575, "step": 9579 }, { "epoch": 19.16, "grad_norm": 1.4705458879470825, "learning_rate": 2e-05, "loss": 0.05492937, "step": 9580 }, { "epoch": 19.162, "grad_norm": 1.1295093297958374, "learning_rate": 2e-05, "loss": 0.03586147, "step": 9581 }, { "epoch": 19.164, "grad_norm": 4.090209007263184, "learning_rate": 2e-05, "loss": 0.0552994, "step": 9582 }, { "epoch": 19.166, "grad_norm": 1.1965664625167847, "learning_rate": 2e-05, "loss": 0.04352884, "step": 9583 }, { "epoch": 19.168, "grad_norm": 1.2648299932479858, "learning_rate": 2e-05, "loss": 0.05929117, "step": 9584 }, { "epoch": 19.17, "grad_norm": 1.1680749654769897, "learning_rate": 2e-05, "loss": 0.04267289, "step": 9585 }, { "epoch": 19.172, "grad_norm": 2.512723445892334, "learning_rate": 2e-05, "loss": 0.04715227, "step": 9586 }, { "epoch": 19.174, "grad_norm": 1.9148845672607422, "learning_rate": 2e-05, "loss": 0.05139339, "step": 9587 }, { "epoch": 19.176, "grad_norm": 0.999897301197052, "learning_rate": 2e-05, "loss": 0.03897456, "step": 9588 }, { "epoch": 19.178, "grad_norm": 0.8950326442718506, "learning_rate": 2e-05, "loss": 0.03085954, "step": 9589 }, { "epoch": 19.18, "grad_norm": 0.9471958875656128, "learning_rate": 2e-05, "loss": 0.03678799, "step": 9590 }, { "epoch": 19.182, "grad_norm": 4.979821681976318, "learning_rate": 2e-05, "loss": 0.05950527, "step": 9591 }, { "epoch": 19.184, "grad_norm": 1.2521330118179321, "learning_rate": 2e-05, "loss": 0.05176454, "step": 9592 }, { "epoch": 19.186, "grad_norm": 1.4560898542404175, "learning_rate": 2e-05, "loss": 0.04815323, "step": 9593 }, { "epoch": 19.188, "grad_norm": 0.8276633620262146, "learning_rate": 2e-05, "loss": 0.03196088, "step": 9594 }, { "epoch": 19.19, "grad_norm": 1.2336090803146362, "learning_rate": 2e-05, "loss": 0.04029569, "step": 9595 }, { "epoch": 19.192, "grad_norm": 1.315801978111267, "learning_rate": 2e-05, "loss": 0.02891696, "step": 9596 }, { "epoch": 19.194, "grad_norm": 1.893702507019043, "learning_rate": 2e-05, "loss": 0.05300629, "step": 9597 }, { "epoch": 19.196, "grad_norm": 1.509690284729004, "learning_rate": 2e-05, "loss": 0.03716821, "step": 9598 }, { "epoch": 19.198, "grad_norm": 1.3387304544448853, "learning_rate": 2e-05, "loss": 0.05035957, "step": 9599 }, { "epoch": 19.2, "grad_norm": 1.7897385358810425, "learning_rate": 2e-05, "loss": 0.05165695, "step": 9600 }, { "epoch": 19.202, "grad_norm": 1.1607680320739746, "learning_rate": 2e-05, "loss": 0.0505014, "step": 9601 }, { "epoch": 19.204, "grad_norm": 1.1931777000427246, "learning_rate": 2e-05, "loss": 0.04449996, "step": 9602 }, { "epoch": 19.206, "grad_norm": 1.5485371351242065, "learning_rate": 2e-05, "loss": 0.0504408, "step": 9603 }, { "epoch": 19.208, "grad_norm": 1.5164246559143066, "learning_rate": 2e-05, "loss": 0.05643946, "step": 9604 }, { "epoch": 19.21, "grad_norm": 1.0061755180358887, "learning_rate": 2e-05, "loss": 0.03502137, "step": 9605 }, { "epoch": 19.212, "grad_norm": 1.1823749542236328, "learning_rate": 2e-05, "loss": 0.03302339, "step": 9606 }, { "epoch": 19.214, "grad_norm": 0.9059613943099976, "learning_rate": 2e-05, "loss": 0.03552267, "step": 9607 }, { "epoch": 19.216, "grad_norm": 2.4212279319763184, "learning_rate": 2e-05, "loss": 0.06147094, "step": 9608 }, { "epoch": 19.218, "grad_norm": 1.0591505765914917, "learning_rate": 2e-05, "loss": 0.03705286, "step": 9609 }, { "epoch": 19.22, "grad_norm": 1.8088831901550293, "learning_rate": 2e-05, "loss": 0.03809807, "step": 9610 }, { "epoch": 19.222, "grad_norm": 1.0194939374923706, "learning_rate": 2e-05, "loss": 0.03567684, "step": 9611 }, { "epoch": 19.224, "grad_norm": 1.243156909942627, "learning_rate": 2e-05, "loss": 0.04229685, "step": 9612 }, { "epoch": 19.226, "grad_norm": 1.1337292194366455, "learning_rate": 2e-05, "loss": 0.05196662, "step": 9613 }, { "epoch": 19.228, "grad_norm": 1.2861288785934448, "learning_rate": 2e-05, "loss": 0.04222748, "step": 9614 }, { "epoch": 19.23, "grad_norm": 1.2640674114227295, "learning_rate": 2e-05, "loss": 0.03076211, "step": 9615 }, { "epoch": 19.232, "grad_norm": 1.1922847032546997, "learning_rate": 2e-05, "loss": 0.04441094, "step": 9616 }, { "epoch": 19.234, "grad_norm": 1.1812851428985596, "learning_rate": 2e-05, "loss": 0.0433521, "step": 9617 }, { "epoch": 19.236, "grad_norm": 1.3344746828079224, "learning_rate": 2e-05, "loss": 0.04352124, "step": 9618 }, { "epoch": 19.238, "grad_norm": 0.9644930362701416, "learning_rate": 2e-05, "loss": 0.03768401, "step": 9619 }, { "epoch": 19.24, "grad_norm": 1.4631989002227783, "learning_rate": 2e-05, "loss": 0.06687289, "step": 9620 }, { "epoch": 19.242, "grad_norm": 1.341984510421753, "learning_rate": 2e-05, "loss": 0.04227417, "step": 9621 }, { "epoch": 19.244, "grad_norm": 1.1987580060958862, "learning_rate": 2e-05, "loss": 0.03802745, "step": 9622 }, { "epoch": 19.246, "grad_norm": 1.4911067485809326, "learning_rate": 2e-05, "loss": 0.0531439, "step": 9623 }, { "epoch": 19.248, "grad_norm": 1.490322232246399, "learning_rate": 2e-05, "loss": 0.04796601, "step": 9624 }, { "epoch": 19.25, "grad_norm": 1.8929321765899658, "learning_rate": 2e-05, "loss": 0.06009138, "step": 9625 }, { "epoch": 19.252, "grad_norm": 1.603279709815979, "learning_rate": 2e-05, "loss": 0.06496938, "step": 9626 }, { "epoch": 19.254, "grad_norm": 1.9387052059173584, "learning_rate": 2e-05, "loss": 0.03215116, "step": 9627 }, { "epoch": 19.256, "grad_norm": 1.5905379056930542, "learning_rate": 2e-05, "loss": 0.05835063, "step": 9628 }, { "epoch": 19.258, "grad_norm": 1.7073575258255005, "learning_rate": 2e-05, "loss": 0.05511844, "step": 9629 }, { "epoch": 19.26, "grad_norm": 0.9866523146629333, "learning_rate": 2e-05, "loss": 0.03965458, "step": 9630 }, { "epoch": 19.262, "grad_norm": 1.5832114219665527, "learning_rate": 2e-05, "loss": 0.06277335, "step": 9631 }, { "epoch": 19.264, "grad_norm": 0.9088577032089233, "learning_rate": 2e-05, "loss": 0.03908282, "step": 9632 }, { "epoch": 19.266, "grad_norm": 1.0746324062347412, "learning_rate": 2e-05, "loss": 0.04002892, "step": 9633 }, { "epoch": 19.268, "grad_norm": 1.371802806854248, "learning_rate": 2e-05, "loss": 0.04461369, "step": 9634 }, { "epoch": 19.27, "grad_norm": 1.0281873941421509, "learning_rate": 2e-05, "loss": 0.04214869, "step": 9635 }, { "epoch": 19.272, "grad_norm": 1.1577483415603638, "learning_rate": 2e-05, "loss": 0.04298501, "step": 9636 }, { "epoch": 19.274, "grad_norm": 1.2367029190063477, "learning_rate": 2e-05, "loss": 0.05060035, "step": 9637 }, { "epoch": 19.276, "grad_norm": 1.312851071357727, "learning_rate": 2e-05, "loss": 0.05152234, "step": 9638 }, { "epoch": 19.278, "grad_norm": 1.8516192436218262, "learning_rate": 2e-05, "loss": 0.04668981, "step": 9639 }, { "epoch": 19.28, "grad_norm": 0.9402768611907959, "learning_rate": 2e-05, "loss": 0.03359219, "step": 9640 }, { "epoch": 19.282, "grad_norm": 1.2451539039611816, "learning_rate": 2e-05, "loss": 0.04481611, "step": 9641 }, { "epoch": 19.284, "grad_norm": 1.8052244186401367, "learning_rate": 2e-05, "loss": 0.05552262, "step": 9642 }, { "epoch": 19.286, "grad_norm": 1.0428301095962524, "learning_rate": 2e-05, "loss": 0.04519371, "step": 9643 }, { "epoch": 19.288, "grad_norm": 1.6911115646362305, "learning_rate": 2e-05, "loss": 0.04465013, "step": 9644 }, { "epoch": 19.29, "grad_norm": 0.9367170333862305, "learning_rate": 2e-05, "loss": 0.04070941, "step": 9645 }, { "epoch": 19.292, "grad_norm": 1.3151055574417114, "learning_rate": 2e-05, "loss": 0.05247714, "step": 9646 }, { "epoch": 19.294, "grad_norm": 1.6003053188323975, "learning_rate": 2e-05, "loss": 0.05282956, "step": 9647 }, { "epoch": 19.296, "grad_norm": 0.8632450699806213, "learning_rate": 2e-05, "loss": 0.02785587, "step": 9648 }, { "epoch": 19.298, "grad_norm": 1.1528141498565674, "learning_rate": 2e-05, "loss": 0.04477213, "step": 9649 }, { "epoch": 19.3, "grad_norm": 1.422354817390442, "learning_rate": 2e-05, "loss": 0.05602736, "step": 9650 }, { "epoch": 19.302, "grad_norm": 1.0909887552261353, "learning_rate": 2e-05, "loss": 0.03081812, "step": 9651 }, { "epoch": 19.304, "grad_norm": 1.458472490310669, "learning_rate": 2e-05, "loss": 0.04661476, "step": 9652 }, { "epoch": 19.306, "grad_norm": 1.2165693044662476, "learning_rate": 2e-05, "loss": 0.04959568, "step": 9653 }, { "epoch": 19.308, "grad_norm": 1.257581114768982, "learning_rate": 2e-05, "loss": 0.05788159, "step": 9654 }, { "epoch": 19.31, "grad_norm": 1.1335254907608032, "learning_rate": 2e-05, "loss": 0.03306501, "step": 9655 }, { "epoch": 19.312, "grad_norm": 4.175816535949707, "learning_rate": 2e-05, "loss": 0.04191012, "step": 9656 }, { "epoch": 19.314, "grad_norm": 2.7777833938598633, "learning_rate": 2e-05, "loss": 0.0447332, "step": 9657 }, { "epoch": 19.316, "grad_norm": 2.1185333728790283, "learning_rate": 2e-05, "loss": 0.04624393, "step": 9658 }, { "epoch": 19.318, "grad_norm": 1.4900935888290405, "learning_rate": 2e-05, "loss": 0.03905595, "step": 9659 }, { "epoch": 19.32, "grad_norm": 0.8893716335296631, "learning_rate": 2e-05, "loss": 0.03079229, "step": 9660 }, { "epoch": 19.322, "grad_norm": 3.23656964302063, "learning_rate": 2e-05, "loss": 0.06021797, "step": 9661 }, { "epoch": 19.324, "grad_norm": 1.4597251415252686, "learning_rate": 2e-05, "loss": 0.05042277, "step": 9662 }, { "epoch": 19.326, "grad_norm": 1.7244936227798462, "learning_rate": 2e-05, "loss": 0.05716979, "step": 9663 }, { "epoch": 19.328, "grad_norm": 1.5345367193222046, "learning_rate": 2e-05, "loss": 0.04216295, "step": 9664 }, { "epoch": 19.33, "grad_norm": 2.214154005050659, "learning_rate": 2e-05, "loss": 0.05353715, "step": 9665 }, { "epoch": 19.332, "grad_norm": 1.5732632875442505, "learning_rate": 2e-05, "loss": 0.03894854, "step": 9666 }, { "epoch": 19.334, "grad_norm": 1.1253100633621216, "learning_rate": 2e-05, "loss": 0.04764336, "step": 9667 }, { "epoch": 19.336, "grad_norm": 2.5042335987091064, "learning_rate": 2e-05, "loss": 0.06285486, "step": 9668 }, { "epoch": 19.338, "grad_norm": 2.0975494384765625, "learning_rate": 2e-05, "loss": 0.04954441, "step": 9669 }, { "epoch": 19.34, "grad_norm": 1.4419890642166138, "learning_rate": 2e-05, "loss": 0.04968518, "step": 9670 }, { "epoch": 19.342, "grad_norm": 1.1824123859405518, "learning_rate": 2e-05, "loss": 0.05658853, "step": 9671 }, { "epoch": 19.344, "grad_norm": 1.6057556867599487, "learning_rate": 2e-05, "loss": 0.03070906, "step": 9672 }, { "epoch": 19.346, "grad_norm": 1.4249005317687988, "learning_rate": 2e-05, "loss": 0.05406775, "step": 9673 }, { "epoch": 19.348, "grad_norm": 1.621444582939148, "learning_rate": 2e-05, "loss": 0.03802917, "step": 9674 }, { "epoch": 19.35, "grad_norm": 1.267951250076294, "learning_rate": 2e-05, "loss": 0.03876458, "step": 9675 }, { "epoch": 19.352, "grad_norm": 1.2804059982299805, "learning_rate": 2e-05, "loss": 0.05597539, "step": 9676 }, { "epoch": 19.354, "grad_norm": 1.033166766166687, "learning_rate": 2e-05, "loss": 0.03686696, "step": 9677 }, { "epoch": 19.356, "grad_norm": 1.505784511566162, "learning_rate": 2e-05, "loss": 0.03984234, "step": 9678 }, { "epoch": 19.358, "grad_norm": 1.391517996788025, "learning_rate": 2e-05, "loss": 0.03385401, "step": 9679 }, { "epoch": 19.36, "grad_norm": 1.3774709701538086, "learning_rate": 2e-05, "loss": 0.04038809, "step": 9680 }, { "epoch": 19.362, "grad_norm": 1.5731260776519775, "learning_rate": 2e-05, "loss": 0.07322963, "step": 9681 }, { "epoch": 19.364, "grad_norm": 1.2107212543487549, "learning_rate": 2e-05, "loss": 0.03320616, "step": 9682 }, { "epoch": 19.366, "grad_norm": 1.7580780982971191, "learning_rate": 2e-05, "loss": 0.0528774, "step": 9683 }, { "epoch": 19.368, "grad_norm": 1.5378915071487427, "learning_rate": 2e-05, "loss": 0.05046671, "step": 9684 }, { "epoch": 19.37, "grad_norm": 1.0928095579147339, "learning_rate": 2e-05, "loss": 0.03816278, "step": 9685 }, { "epoch": 19.372, "grad_norm": 1.182478904724121, "learning_rate": 2e-05, "loss": 0.0522567, "step": 9686 }, { "epoch": 19.374, "grad_norm": 1.4353283643722534, "learning_rate": 2e-05, "loss": 0.03691858, "step": 9687 }, { "epoch": 19.376, "grad_norm": 1.475116491317749, "learning_rate": 2e-05, "loss": 0.04696631, "step": 9688 }, { "epoch": 19.378, "grad_norm": 0.974381148815155, "learning_rate": 2e-05, "loss": 0.0333575, "step": 9689 }, { "epoch": 19.38, "grad_norm": 1.4504315853118896, "learning_rate": 2e-05, "loss": 0.05596361, "step": 9690 }, { "epoch": 19.382, "grad_norm": 1.4319438934326172, "learning_rate": 2e-05, "loss": 0.05262763, "step": 9691 }, { "epoch": 19.384, "grad_norm": 0.9636086225509644, "learning_rate": 2e-05, "loss": 0.03767851, "step": 9692 }, { "epoch": 19.386, "grad_norm": 1.425256609916687, "learning_rate": 2e-05, "loss": 0.04698531, "step": 9693 }, { "epoch": 19.388, "grad_norm": 1.4024748802185059, "learning_rate": 2e-05, "loss": 0.05453777, "step": 9694 }, { "epoch": 19.39, "grad_norm": 1.092736840248108, "learning_rate": 2e-05, "loss": 0.04190373, "step": 9695 }, { "epoch": 19.392, "grad_norm": 1.2734224796295166, "learning_rate": 2e-05, "loss": 0.03131421, "step": 9696 }, { "epoch": 19.394, "grad_norm": 1.0958750247955322, "learning_rate": 2e-05, "loss": 0.02913704, "step": 9697 }, { "epoch": 19.396, "grad_norm": 1.1894475221633911, "learning_rate": 2e-05, "loss": 0.04679964, "step": 9698 }, { "epoch": 19.398, "grad_norm": 1.457545280456543, "learning_rate": 2e-05, "loss": 0.04851875, "step": 9699 }, { "epoch": 19.4, "grad_norm": 1.9172102212905884, "learning_rate": 2e-05, "loss": 0.04264786, "step": 9700 }, { "epoch": 19.402, "grad_norm": 2.0579476356506348, "learning_rate": 2e-05, "loss": 0.05123217, "step": 9701 }, { "epoch": 19.404, "grad_norm": 1.5111398696899414, "learning_rate": 2e-05, "loss": 0.03861041, "step": 9702 }, { "epoch": 19.406, "grad_norm": 1.0559989213943481, "learning_rate": 2e-05, "loss": 0.04486804, "step": 9703 }, { "epoch": 19.408, "grad_norm": 1.3110512495040894, "learning_rate": 2e-05, "loss": 0.03388352, "step": 9704 }, { "epoch": 19.41, "grad_norm": 1.385489583015442, "learning_rate": 2e-05, "loss": 0.06344204, "step": 9705 }, { "epoch": 19.412, "grad_norm": 0.962348997592926, "learning_rate": 2e-05, "loss": 0.02755783, "step": 9706 }, { "epoch": 19.414, "grad_norm": 1.0933512449264526, "learning_rate": 2e-05, "loss": 0.03290668, "step": 9707 }, { "epoch": 19.416, "grad_norm": 0.9703479409217834, "learning_rate": 2e-05, "loss": 0.03354245, "step": 9708 }, { "epoch": 19.418, "grad_norm": 1.2828093767166138, "learning_rate": 2e-05, "loss": 0.05780891, "step": 9709 }, { "epoch": 19.42, "grad_norm": 1.4246282577514648, "learning_rate": 2e-05, "loss": 0.05770401, "step": 9710 }, { "epoch": 19.422, "grad_norm": 1.2994410991668701, "learning_rate": 2e-05, "loss": 0.04848997, "step": 9711 }, { "epoch": 19.424, "grad_norm": 3.296254873275757, "learning_rate": 2e-05, "loss": 0.05313291, "step": 9712 }, { "epoch": 19.426, "grad_norm": 1.7690913677215576, "learning_rate": 2e-05, "loss": 0.057825, "step": 9713 }, { "epoch": 19.428, "grad_norm": 1.2090288400650024, "learning_rate": 2e-05, "loss": 0.03477422, "step": 9714 }, { "epoch": 19.43, "grad_norm": 2.6982650756835938, "learning_rate": 2e-05, "loss": 0.05113177, "step": 9715 }, { "epoch": 19.432, "grad_norm": 1.0609557628631592, "learning_rate": 2e-05, "loss": 0.02290886, "step": 9716 }, { "epoch": 19.434, "grad_norm": 2.1590404510498047, "learning_rate": 2e-05, "loss": 0.05990878, "step": 9717 }, { "epoch": 19.436, "grad_norm": 1.7866110801696777, "learning_rate": 2e-05, "loss": 0.04441113, "step": 9718 }, { "epoch": 19.438, "grad_norm": 1.4546856880187988, "learning_rate": 2e-05, "loss": 0.0402088, "step": 9719 }, { "epoch": 19.44, "grad_norm": 1.0940507650375366, "learning_rate": 2e-05, "loss": 0.03648096, "step": 9720 }, { "epoch": 19.442, "grad_norm": 2.4689576625823975, "learning_rate": 2e-05, "loss": 0.10238684, "step": 9721 }, { "epoch": 19.444, "grad_norm": 1.3880178928375244, "learning_rate": 2e-05, "loss": 0.03915354, "step": 9722 }, { "epoch": 19.446, "grad_norm": 1.245656967163086, "learning_rate": 2e-05, "loss": 0.05551424, "step": 9723 }, { "epoch": 19.448, "grad_norm": 1.0747759342193604, "learning_rate": 2e-05, "loss": 0.04109879, "step": 9724 }, { "epoch": 19.45, "grad_norm": 1.746601939201355, "learning_rate": 2e-05, "loss": 0.03869846, "step": 9725 }, { "epoch": 19.452, "grad_norm": 1.2285462617874146, "learning_rate": 2e-05, "loss": 0.04490523, "step": 9726 }, { "epoch": 19.454, "grad_norm": 1.2025593519210815, "learning_rate": 2e-05, "loss": 0.03346576, "step": 9727 }, { "epoch": 19.456, "grad_norm": 1.2167935371398926, "learning_rate": 2e-05, "loss": 0.04255934, "step": 9728 }, { "epoch": 19.458, "grad_norm": 1.1847678422927856, "learning_rate": 2e-05, "loss": 0.03471856, "step": 9729 }, { "epoch": 19.46, "grad_norm": 1.374746561050415, "learning_rate": 2e-05, "loss": 0.03128728, "step": 9730 }, { "epoch": 19.462, "grad_norm": 1.323233723640442, "learning_rate": 2e-05, "loss": 0.03614213, "step": 9731 }, { "epoch": 19.464, "grad_norm": 1.2926902770996094, "learning_rate": 2e-05, "loss": 0.05702139, "step": 9732 }, { "epoch": 19.466, "grad_norm": 1.458201289176941, "learning_rate": 2e-05, "loss": 0.04913079, "step": 9733 }, { "epoch": 19.468, "grad_norm": 11.280439376831055, "learning_rate": 2e-05, "loss": 0.05011719, "step": 9734 }, { "epoch": 19.47, "grad_norm": 1.7070592641830444, "learning_rate": 2e-05, "loss": 0.04946275, "step": 9735 }, { "epoch": 19.472, "grad_norm": 1.4574682712554932, "learning_rate": 2e-05, "loss": 0.04652442, "step": 9736 }, { "epoch": 19.474, "grad_norm": 1.1022684574127197, "learning_rate": 2e-05, "loss": 0.05206183, "step": 9737 }, { "epoch": 19.476, "grad_norm": 1.231909990310669, "learning_rate": 2e-05, "loss": 0.04035537, "step": 9738 }, { "epoch": 19.478, "grad_norm": 1.9996434450149536, "learning_rate": 2e-05, "loss": 0.0517461, "step": 9739 }, { "epoch": 19.48, "grad_norm": 1.3271154165267944, "learning_rate": 2e-05, "loss": 0.03997225, "step": 9740 }, { "epoch": 19.482, "grad_norm": 1.512366771697998, "learning_rate": 2e-05, "loss": 0.04821225, "step": 9741 }, { "epoch": 19.484, "grad_norm": 1.3343210220336914, "learning_rate": 2e-05, "loss": 0.03394932, "step": 9742 }, { "epoch": 19.486, "grad_norm": 1.3094391822814941, "learning_rate": 2e-05, "loss": 0.04556721, "step": 9743 }, { "epoch": 19.488, "grad_norm": 2.965493679046631, "learning_rate": 2e-05, "loss": 0.03707909, "step": 9744 }, { "epoch": 19.49, "grad_norm": 1.1992868185043335, "learning_rate": 2e-05, "loss": 0.0463643, "step": 9745 }, { "epoch": 19.492, "grad_norm": 2.4122650623321533, "learning_rate": 2e-05, "loss": 0.04022121, "step": 9746 }, { "epoch": 19.494, "grad_norm": 1.2792567014694214, "learning_rate": 2e-05, "loss": 0.04131445, "step": 9747 }, { "epoch": 19.496, "grad_norm": 1.321345567703247, "learning_rate": 2e-05, "loss": 0.03763434, "step": 9748 }, { "epoch": 19.498, "grad_norm": 0.8731803297996521, "learning_rate": 2e-05, "loss": 0.02759463, "step": 9749 }, { "epoch": 19.5, "grad_norm": 1.5647461414337158, "learning_rate": 2e-05, "loss": 0.05147694, "step": 9750 }, { "epoch": 19.502, "grad_norm": 1.6512911319732666, "learning_rate": 2e-05, "loss": 0.05187906, "step": 9751 }, { "epoch": 19.504, "grad_norm": 2.5671064853668213, "learning_rate": 2e-05, "loss": 0.0621926, "step": 9752 }, { "epoch": 19.506, "grad_norm": 0.8515844941139221, "learning_rate": 2e-05, "loss": 0.03332325, "step": 9753 }, { "epoch": 19.508, "grad_norm": 1.1611289978027344, "learning_rate": 2e-05, "loss": 0.03925589, "step": 9754 }, { "epoch": 19.51, "grad_norm": 1.1841310262680054, "learning_rate": 2e-05, "loss": 0.04153107, "step": 9755 }, { "epoch": 19.512, "grad_norm": 1.5829827785491943, "learning_rate": 2e-05, "loss": 0.05926523, "step": 9756 }, { "epoch": 19.514, "grad_norm": 2.830404043197632, "learning_rate": 2e-05, "loss": 0.0346127, "step": 9757 }, { "epoch": 19.516, "grad_norm": 1.1793166399002075, "learning_rate": 2e-05, "loss": 0.04535308, "step": 9758 }, { "epoch": 19.518, "grad_norm": 1.171308159828186, "learning_rate": 2e-05, "loss": 0.0358928, "step": 9759 }, { "epoch": 19.52, "grad_norm": 0.9667606353759766, "learning_rate": 2e-05, "loss": 0.03626837, "step": 9760 }, { "epoch": 19.522, "grad_norm": 1.5529730319976807, "learning_rate": 2e-05, "loss": 0.0561646, "step": 9761 }, { "epoch": 19.524, "grad_norm": 2.0817434787750244, "learning_rate": 2e-05, "loss": 0.03666446, "step": 9762 }, { "epoch": 19.526, "grad_norm": 10.803163528442383, "learning_rate": 2e-05, "loss": 0.0598635, "step": 9763 }, { "epoch": 19.528, "grad_norm": 1.135371446609497, "learning_rate": 2e-05, "loss": 0.04183336, "step": 9764 }, { "epoch": 19.53, "grad_norm": 1.0773217678070068, "learning_rate": 2e-05, "loss": 0.0374106, "step": 9765 }, { "epoch": 19.532, "grad_norm": 1.3163087368011475, "learning_rate": 2e-05, "loss": 0.05014953, "step": 9766 }, { "epoch": 19.534, "grad_norm": 1.0920156240463257, "learning_rate": 2e-05, "loss": 0.04792017, "step": 9767 }, { "epoch": 19.536, "grad_norm": 1.1586713790893555, "learning_rate": 2e-05, "loss": 0.03783863, "step": 9768 }, { "epoch": 19.538, "grad_norm": 1.0288833379745483, "learning_rate": 2e-05, "loss": 0.03095918, "step": 9769 }, { "epoch": 19.54, "grad_norm": 1.1086949110031128, "learning_rate": 2e-05, "loss": 0.04088346, "step": 9770 }, { "epoch": 19.542, "grad_norm": 1.0012675523757935, "learning_rate": 2e-05, "loss": 0.02675718, "step": 9771 }, { "epoch": 19.544, "grad_norm": 1.4383875131607056, "learning_rate": 2e-05, "loss": 0.05273371, "step": 9772 }, { "epoch": 19.546, "grad_norm": 1.5652005672454834, "learning_rate": 2e-05, "loss": 0.04652348, "step": 9773 }, { "epoch": 19.548000000000002, "grad_norm": 1.5093274116516113, "learning_rate": 2e-05, "loss": 0.05924704, "step": 9774 }, { "epoch": 19.55, "grad_norm": 1.0317670106887817, "learning_rate": 2e-05, "loss": 0.03254639, "step": 9775 }, { "epoch": 19.552, "grad_norm": 1.0487090349197388, "learning_rate": 2e-05, "loss": 0.04190826, "step": 9776 }, { "epoch": 19.554, "grad_norm": 1.1325141191482544, "learning_rate": 2e-05, "loss": 0.03892371, "step": 9777 }, { "epoch": 19.556, "grad_norm": 3.222447156906128, "learning_rate": 2e-05, "loss": 0.04135956, "step": 9778 }, { "epoch": 19.558, "grad_norm": 1.1760438680648804, "learning_rate": 2e-05, "loss": 0.05717054, "step": 9779 }, { "epoch": 19.56, "grad_norm": 1.0659761428833008, "learning_rate": 2e-05, "loss": 0.04145942, "step": 9780 }, { "epoch": 19.562, "grad_norm": 1.1207369565963745, "learning_rate": 2e-05, "loss": 0.04055945, "step": 9781 }, { "epoch": 19.564, "grad_norm": 1.6405407190322876, "learning_rate": 2e-05, "loss": 0.0635064, "step": 9782 }, { "epoch": 19.566, "grad_norm": 0.9997173547744751, "learning_rate": 2e-05, "loss": 0.03239504, "step": 9783 }, { "epoch": 19.568, "grad_norm": 1.2001104354858398, "learning_rate": 2e-05, "loss": 0.03856827, "step": 9784 }, { "epoch": 19.57, "grad_norm": 1.8651480674743652, "learning_rate": 2e-05, "loss": 0.03771979, "step": 9785 }, { "epoch": 19.572, "grad_norm": 1.2942423820495605, "learning_rate": 2e-05, "loss": 0.04859246, "step": 9786 }, { "epoch": 19.574, "grad_norm": 2.317221164703369, "learning_rate": 2e-05, "loss": 0.0564457, "step": 9787 }, { "epoch": 19.576, "grad_norm": 1.1623016595840454, "learning_rate": 2e-05, "loss": 0.03815918, "step": 9788 }, { "epoch": 19.578, "grad_norm": 1.2147778272628784, "learning_rate": 2e-05, "loss": 0.0415374, "step": 9789 }, { "epoch": 19.58, "grad_norm": 1.2703278064727783, "learning_rate": 2e-05, "loss": 0.04007832, "step": 9790 }, { "epoch": 19.582, "grad_norm": 1.303297996520996, "learning_rate": 2e-05, "loss": 0.0433217, "step": 9791 }, { "epoch": 19.584, "grad_norm": 1.329809546470642, "learning_rate": 2e-05, "loss": 0.03548926, "step": 9792 }, { "epoch": 19.586, "grad_norm": 1.2936608791351318, "learning_rate": 2e-05, "loss": 0.04913282, "step": 9793 }, { "epoch": 19.588, "grad_norm": 1.0678317546844482, "learning_rate": 2e-05, "loss": 0.04557052, "step": 9794 }, { "epoch": 19.59, "grad_norm": 1.1214265823364258, "learning_rate": 2e-05, "loss": 0.03600621, "step": 9795 }, { "epoch": 19.592, "grad_norm": 1.6379845142364502, "learning_rate": 2e-05, "loss": 0.05931259, "step": 9796 }, { "epoch": 19.594, "grad_norm": 2.4793388843536377, "learning_rate": 2e-05, "loss": 0.05285888, "step": 9797 }, { "epoch": 19.596, "grad_norm": 1.473997712135315, "learning_rate": 2e-05, "loss": 0.05199683, "step": 9798 }, { "epoch": 19.598, "grad_norm": 1.1606085300445557, "learning_rate": 2e-05, "loss": 0.04489448, "step": 9799 }, { "epoch": 19.6, "grad_norm": 1.797681450843811, "learning_rate": 2e-05, "loss": 0.03874779, "step": 9800 }, { "epoch": 19.602, "grad_norm": 1.5281680822372437, "learning_rate": 2e-05, "loss": 0.03582354, "step": 9801 }, { "epoch": 19.604, "grad_norm": 1.0201494693756104, "learning_rate": 2e-05, "loss": 0.03549119, "step": 9802 }, { "epoch": 19.606, "grad_norm": 1.8086521625518799, "learning_rate": 2e-05, "loss": 0.05904224, "step": 9803 }, { "epoch": 19.608, "grad_norm": 1.399520993232727, "learning_rate": 2e-05, "loss": 0.05493116, "step": 9804 }, { "epoch": 19.61, "grad_norm": 0.8422263860702515, "learning_rate": 2e-05, "loss": 0.02994341, "step": 9805 }, { "epoch": 19.612, "grad_norm": 1.3597902059555054, "learning_rate": 2e-05, "loss": 0.04971177, "step": 9806 }, { "epoch": 19.614, "grad_norm": 1.4778944253921509, "learning_rate": 2e-05, "loss": 0.03863184, "step": 9807 }, { "epoch": 19.616, "grad_norm": 1.0869373083114624, "learning_rate": 2e-05, "loss": 0.03532803, "step": 9808 }, { "epoch": 19.618, "grad_norm": 1.2919552326202393, "learning_rate": 2e-05, "loss": 0.05406925, "step": 9809 }, { "epoch": 19.62, "grad_norm": 1.0126062631607056, "learning_rate": 2e-05, "loss": 0.04513823, "step": 9810 }, { "epoch": 19.622, "grad_norm": 2.3918895721435547, "learning_rate": 2e-05, "loss": 0.05585405, "step": 9811 }, { "epoch": 19.624, "grad_norm": 1.3447763919830322, "learning_rate": 2e-05, "loss": 0.04343608, "step": 9812 }, { "epoch": 19.626, "grad_norm": 1.4151437282562256, "learning_rate": 2e-05, "loss": 0.0329832, "step": 9813 }, { "epoch": 19.628, "grad_norm": 2.7684154510498047, "learning_rate": 2e-05, "loss": 0.05637457, "step": 9814 }, { "epoch": 19.63, "grad_norm": 1.323366641998291, "learning_rate": 2e-05, "loss": 0.05628778, "step": 9815 }, { "epoch": 19.632, "grad_norm": 1.2532988786697388, "learning_rate": 2e-05, "loss": 0.04769237, "step": 9816 }, { "epoch": 19.634, "grad_norm": 1.0455249547958374, "learning_rate": 2e-05, "loss": 0.04133917, "step": 9817 }, { "epoch": 19.636, "grad_norm": 1.1305084228515625, "learning_rate": 2e-05, "loss": 0.0472152, "step": 9818 }, { "epoch": 19.638, "grad_norm": 1.0384160280227661, "learning_rate": 2e-05, "loss": 0.05237353, "step": 9819 }, { "epoch": 19.64, "grad_norm": 1.4017505645751953, "learning_rate": 2e-05, "loss": 0.05203006, "step": 9820 }, { "epoch": 19.642, "grad_norm": 1.5074249505996704, "learning_rate": 2e-05, "loss": 0.04066243, "step": 9821 }, { "epoch": 19.644, "grad_norm": 1.059114933013916, "learning_rate": 2e-05, "loss": 0.0350259, "step": 9822 }, { "epoch": 19.646, "grad_norm": 2.181492328643799, "learning_rate": 2e-05, "loss": 0.04174504, "step": 9823 }, { "epoch": 19.648, "grad_norm": 1.536797285079956, "learning_rate": 2e-05, "loss": 0.04247047, "step": 9824 }, { "epoch": 19.65, "grad_norm": 1.7219699621200562, "learning_rate": 2e-05, "loss": 0.04848897, "step": 9825 }, { "epoch": 19.652, "grad_norm": 1.4132335186004639, "learning_rate": 2e-05, "loss": 0.0642467, "step": 9826 }, { "epoch": 19.654, "grad_norm": 1.2741103172302246, "learning_rate": 2e-05, "loss": 0.05388, "step": 9827 }, { "epoch": 19.656, "grad_norm": 1.5186736583709717, "learning_rate": 2e-05, "loss": 0.04202322, "step": 9828 }, { "epoch": 19.658, "grad_norm": 2.855564594268799, "learning_rate": 2e-05, "loss": 0.06140168, "step": 9829 }, { "epoch": 19.66, "grad_norm": 1.5625933408737183, "learning_rate": 2e-05, "loss": 0.0482957, "step": 9830 }, { "epoch": 19.662, "grad_norm": 0.9124172925949097, "learning_rate": 2e-05, "loss": 0.02172836, "step": 9831 }, { "epoch": 19.664, "grad_norm": 1.618937611579895, "learning_rate": 2e-05, "loss": 0.04446569, "step": 9832 }, { "epoch": 19.666, "grad_norm": 1.2576582431793213, "learning_rate": 2e-05, "loss": 0.05328108, "step": 9833 }, { "epoch": 19.668, "grad_norm": 1.2171757221221924, "learning_rate": 2e-05, "loss": 0.04585882, "step": 9834 }, { "epoch": 19.67, "grad_norm": 1.3426347970962524, "learning_rate": 2e-05, "loss": 0.06089702, "step": 9835 }, { "epoch": 19.672, "grad_norm": 1.1207820177078247, "learning_rate": 2e-05, "loss": 0.02921787, "step": 9836 }, { "epoch": 19.674, "grad_norm": 1.1567469835281372, "learning_rate": 2e-05, "loss": 0.04104996, "step": 9837 }, { "epoch": 19.676, "grad_norm": 0.9024575352668762, "learning_rate": 2e-05, "loss": 0.03358054, "step": 9838 }, { "epoch": 19.678, "grad_norm": 1.0503915548324585, "learning_rate": 2e-05, "loss": 0.04793879, "step": 9839 }, { "epoch": 19.68, "grad_norm": 2.890329599380493, "learning_rate": 2e-05, "loss": 0.05492513, "step": 9840 }, { "epoch": 19.682, "grad_norm": 0.9892696142196655, "learning_rate": 2e-05, "loss": 0.03702658, "step": 9841 }, { "epoch": 19.684, "grad_norm": 1.4491151571273804, "learning_rate": 2e-05, "loss": 0.04822674, "step": 9842 }, { "epoch": 19.686, "grad_norm": 1.7118585109710693, "learning_rate": 2e-05, "loss": 0.04982598, "step": 9843 }, { "epoch": 19.688, "grad_norm": 1.511823296546936, "learning_rate": 2e-05, "loss": 0.03948272, "step": 9844 }, { "epoch": 19.69, "grad_norm": 1.377773642539978, "learning_rate": 2e-05, "loss": 0.04665618, "step": 9845 }, { "epoch": 19.692, "grad_norm": 1.4655163288116455, "learning_rate": 2e-05, "loss": 0.03921684, "step": 9846 }, { "epoch": 19.694, "grad_norm": 1.416415810585022, "learning_rate": 2e-05, "loss": 0.04124401, "step": 9847 }, { "epoch": 19.696, "grad_norm": 1.402280330657959, "learning_rate": 2e-05, "loss": 0.04650119, "step": 9848 }, { "epoch": 19.698, "grad_norm": 1.333787202835083, "learning_rate": 2e-05, "loss": 0.03673238, "step": 9849 }, { "epoch": 19.7, "grad_norm": 1.6126660108566284, "learning_rate": 2e-05, "loss": 0.04919867, "step": 9850 }, { "epoch": 19.701999999999998, "grad_norm": 1.453936219215393, "learning_rate": 2e-05, "loss": 0.05116571, "step": 9851 }, { "epoch": 19.704, "grad_norm": 1.3196852207183838, "learning_rate": 2e-05, "loss": 0.05735859, "step": 9852 }, { "epoch": 19.706, "grad_norm": 1.0045263767242432, "learning_rate": 2e-05, "loss": 0.03579157, "step": 9853 }, { "epoch": 19.708, "grad_norm": 1.1301738023757935, "learning_rate": 2e-05, "loss": 0.05237764, "step": 9854 }, { "epoch": 19.71, "grad_norm": 1.0088297128677368, "learning_rate": 2e-05, "loss": 0.04020531, "step": 9855 }, { "epoch": 19.712, "grad_norm": 1.141501545906067, "learning_rate": 2e-05, "loss": 0.0543732, "step": 9856 }, { "epoch": 19.714, "grad_norm": 1.2338721752166748, "learning_rate": 2e-05, "loss": 0.0452891, "step": 9857 }, { "epoch": 19.716, "grad_norm": 1.4067304134368896, "learning_rate": 2e-05, "loss": 0.0634157, "step": 9858 }, { "epoch": 19.718, "grad_norm": 1.7353579998016357, "learning_rate": 2e-05, "loss": 0.05240074, "step": 9859 }, { "epoch": 19.72, "grad_norm": 1.1042323112487793, "learning_rate": 2e-05, "loss": 0.03902452, "step": 9860 }, { "epoch": 19.722, "grad_norm": 2.05401349067688, "learning_rate": 2e-05, "loss": 0.05709256, "step": 9861 }, { "epoch": 19.724, "grad_norm": 1.228642225265503, "learning_rate": 2e-05, "loss": 0.05616094, "step": 9862 }, { "epoch": 19.726, "grad_norm": 1.3411247730255127, "learning_rate": 2e-05, "loss": 0.05274705, "step": 9863 }, { "epoch": 19.728, "grad_norm": 0.9605430364608765, "learning_rate": 2e-05, "loss": 0.03076487, "step": 9864 }, { "epoch": 19.73, "grad_norm": 1.314101219177246, "learning_rate": 2e-05, "loss": 0.06292171, "step": 9865 }, { "epoch": 19.732, "grad_norm": 1.1829004287719727, "learning_rate": 2e-05, "loss": 0.03205954, "step": 9866 }, { "epoch": 19.734, "grad_norm": 1.4708023071289062, "learning_rate": 2e-05, "loss": 0.0438548, "step": 9867 }, { "epoch": 19.736, "grad_norm": 3.3712403774261475, "learning_rate": 2e-05, "loss": 0.053829, "step": 9868 }, { "epoch": 19.738, "grad_norm": 1.0781121253967285, "learning_rate": 2e-05, "loss": 0.04134469, "step": 9869 }, { "epoch": 19.74, "grad_norm": 1.4421260356903076, "learning_rate": 2e-05, "loss": 0.05800582, "step": 9870 }, { "epoch": 19.742, "grad_norm": 1.5257000923156738, "learning_rate": 2e-05, "loss": 0.05040158, "step": 9871 }, { "epoch": 19.744, "grad_norm": 1.9874612092971802, "learning_rate": 2e-05, "loss": 0.04579516, "step": 9872 }, { "epoch": 19.746, "grad_norm": 1.1693814992904663, "learning_rate": 2e-05, "loss": 0.03881216, "step": 9873 }, { "epoch": 19.748, "grad_norm": 1.0554652214050293, "learning_rate": 2e-05, "loss": 0.0410638, "step": 9874 }, { "epoch": 19.75, "grad_norm": 1.2013182640075684, "learning_rate": 2e-05, "loss": 0.04026397, "step": 9875 }, { "epoch": 19.752, "grad_norm": 1.749387264251709, "learning_rate": 2e-05, "loss": 0.06423385, "step": 9876 }, { "epoch": 19.754, "grad_norm": 1.4695591926574707, "learning_rate": 2e-05, "loss": 0.03430013, "step": 9877 }, { "epoch": 19.756, "grad_norm": 0.998163104057312, "learning_rate": 2e-05, "loss": 0.0367656, "step": 9878 }, { "epoch": 19.758, "grad_norm": 1.049464464187622, "learning_rate": 2e-05, "loss": 0.03448299, "step": 9879 }, { "epoch": 19.76, "grad_norm": 1.335489273071289, "learning_rate": 2e-05, "loss": 0.04513178, "step": 9880 }, { "epoch": 19.762, "grad_norm": 1.67737877368927, "learning_rate": 2e-05, "loss": 0.05232001, "step": 9881 }, { "epoch": 19.764, "grad_norm": 2.04710054397583, "learning_rate": 2e-05, "loss": 0.07295971, "step": 9882 }, { "epoch": 19.766, "grad_norm": 2.154341220855713, "learning_rate": 2e-05, "loss": 0.04757445, "step": 9883 }, { "epoch": 19.768, "grad_norm": 1.0037877559661865, "learning_rate": 2e-05, "loss": 0.03642687, "step": 9884 }, { "epoch": 19.77, "grad_norm": 1.9585700035095215, "learning_rate": 2e-05, "loss": 0.06963745, "step": 9885 }, { "epoch": 19.772, "grad_norm": 1.1366851329803467, "learning_rate": 2e-05, "loss": 0.042356, "step": 9886 }, { "epoch": 19.774, "grad_norm": 0.9963080883026123, "learning_rate": 2e-05, "loss": 0.04771899, "step": 9887 }, { "epoch": 19.776, "grad_norm": 1.1770130395889282, "learning_rate": 2e-05, "loss": 0.03741566, "step": 9888 }, { "epoch": 19.778, "grad_norm": 1.517072081565857, "learning_rate": 2e-05, "loss": 0.03924861, "step": 9889 }, { "epoch": 19.78, "grad_norm": 1.2560396194458008, "learning_rate": 2e-05, "loss": 0.05135451, "step": 9890 }, { "epoch": 19.782, "grad_norm": 1.301523208618164, "learning_rate": 2e-05, "loss": 0.04597385, "step": 9891 }, { "epoch": 19.784, "grad_norm": 2.0971477031707764, "learning_rate": 2e-05, "loss": 0.07420594, "step": 9892 }, { "epoch": 19.786, "grad_norm": 1.106209397315979, "learning_rate": 2e-05, "loss": 0.03496512, "step": 9893 }, { "epoch": 19.788, "grad_norm": 1.2144694328308105, "learning_rate": 2e-05, "loss": 0.04760725, "step": 9894 }, { "epoch": 19.79, "grad_norm": 2.2000837326049805, "learning_rate": 2e-05, "loss": 0.06038141, "step": 9895 }, { "epoch": 19.792, "grad_norm": 1.1673394441604614, "learning_rate": 2e-05, "loss": 0.03258986, "step": 9896 }, { "epoch": 19.794, "grad_norm": 1.1665492057800293, "learning_rate": 2e-05, "loss": 0.04413421, "step": 9897 }, { "epoch": 19.796, "grad_norm": 1.4714115858078003, "learning_rate": 2e-05, "loss": 0.04880546, "step": 9898 }, { "epoch": 19.798000000000002, "grad_norm": 1.1352968215942383, "learning_rate": 2e-05, "loss": 0.04468106, "step": 9899 }, { "epoch": 19.8, "grad_norm": 1.8641090393066406, "learning_rate": 2e-05, "loss": 0.04832969, "step": 9900 }, { "epoch": 19.802, "grad_norm": 2.043614387512207, "learning_rate": 2e-05, "loss": 0.05763764, "step": 9901 }, { "epoch": 19.804, "grad_norm": 1.3381956815719604, "learning_rate": 2e-05, "loss": 0.05766708, "step": 9902 }, { "epoch": 19.806, "grad_norm": 1.0041110515594482, "learning_rate": 2e-05, "loss": 0.03801927, "step": 9903 }, { "epoch": 19.808, "grad_norm": 1.8360679149627686, "learning_rate": 2e-05, "loss": 0.05261574, "step": 9904 }, { "epoch": 19.81, "grad_norm": 1.4357998371124268, "learning_rate": 2e-05, "loss": 0.0379136, "step": 9905 }, { "epoch": 19.812, "grad_norm": 1.1604324579238892, "learning_rate": 2e-05, "loss": 0.03319767, "step": 9906 }, { "epoch": 19.814, "grad_norm": 1.6565756797790527, "learning_rate": 2e-05, "loss": 0.0378035, "step": 9907 }, { "epoch": 19.816, "grad_norm": 1.4502451419830322, "learning_rate": 2e-05, "loss": 0.0406094, "step": 9908 }, { "epoch": 19.818, "grad_norm": 1.1169416904449463, "learning_rate": 2e-05, "loss": 0.03667828, "step": 9909 }, { "epoch": 19.82, "grad_norm": 0.9602922797203064, "learning_rate": 2e-05, "loss": 0.03931554, "step": 9910 }, { "epoch": 19.822, "grad_norm": 1.0306222438812256, "learning_rate": 2e-05, "loss": 0.03479027, "step": 9911 }, { "epoch": 19.824, "grad_norm": 1.1581436395645142, "learning_rate": 2e-05, "loss": 0.03783763, "step": 9912 }, { "epoch": 19.826, "grad_norm": 1.4078260660171509, "learning_rate": 2e-05, "loss": 0.06037591, "step": 9913 }, { "epoch": 19.828, "grad_norm": 1.4233543872833252, "learning_rate": 2e-05, "loss": 0.04747615, "step": 9914 }, { "epoch": 19.83, "grad_norm": 2.2578678131103516, "learning_rate": 2e-05, "loss": 0.07326545, "step": 9915 }, { "epoch": 19.832, "grad_norm": 1.0691442489624023, "learning_rate": 2e-05, "loss": 0.03629192, "step": 9916 }, { "epoch": 19.834, "grad_norm": 1.3853094577789307, "learning_rate": 2e-05, "loss": 0.03920384, "step": 9917 }, { "epoch": 19.836, "grad_norm": 0.9466228485107422, "learning_rate": 2e-05, "loss": 0.03561662, "step": 9918 }, { "epoch": 19.838, "grad_norm": 1.7220699787139893, "learning_rate": 2e-05, "loss": 0.04991079, "step": 9919 }, { "epoch": 19.84, "grad_norm": 1.5772058963775635, "learning_rate": 2e-05, "loss": 0.05064041, "step": 9920 }, { "epoch": 19.842, "grad_norm": 1.2876979112625122, "learning_rate": 2e-05, "loss": 0.04476606, "step": 9921 }, { "epoch": 19.844, "grad_norm": 1.6998168230056763, "learning_rate": 2e-05, "loss": 0.04959469, "step": 9922 }, { "epoch": 19.846, "grad_norm": 2.0727686882019043, "learning_rate": 2e-05, "loss": 0.05590031, "step": 9923 }, { "epoch": 19.848, "grad_norm": 0.8940509557723999, "learning_rate": 2e-05, "loss": 0.030901, "step": 9924 }, { "epoch": 19.85, "grad_norm": 2.0460457801818848, "learning_rate": 2e-05, "loss": 0.06272165, "step": 9925 }, { "epoch": 19.852, "grad_norm": 2.1535210609436035, "learning_rate": 2e-05, "loss": 0.04591251, "step": 9926 }, { "epoch": 19.854, "grad_norm": 0.8285711407661438, "learning_rate": 2e-05, "loss": 0.03149334, "step": 9927 }, { "epoch": 19.856, "grad_norm": 1.254256248474121, "learning_rate": 2e-05, "loss": 0.05982056, "step": 9928 }, { "epoch": 19.858, "grad_norm": 2.6181087493896484, "learning_rate": 2e-05, "loss": 0.05251448, "step": 9929 }, { "epoch": 19.86, "grad_norm": 1.7091259956359863, "learning_rate": 2e-05, "loss": 0.0409224, "step": 9930 }, { "epoch": 19.862, "grad_norm": 2.5646708011627197, "learning_rate": 2e-05, "loss": 0.06089189, "step": 9931 }, { "epoch": 19.864, "grad_norm": 1.2642056941986084, "learning_rate": 2e-05, "loss": 0.04255435, "step": 9932 }, { "epoch": 19.866, "grad_norm": 0.8946945071220398, "learning_rate": 2e-05, "loss": 0.03370569, "step": 9933 }, { "epoch": 19.868, "grad_norm": 1.041985034942627, "learning_rate": 2e-05, "loss": 0.03797919, "step": 9934 }, { "epoch": 19.87, "grad_norm": 1.2088443040847778, "learning_rate": 2e-05, "loss": 0.04309478, "step": 9935 }, { "epoch": 19.872, "grad_norm": 0.957236111164093, "learning_rate": 2e-05, "loss": 0.03912753, "step": 9936 }, { "epoch": 19.874, "grad_norm": 1.078927993774414, "learning_rate": 2e-05, "loss": 0.03755477, "step": 9937 }, { "epoch": 19.876, "grad_norm": 1.6212440729141235, "learning_rate": 2e-05, "loss": 0.04807086, "step": 9938 }, { "epoch": 19.878, "grad_norm": 1.828425407409668, "learning_rate": 2e-05, "loss": 0.05459414, "step": 9939 }, { "epoch": 19.88, "grad_norm": 1.2175099849700928, "learning_rate": 2e-05, "loss": 0.04490793, "step": 9940 }, { "epoch": 19.882, "grad_norm": 1.0675256252288818, "learning_rate": 2e-05, "loss": 0.03945842, "step": 9941 }, { "epoch": 19.884, "grad_norm": 1.3400332927703857, "learning_rate": 2e-05, "loss": 0.05500691, "step": 9942 }, { "epoch": 19.886, "grad_norm": 1.0481685400009155, "learning_rate": 2e-05, "loss": 0.0376045, "step": 9943 }, { "epoch": 19.888, "grad_norm": 1.1912091970443726, "learning_rate": 2e-05, "loss": 0.04554616, "step": 9944 }, { "epoch": 19.89, "grad_norm": 1.089674711227417, "learning_rate": 2e-05, "loss": 0.03435346, "step": 9945 }, { "epoch": 19.892, "grad_norm": 2.5817346572875977, "learning_rate": 2e-05, "loss": 0.05764627, "step": 9946 }, { "epoch": 19.894, "grad_norm": 1.1305749416351318, "learning_rate": 2e-05, "loss": 0.04945515, "step": 9947 }, { "epoch": 19.896, "grad_norm": 1.0717039108276367, "learning_rate": 2e-05, "loss": 0.04119519, "step": 9948 }, { "epoch": 19.898, "grad_norm": 1.333313226699829, "learning_rate": 2e-05, "loss": 0.05041708, "step": 9949 }, { "epoch": 19.9, "grad_norm": 1.2159230709075928, "learning_rate": 2e-05, "loss": 0.05127116, "step": 9950 }, { "epoch": 19.902, "grad_norm": 1.1147123575210571, "learning_rate": 2e-05, "loss": 0.03272236, "step": 9951 }, { "epoch": 19.904, "grad_norm": 1.5635836124420166, "learning_rate": 2e-05, "loss": 0.03329258, "step": 9952 }, { "epoch": 19.906, "grad_norm": 2.2178726196289062, "learning_rate": 2e-05, "loss": 0.04939291, "step": 9953 }, { "epoch": 19.908, "grad_norm": 1.0326124429702759, "learning_rate": 2e-05, "loss": 0.024304, "step": 9954 }, { "epoch": 19.91, "grad_norm": 1.465420126914978, "learning_rate": 2e-05, "loss": 0.04347322, "step": 9955 }, { "epoch": 19.912, "grad_norm": 1.8869314193725586, "learning_rate": 2e-05, "loss": 0.05487315, "step": 9956 }, { "epoch": 19.914, "grad_norm": 1.1457998752593994, "learning_rate": 2e-05, "loss": 0.03031149, "step": 9957 }, { "epoch": 19.916, "grad_norm": 1.2959970235824585, "learning_rate": 2e-05, "loss": 0.04220332, "step": 9958 }, { "epoch": 19.918, "grad_norm": 1.4468764066696167, "learning_rate": 2e-05, "loss": 0.03483006, "step": 9959 }, { "epoch": 19.92, "grad_norm": 1.6252766847610474, "learning_rate": 2e-05, "loss": 0.04224895, "step": 9960 }, { "epoch": 19.922, "grad_norm": 1.1470091342926025, "learning_rate": 2e-05, "loss": 0.04128341, "step": 9961 }, { "epoch": 19.924, "grad_norm": 1.6575427055358887, "learning_rate": 2e-05, "loss": 0.05208197, "step": 9962 }, { "epoch": 19.926, "grad_norm": 2.863175868988037, "learning_rate": 2e-05, "loss": 0.06198461, "step": 9963 }, { "epoch": 19.928, "grad_norm": 1.0098485946655273, "learning_rate": 2e-05, "loss": 0.03977994, "step": 9964 }, { "epoch": 19.93, "grad_norm": 3.209949254989624, "learning_rate": 2e-05, "loss": 0.05408508, "step": 9965 }, { "epoch": 19.932, "grad_norm": 1.322562575340271, "learning_rate": 2e-05, "loss": 0.04583313, "step": 9966 }, { "epoch": 19.934, "grad_norm": 2.9274871349334717, "learning_rate": 2e-05, "loss": 0.05956116, "step": 9967 }, { "epoch": 19.936, "grad_norm": 0.9923803210258484, "learning_rate": 2e-05, "loss": 0.03944445, "step": 9968 }, { "epoch": 19.938, "grad_norm": 1.251950979232788, "learning_rate": 2e-05, "loss": 0.0486494, "step": 9969 }, { "epoch": 19.94, "grad_norm": 1.156855821609497, "learning_rate": 2e-05, "loss": 0.0280044, "step": 9970 }, { "epoch": 19.942, "grad_norm": 0.9502769708633423, "learning_rate": 2e-05, "loss": 0.03585855, "step": 9971 }, { "epoch": 19.944, "grad_norm": 1.8762098550796509, "learning_rate": 2e-05, "loss": 0.05288342, "step": 9972 }, { "epoch": 19.946, "grad_norm": 2.1633381843566895, "learning_rate": 2e-05, "loss": 0.05550963, "step": 9973 }, { "epoch": 19.948, "grad_norm": 1.552352786064148, "learning_rate": 2e-05, "loss": 0.05989832, "step": 9974 }, { "epoch": 19.95, "grad_norm": 1.0320520401000977, "learning_rate": 2e-05, "loss": 0.03358724, "step": 9975 }, { "epoch": 19.951999999999998, "grad_norm": 1.075856328010559, "learning_rate": 2e-05, "loss": 0.04548848, "step": 9976 }, { "epoch": 19.954, "grad_norm": 1.3800830841064453, "learning_rate": 2e-05, "loss": 0.05029379, "step": 9977 }, { "epoch": 19.956, "grad_norm": 2.481336832046509, "learning_rate": 2e-05, "loss": 0.05610425, "step": 9978 }, { "epoch": 19.958, "grad_norm": 1.4334888458251953, "learning_rate": 2e-05, "loss": 0.0440852, "step": 9979 }, { "epoch": 19.96, "grad_norm": 1.0521533489227295, "learning_rate": 2e-05, "loss": 0.03211974, "step": 9980 }, { "epoch": 19.962, "grad_norm": 1.3181735277175903, "learning_rate": 2e-05, "loss": 0.04537112, "step": 9981 }, { "epoch": 19.964, "grad_norm": 1.7708529233932495, "learning_rate": 2e-05, "loss": 0.05885722, "step": 9982 }, { "epoch": 19.966, "grad_norm": 1.0898101329803467, "learning_rate": 2e-05, "loss": 0.04922247, "step": 9983 }, { "epoch": 19.968, "grad_norm": 1.6484142541885376, "learning_rate": 2e-05, "loss": 0.04867502, "step": 9984 }, { "epoch": 19.97, "grad_norm": 1.3384199142456055, "learning_rate": 2e-05, "loss": 0.05530729, "step": 9985 }, { "epoch": 19.972, "grad_norm": 2.1147444248199463, "learning_rate": 2e-05, "loss": 0.03819465, "step": 9986 }, { "epoch": 19.974, "grad_norm": 0.9747064113616943, "learning_rate": 2e-05, "loss": 0.04225169, "step": 9987 }, { "epoch": 19.976, "grad_norm": 1.3863695859909058, "learning_rate": 2e-05, "loss": 0.04229522, "step": 9988 }, { "epoch": 19.978, "grad_norm": 1.4297418594360352, "learning_rate": 2e-05, "loss": 0.05352734, "step": 9989 }, { "epoch": 19.98, "grad_norm": 1.1878514289855957, "learning_rate": 2e-05, "loss": 0.04906711, "step": 9990 }, { "epoch": 19.982, "grad_norm": 1.2906607389450073, "learning_rate": 2e-05, "loss": 0.03765155, "step": 9991 }, { "epoch": 19.984, "grad_norm": 1.30812668800354, "learning_rate": 2e-05, "loss": 0.05759721, "step": 9992 }, { "epoch": 19.986, "grad_norm": 1.327970027923584, "learning_rate": 2e-05, "loss": 0.04500524, "step": 9993 }, { "epoch": 19.988, "grad_norm": 1.3616249561309814, "learning_rate": 2e-05, "loss": 0.04437044, "step": 9994 }, { "epoch": 19.99, "grad_norm": 1.2756716012954712, "learning_rate": 2e-05, "loss": 0.03608905, "step": 9995 }, { "epoch": 19.992, "grad_norm": 1.3079259395599365, "learning_rate": 2e-05, "loss": 0.04226516, "step": 9996 }, { "epoch": 19.994, "grad_norm": 1.7889764308929443, "learning_rate": 2e-05, "loss": 0.04997349, "step": 9997 }, { "epoch": 19.996, "grad_norm": 1.3262531757354736, "learning_rate": 2e-05, "loss": 0.0421113, "step": 9998 }, { "epoch": 19.998, "grad_norm": 1.1342781782150269, "learning_rate": 2e-05, "loss": 0.0441694, "step": 9999 }, { "epoch": 20.0, "grad_norm": 1.0011807680130005, "learning_rate": 2e-05, "loss": 0.03834677, "step": 10000 }, { "epoch": 20.0, "eval_performance": { "AngleClassification_1": 0.996, "AngleClassification_2": 0.996, "AngleClassification_3": 0.9840319361277445, "Equal_1": 0.996, "Equal_2": 0.9680638722554891, "Equal_3": 0.9700598802395209, "LineComparison_1": 1.0, "LineComparison_2": 0.998003992015968, "LineComparison_3": 0.9940119760479041, "Parallel_1": 0.9919839679358717, "Parallel_2": 0.9919839679358717, "Parallel_3": 0.974, "Perpendicular_1": 0.988, "Perpendicular_2": 0.984, "Perpendicular_3": 0.8517034068136272, "PointLiesOnCircle_1": 0.9959919839679359, "PointLiesOnCircle_2": 0.996, "PointLiesOnCircle_3": 0.9848666666666667, "PointLiesOnLine_1": 0.9899799599198397, "PointLiesOnLine_2": 0.9979959919839679, "PointLiesOnLine_3": 0.9760479041916168 }, "eval_runtime": 226.2359, "eval_samples_per_second": 46.412, "eval_steps_per_second": 0.928, "step": 10000 }, { "epoch": 20.002, "grad_norm": 1.127284049987793, "learning_rate": 2e-05, "loss": 0.04092779, "step": 10001 }, { "epoch": 20.004, "grad_norm": 1.2159945964813232, "learning_rate": 2e-05, "loss": 0.02937359, "step": 10002 }, { "epoch": 20.006, "grad_norm": 1.1951302289962769, "learning_rate": 2e-05, "loss": 0.04636762, "step": 10003 }, { "epoch": 20.008, "grad_norm": 1.383665680885315, "learning_rate": 2e-05, "loss": 0.04984976, "step": 10004 }, { "epoch": 20.01, "grad_norm": 0.7439373731613159, "learning_rate": 2e-05, "loss": 0.02024483, "step": 10005 }, { "epoch": 20.012, "grad_norm": 1.339874267578125, "learning_rate": 2e-05, "loss": 0.04749817, "step": 10006 }, { "epoch": 20.014, "grad_norm": 1.2978925704956055, "learning_rate": 2e-05, "loss": 0.04234719, "step": 10007 }, { "epoch": 20.016, "grad_norm": 1.0107332468032837, "learning_rate": 2e-05, "loss": 0.03818996, "step": 10008 }, { "epoch": 20.018, "grad_norm": 2.1409077644348145, "learning_rate": 2e-05, "loss": 0.04500724, "step": 10009 }, { "epoch": 20.02, "grad_norm": 1.3362798690795898, "learning_rate": 2e-05, "loss": 0.06147458, "step": 10010 }, { "epoch": 20.022, "grad_norm": 1.0012301206588745, "learning_rate": 2e-05, "loss": 0.03237762, "step": 10011 }, { "epoch": 20.024, "grad_norm": 2.653005599975586, "learning_rate": 2e-05, "loss": 0.04953773, "step": 10012 }, { "epoch": 20.026, "grad_norm": 1.8172537088394165, "learning_rate": 2e-05, "loss": 0.04584821, "step": 10013 }, { "epoch": 20.028, "grad_norm": 1.2688225507736206, "learning_rate": 2e-05, "loss": 0.05444579, "step": 10014 }, { "epoch": 20.03, "grad_norm": 1.270298719406128, "learning_rate": 2e-05, "loss": 0.04976795, "step": 10015 }, { "epoch": 20.032, "grad_norm": 1.3398442268371582, "learning_rate": 2e-05, "loss": 0.05510159, "step": 10016 }, { "epoch": 20.034, "grad_norm": 2.2006499767303467, "learning_rate": 2e-05, "loss": 0.05911603, "step": 10017 }, { "epoch": 20.036, "grad_norm": 2.3411972522735596, "learning_rate": 2e-05, "loss": 0.05090253, "step": 10018 }, { "epoch": 20.038, "grad_norm": 1.631791114807129, "learning_rate": 2e-05, "loss": 0.05586906, "step": 10019 }, { "epoch": 20.04, "grad_norm": 1.186143159866333, "learning_rate": 2e-05, "loss": 0.03592892, "step": 10020 }, { "epoch": 20.042, "grad_norm": 1.1092864274978638, "learning_rate": 2e-05, "loss": 0.03687192, "step": 10021 }, { "epoch": 20.044, "grad_norm": 1.1306670904159546, "learning_rate": 2e-05, "loss": 0.04861335, "step": 10022 }, { "epoch": 20.046, "grad_norm": 1.3635127544403076, "learning_rate": 2e-05, "loss": 0.0349245, "step": 10023 }, { "epoch": 20.048, "grad_norm": 1.2683796882629395, "learning_rate": 2e-05, "loss": 0.03927577, "step": 10024 }, { "epoch": 20.05, "grad_norm": 2.56442928314209, "learning_rate": 2e-05, "loss": 0.03617909, "step": 10025 }, { "epoch": 20.052, "grad_norm": 1.4810510873794556, "learning_rate": 2e-05, "loss": 0.0485364, "step": 10026 }, { "epoch": 20.054, "grad_norm": 2.7198219299316406, "learning_rate": 2e-05, "loss": 0.04877225, "step": 10027 }, { "epoch": 20.056, "grad_norm": 1.1607643365859985, "learning_rate": 2e-05, "loss": 0.04197336, "step": 10028 }, { "epoch": 20.058, "grad_norm": 1.2049826383590698, "learning_rate": 2e-05, "loss": 0.03773725, "step": 10029 }, { "epoch": 20.06, "grad_norm": 1.1548011302947998, "learning_rate": 2e-05, "loss": 0.03615688, "step": 10030 }, { "epoch": 20.062, "grad_norm": 1.2524808645248413, "learning_rate": 2e-05, "loss": 0.04427896, "step": 10031 }, { "epoch": 20.064, "grad_norm": 0.7943839430809021, "learning_rate": 2e-05, "loss": 0.02417147, "step": 10032 }, { "epoch": 20.066, "grad_norm": 1.7086710929870605, "learning_rate": 2e-05, "loss": 0.06036331, "step": 10033 }, { "epoch": 20.068, "grad_norm": 1.862647294998169, "learning_rate": 2e-05, "loss": 0.04705863, "step": 10034 }, { "epoch": 20.07, "grad_norm": 0.9783092737197876, "learning_rate": 2e-05, "loss": 0.04180858, "step": 10035 }, { "epoch": 20.072, "grad_norm": 0.8657662272453308, "learning_rate": 2e-05, "loss": 0.02798888, "step": 10036 }, { "epoch": 20.074, "grad_norm": 1.2639257907867432, "learning_rate": 2e-05, "loss": 0.05093665, "step": 10037 }, { "epoch": 20.076, "grad_norm": 1.540494680404663, "learning_rate": 2e-05, "loss": 0.03157865, "step": 10038 }, { "epoch": 20.078, "grad_norm": 1.1178789138793945, "learning_rate": 2e-05, "loss": 0.04478153, "step": 10039 }, { "epoch": 20.08, "grad_norm": 1.0298389196395874, "learning_rate": 2e-05, "loss": 0.03717678, "step": 10040 }, { "epoch": 20.082, "grad_norm": 1.119714379310608, "learning_rate": 2e-05, "loss": 0.03268052, "step": 10041 }, { "epoch": 20.084, "grad_norm": 1.4932787418365479, "learning_rate": 2e-05, "loss": 0.04607581, "step": 10042 }, { "epoch": 20.086, "grad_norm": 0.95176762342453, "learning_rate": 2e-05, "loss": 0.04165036, "step": 10043 }, { "epoch": 20.088, "grad_norm": 1.2105460166931152, "learning_rate": 2e-05, "loss": 0.03452426, "step": 10044 }, { "epoch": 20.09, "grad_norm": 1.2063449621200562, "learning_rate": 2e-05, "loss": 0.04971318, "step": 10045 }, { "epoch": 20.092, "grad_norm": 1.3851768970489502, "learning_rate": 2e-05, "loss": 0.05252733, "step": 10046 }, { "epoch": 20.094, "grad_norm": 1.1269818544387817, "learning_rate": 2e-05, "loss": 0.05008313, "step": 10047 }, { "epoch": 20.096, "grad_norm": 3.5604240894317627, "learning_rate": 2e-05, "loss": 0.04828449, "step": 10048 }, { "epoch": 20.098, "grad_norm": 1.0702931880950928, "learning_rate": 2e-05, "loss": 0.03500254, "step": 10049 }, { "epoch": 20.1, "grad_norm": 3.459932327270508, "learning_rate": 2e-05, "loss": 0.03913084, "step": 10050 }, { "epoch": 20.102, "grad_norm": 2.2439889907836914, "learning_rate": 2e-05, "loss": 0.04833048, "step": 10051 }, { "epoch": 20.104, "grad_norm": 1.0361684560775757, "learning_rate": 2e-05, "loss": 0.03574806, "step": 10052 }, { "epoch": 20.106, "grad_norm": 1.2123799324035645, "learning_rate": 2e-05, "loss": 0.05213985, "step": 10053 }, { "epoch": 20.108, "grad_norm": 1.3180760145187378, "learning_rate": 2e-05, "loss": 0.02883743, "step": 10054 }, { "epoch": 20.11, "grad_norm": 1.5634307861328125, "learning_rate": 2e-05, "loss": 0.04479971, "step": 10055 }, { "epoch": 20.112, "grad_norm": 1.3959718942642212, "learning_rate": 2e-05, "loss": 0.05039557, "step": 10056 }, { "epoch": 20.114, "grad_norm": 1.3963791131973267, "learning_rate": 2e-05, "loss": 0.05422051, "step": 10057 }, { "epoch": 20.116, "grad_norm": 1.7040727138519287, "learning_rate": 2e-05, "loss": 0.04533888, "step": 10058 }, { "epoch": 20.118, "grad_norm": 0.8397608995437622, "learning_rate": 2e-05, "loss": 0.03097349, "step": 10059 }, { "epoch": 20.12, "grad_norm": 1.2344368696212769, "learning_rate": 2e-05, "loss": 0.03421988, "step": 10060 }, { "epoch": 20.122, "grad_norm": 1.1958335638046265, "learning_rate": 2e-05, "loss": 0.03738203, "step": 10061 }, { "epoch": 20.124, "grad_norm": 1.6847928762435913, "learning_rate": 2e-05, "loss": 0.04612182, "step": 10062 }, { "epoch": 20.126, "grad_norm": 1.3300483226776123, "learning_rate": 2e-05, "loss": 0.06019877, "step": 10063 }, { "epoch": 20.128, "grad_norm": 1.149237871170044, "learning_rate": 2e-05, "loss": 0.05080335, "step": 10064 }, { "epoch": 20.13, "grad_norm": 1.2036911249160767, "learning_rate": 2e-05, "loss": 0.04776897, "step": 10065 }, { "epoch": 20.132, "grad_norm": 0.8646746873855591, "learning_rate": 2e-05, "loss": 0.03144902, "step": 10066 }, { "epoch": 20.134, "grad_norm": 2.3091139793395996, "learning_rate": 2e-05, "loss": 0.05764953, "step": 10067 }, { "epoch": 20.136, "grad_norm": 1.25112783908844, "learning_rate": 2e-05, "loss": 0.04652739, "step": 10068 }, { "epoch": 20.138, "grad_norm": 1.6571767330169678, "learning_rate": 2e-05, "loss": 0.03990902, "step": 10069 }, { "epoch": 20.14, "grad_norm": 1.9582146406173706, "learning_rate": 2e-05, "loss": 0.05029725, "step": 10070 }, { "epoch": 20.142, "grad_norm": 1.5058319568634033, "learning_rate": 2e-05, "loss": 0.06166413, "step": 10071 }, { "epoch": 20.144, "grad_norm": 1.5961921215057373, "learning_rate": 2e-05, "loss": 0.04206074, "step": 10072 }, { "epoch": 20.146, "grad_norm": 0.9953280091285706, "learning_rate": 2e-05, "loss": 0.04185956, "step": 10073 }, { "epoch": 20.148, "grad_norm": 1.4559868574142456, "learning_rate": 2e-05, "loss": 0.0373886, "step": 10074 }, { "epoch": 20.15, "grad_norm": 1.4694159030914307, "learning_rate": 2e-05, "loss": 0.05773043, "step": 10075 }, { "epoch": 20.152, "grad_norm": 0.969153881072998, "learning_rate": 2e-05, "loss": 0.04082034, "step": 10076 }, { "epoch": 20.154, "grad_norm": 1.5008985996246338, "learning_rate": 2e-05, "loss": 0.04427098, "step": 10077 }, { "epoch": 20.156, "grad_norm": 0.8656779527664185, "learning_rate": 2e-05, "loss": 0.03067489, "step": 10078 }, { "epoch": 20.158, "grad_norm": 1.7384730577468872, "learning_rate": 2e-05, "loss": 0.065503, "step": 10079 }, { "epoch": 20.16, "grad_norm": 2.1725077629089355, "learning_rate": 2e-05, "loss": 0.06007623, "step": 10080 }, { "epoch": 20.162, "grad_norm": 1.115153431892395, "learning_rate": 2e-05, "loss": 0.04562127, "step": 10081 }, { "epoch": 20.164, "grad_norm": 1.5023186206817627, "learning_rate": 2e-05, "loss": 0.02449904, "step": 10082 }, { "epoch": 20.166, "grad_norm": 2.2025842666625977, "learning_rate": 2e-05, "loss": 0.03233415, "step": 10083 }, { "epoch": 20.168, "grad_norm": 0.9161062836647034, "learning_rate": 2e-05, "loss": 0.03692743, "step": 10084 }, { "epoch": 20.17, "grad_norm": 1.3516541719436646, "learning_rate": 2e-05, "loss": 0.04077698, "step": 10085 }, { "epoch": 20.172, "grad_norm": 1.1397532224655151, "learning_rate": 2e-05, "loss": 0.03421555, "step": 10086 }, { "epoch": 20.174, "grad_norm": 1.5021570920944214, "learning_rate": 2e-05, "loss": 0.03015932, "step": 10087 }, { "epoch": 20.176, "grad_norm": 1.482361912727356, "learning_rate": 2e-05, "loss": 0.04250352, "step": 10088 }, { "epoch": 20.178, "grad_norm": 3.305467128753662, "learning_rate": 2e-05, "loss": 0.05214929, "step": 10089 }, { "epoch": 20.18, "grad_norm": 1.243033528327942, "learning_rate": 2e-05, "loss": 0.0432499, "step": 10090 }, { "epoch": 20.182, "grad_norm": 2.000516176223755, "learning_rate": 2e-05, "loss": 0.04775564, "step": 10091 }, { "epoch": 20.184, "grad_norm": 1.366226077079773, "learning_rate": 2e-05, "loss": 0.0531464, "step": 10092 }, { "epoch": 20.186, "grad_norm": 1.5327894687652588, "learning_rate": 2e-05, "loss": 0.04527364, "step": 10093 }, { "epoch": 20.188, "grad_norm": 1.4279086589813232, "learning_rate": 2e-05, "loss": 0.04522057, "step": 10094 }, { "epoch": 20.19, "grad_norm": 2.7902400493621826, "learning_rate": 2e-05, "loss": 0.05276362, "step": 10095 }, { "epoch": 20.192, "grad_norm": 1.4314908981323242, "learning_rate": 2e-05, "loss": 0.04141193, "step": 10096 }, { "epoch": 20.194, "grad_norm": 2.09260630607605, "learning_rate": 2e-05, "loss": 0.03445105, "step": 10097 }, { "epoch": 20.196, "grad_norm": 2.0250329971313477, "learning_rate": 2e-05, "loss": 0.04754239, "step": 10098 }, { "epoch": 20.198, "grad_norm": 1.4299249649047852, "learning_rate": 2e-05, "loss": 0.0427656, "step": 10099 }, { "epoch": 20.2, "grad_norm": 1.483863353729248, "learning_rate": 2e-05, "loss": 0.03940018, "step": 10100 }, { "epoch": 20.202, "grad_norm": 5.425300121307373, "learning_rate": 2e-05, "loss": 0.05327262, "step": 10101 }, { "epoch": 20.204, "grad_norm": 0.9125757217407227, "learning_rate": 2e-05, "loss": 0.03425956, "step": 10102 }, { "epoch": 20.206, "grad_norm": 1.8118587732315063, "learning_rate": 2e-05, "loss": 0.05771492, "step": 10103 }, { "epoch": 20.208, "grad_norm": 1.0001643896102905, "learning_rate": 2e-05, "loss": 0.03523345, "step": 10104 }, { "epoch": 20.21, "grad_norm": 1.3148696422576904, "learning_rate": 2e-05, "loss": 0.06470096, "step": 10105 }, { "epoch": 20.212, "grad_norm": 1.3661611080169678, "learning_rate": 2e-05, "loss": 0.04957868, "step": 10106 }, { "epoch": 20.214, "grad_norm": 1.0026696920394897, "learning_rate": 2e-05, "loss": 0.04280265, "step": 10107 }, { "epoch": 20.216, "grad_norm": 1.3734781742095947, "learning_rate": 2e-05, "loss": 0.0529963, "step": 10108 }, { "epoch": 20.218, "grad_norm": 1.6206780672073364, "learning_rate": 2e-05, "loss": 0.05038095, "step": 10109 }, { "epoch": 20.22, "grad_norm": 1.801062822341919, "learning_rate": 2e-05, "loss": 0.05541755, "step": 10110 }, { "epoch": 20.222, "grad_norm": 1.376810908317566, "learning_rate": 2e-05, "loss": 0.05840788, "step": 10111 }, { "epoch": 20.224, "grad_norm": 1.2406682968139648, "learning_rate": 2e-05, "loss": 0.04380761, "step": 10112 }, { "epoch": 20.226, "grad_norm": 2.923719644546509, "learning_rate": 2e-05, "loss": 0.06667317, "step": 10113 }, { "epoch": 20.228, "grad_norm": 0.9990182518959045, "learning_rate": 2e-05, "loss": 0.03537645, "step": 10114 }, { "epoch": 20.23, "grad_norm": 1.7336490154266357, "learning_rate": 2e-05, "loss": 0.05489245, "step": 10115 }, { "epoch": 20.232, "grad_norm": 1.254774570465088, "learning_rate": 2e-05, "loss": 0.04664354, "step": 10116 }, { "epoch": 20.234, "grad_norm": 1.7745966911315918, "learning_rate": 2e-05, "loss": 0.03555185, "step": 10117 }, { "epoch": 20.236, "grad_norm": 1.1603413820266724, "learning_rate": 2e-05, "loss": 0.0529111, "step": 10118 }, { "epoch": 20.238, "grad_norm": 3.6189393997192383, "learning_rate": 2e-05, "loss": 0.07565692, "step": 10119 }, { "epoch": 20.24, "grad_norm": 1.3650014400482178, "learning_rate": 2e-05, "loss": 0.04595004, "step": 10120 }, { "epoch": 20.242, "grad_norm": 1.0776909589767456, "learning_rate": 2e-05, "loss": 0.04143743, "step": 10121 }, { "epoch": 20.244, "grad_norm": 1.3992996215820312, "learning_rate": 2e-05, "loss": 0.04920649, "step": 10122 }, { "epoch": 20.246, "grad_norm": 0.9790788888931274, "learning_rate": 2e-05, "loss": 0.02805899, "step": 10123 }, { "epoch": 20.248, "grad_norm": 1.3699696063995361, "learning_rate": 2e-05, "loss": 0.05215797, "step": 10124 }, { "epoch": 20.25, "grad_norm": 0.9365881681442261, "learning_rate": 2e-05, "loss": 0.03423967, "step": 10125 }, { "epoch": 20.252, "grad_norm": 1.2202496528625488, "learning_rate": 2e-05, "loss": 0.05319753, "step": 10126 }, { "epoch": 20.254, "grad_norm": 1.924760103225708, "learning_rate": 2e-05, "loss": 0.06585795, "step": 10127 }, { "epoch": 20.256, "grad_norm": 1.0630261898040771, "learning_rate": 2e-05, "loss": 0.02922217, "step": 10128 }, { "epoch": 20.258, "grad_norm": 1.077317476272583, "learning_rate": 2e-05, "loss": 0.04042543, "step": 10129 }, { "epoch": 20.26, "grad_norm": 1.3726462125778198, "learning_rate": 2e-05, "loss": 0.0473836, "step": 10130 }, { "epoch": 20.262, "grad_norm": 1.4987577199935913, "learning_rate": 2e-05, "loss": 0.04243626, "step": 10131 }, { "epoch": 20.264, "grad_norm": 0.9211649298667908, "learning_rate": 2e-05, "loss": 0.0284448, "step": 10132 }, { "epoch": 20.266, "grad_norm": 2.0797038078308105, "learning_rate": 2e-05, "loss": 0.06800514, "step": 10133 }, { "epoch": 20.268, "grad_norm": 1.9882663488388062, "learning_rate": 2e-05, "loss": 0.06047645, "step": 10134 }, { "epoch": 20.27, "grad_norm": 1.143628716468811, "learning_rate": 2e-05, "loss": 0.05360023, "step": 10135 }, { "epoch": 20.272, "grad_norm": 2.587083101272583, "learning_rate": 2e-05, "loss": 0.07420809, "step": 10136 }, { "epoch": 20.274, "grad_norm": 1.0694243907928467, "learning_rate": 2e-05, "loss": 0.04723281, "step": 10137 }, { "epoch": 20.276, "grad_norm": 1.1372148990631104, "learning_rate": 2e-05, "loss": 0.05700918, "step": 10138 }, { "epoch": 20.278, "grad_norm": 1.1703349351882935, "learning_rate": 2e-05, "loss": 0.04315702, "step": 10139 }, { "epoch": 20.28, "grad_norm": 1.6638000011444092, "learning_rate": 2e-05, "loss": 0.04923899, "step": 10140 }, { "epoch": 20.282, "grad_norm": 1.2647908926010132, "learning_rate": 2e-05, "loss": 0.05854087, "step": 10141 }, { "epoch": 20.284, "grad_norm": 1.5534178018569946, "learning_rate": 2e-05, "loss": 0.04616763, "step": 10142 }, { "epoch": 20.286, "grad_norm": 0.8279833793640137, "learning_rate": 2e-05, "loss": 0.03093289, "step": 10143 }, { "epoch": 20.288, "grad_norm": 1.4477670192718506, "learning_rate": 2e-05, "loss": 0.04702333, "step": 10144 }, { "epoch": 20.29, "grad_norm": 1.4276376962661743, "learning_rate": 2e-05, "loss": 0.04652718, "step": 10145 }, { "epoch": 20.292, "grad_norm": 1.5534920692443848, "learning_rate": 2e-05, "loss": 0.0381812, "step": 10146 }, { "epoch": 20.294, "grad_norm": 1.0579372644424438, "learning_rate": 2e-05, "loss": 0.03842229, "step": 10147 }, { "epoch": 20.296, "grad_norm": 0.9664697051048279, "learning_rate": 2e-05, "loss": 0.03105097, "step": 10148 }, { "epoch": 20.298, "grad_norm": 1.6579413414001465, "learning_rate": 2e-05, "loss": 0.04448261, "step": 10149 }, { "epoch": 20.3, "grad_norm": 1.314843773841858, "learning_rate": 2e-05, "loss": 0.04033219, "step": 10150 }, { "epoch": 20.302, "grad_norm": 1.024294137954712, "learning_rate": 2e-05, "loss": 0.03863858, "step": 10151 }, { "epoch": 20.304, "grad_norm": 0.7670977711677551, "learning_rate": 2e-05, "loss": 0.02628741, "step": 10152 }, { "epoch": 20.306, "grad_norm": 1.0504965782165527, "learning_rate": 2e-05, "loss": 0.05121246, "step": 10153 }, { "epoch": 20.308, "grad_norm": 1.1332063674926758, "learning_rate": 2e-05, "loss": 0.05957182, "step": 10154 }, { "epoch": 20.31, "grad_norm": 1.5719181299209595, "learning_rate": 2e-05, "loss": 0.04905337, "step": 10155 }, { "epoch": 20.312, "grad_norm": 1.7581254243850708, "learning_rate": 2e-05, "loss": 0.04820206, "step": 10156 }, { "epoch": 20.314, "grad_norm": 1.4229159355163574, "learning_rate": 2e-05, "loss": 0.05189781, "step": 10157 }, { "epoch": 20.316, "grad_norm": 1.4341851472854614, "learning_rate": 2e-05, "loss": 0.04942834, "step": 10158 }, { "epoch": 20.318, "grad_norm": 1.19769287109375, "learning_rate": 2e-05, "loss": 0.04812186, "step": 10159 }, { "epoch": 20.32, "grad_norm": 1.109605312347412, "learning_rate": 2e-05, "loss": 0.03712996, "step": 10160 }, { "epoch": 20.322, "grad_norm": 0.8154127597808838, "learning_rate": 2e-05, "loss": 0.02339576, "step": 10161 }, { "epoch": 20.324, "grad_norm": 1.8360966444015503, "learning_rate": 2e-05, "loss": 0.0448301, "step": 10162 }, { "epoch": 20.326, "grad_norm": 1.6078212261199951, "learning_rate": 2e-05, "loss": 0.04163124, "step": 10163 }, { "epoch": 20.328, "grad_norm": 1.4216164350509644, "learning_rate": 2e-05, "loss": 0.03090533, "step": 10164 }, { "epoch": 20.33, "grad_norm": 2.2679834365844727, "learning_rate": 2e-05, "loss": 0.05675884, "step": 10165 }, { "epoch": 20.332, "grad_norm": 0.9259978532791138, "learning_rate": 2e-05, "loss": 0.02993172, "step": 10166 }, { "epoch": 20.334, "grad_norm": 1.9035354852676392, "learning_rate": 2e-05, "loss": 0.06232638, "step": 10167 }, { "epoch": 20.336, "grad_norm": 1.1959291696548462, "learning_rate": 2e-05, "loss": 0.04528594, "step": 10168 }, { "epoch": 20.338, "grad_norm": 2.8003320693969727, "learning_rate": 2e-05, "loss": 0.05229426, "step": 10169 }, { "epoch": 20.34, "grad_norm": 1.201270580291748, "learning_rate": 2e-05, "loss": 0.05930093, "step": 10170 }, { "epoch": 20.342, "grad_norm": 2.1972343921661377, "learning_rate": 2e-05, "loss": 0.03533514, "step": 10171 }, { "epoch": 20.344, "grad_norm": 0.9845930933952332, "learning_rate": 2e-05, "loss": 0.03654853, "step": 10172 }, { "epoch": 20.346, "grad_norm": 1.281238317489624, "learning_rate": 2e-05, "loss": 0.04299886, "step": 10173 }, { "epoch": 20.348, "grad_norm": 1.3360427618026733, "learning_rate": 2e-05, "loss": 0.05187582, "step": 10174 }, { "epoch": 20.35, "grad_norm": 1.6089589595794678, "learning_rate": 2e-05, "loss": 0.04967071, "step": 10175 }, { "epoch": 20.352, "grad_norm": 1.7011065483093262, "learning_rate": 2e-05, "loss": 0.04751972, "step": 10176 }, { "epoch": 20.354, "grad_norm": 1.2188957929611206, "learning_rate": 2e-05, "loss": 0.04685476, "step": 10177 }, { "epoch": 20.356, "grad_norm": 1.1148327589035034, "learning_rate": 2e-05, "loss": 0.03792383, "step": 10178 }, { "epoch": 20.358, "grad_norm": 1.760035753250122, "learning_rate": 2e-05, "loss": 0.05031365, "step": 10179 }, { "epoch": 20.36, "grad_norm": 5.721475124359131, "learning_rate": 2e-05, "loss": 0.03906047, "step": 10180 }, { "epoch": 20.362, "grad_norm": 1.7922227382659912, "learning_rate": 2e-05, "loss": 0.02920638, "step": 10181 }, { "epoch": 20.364, "grad_norm": 1.2500402927398682, "learning_rate": 2e-05, "loss": 0.06090016, "step": 10182 }, { "epoch": 20.366, "grad_norm": 1.0983604192733765, "learning_rate": 2e-05, "loss": 0.04513254, "step": 10183 }, { "epoch": 20.368, "grad_norm": 1.3607755899429321, "learning_rate": 2e-05, "loss": 0.03430536, "step": 10184 }, { "epoch": 20.37, "grad_norm": 1.3529531955718994, "learning_rate": 2e-05, "loss": 0.0402711, "step": 10185 }, { "epoch": 20.372, "grad_norm": 1.385323405265808, "learning_rate": 2e-05, "loss": 0.03134587, "step": 10186 }, { "epoch": 20.374, "grad_norm": 1.9540444612503052, "learning_rate": 2e-05, "loss": 0.05450889, "step": 10187 }, { "epoch": 20.376, "grad_norm": 1.102575659751892, "learning_rate": 2e-05, "loss": 0.04622038, "step": 10188 }, { "epoch": 20.378, "grad_norm": 1.4392669200897217, "learning_rate": 2e-05, "loss": 0.03235029, "step": 10189 }, { "epoch": 20.38, "grad_norm": 1.1985517740249634, "learning_rate": 2e-05, "loss": 0.04795422, "step": 10190 }, { "epoch": 20.382, "grad_norm": 2.912957191467285, "learning_rate": 2e-05, "loss": 0.04838662, "step": 10191 }, { "epoch": 20.384, "grad_norm": 1.424532175064087, "learning_rate": 2e-05, "loss": 0.06633276, "step": 10192 }, { "epoch": 20.386, "grad_norm": 3.126333236694336, "learning_rate": 2e-05, "loss": 0.07727267, "step": 10193 }, { "epoch": 20.388, "grad_norm": 1.037419319152832, "learning_rate": 2e-05, "loss": 0.04210549, "step": 10194 }, { "epoch": 20.39, "grad_norm": 1.0489381551742554, "learning_rate": 2e-05, "loss": 0.04458465, "step": 10195 }, { "epoch": 20.392, "grad_norm": 1.6810097694396973, "learning_rate": 2e-05, "loss": 0.03292147, "step": 10196 }, { "epoch": 20.394, "grad_norm": 1.8507788181304932, "learning_rate": 2e-05, "loss": 0.04262263, "step": 10197 }, { "epoch": 20.396, "grad_norm": 1.7001067399978638, "learning_rate": 2e-05, "loss": 0.04962175, "step": 10198 }, { "epoch": 20.398, "grad_norm": 1.0742583274841309, "learning_rate": 2e-05, "loss": 0.03843827, "step": 10199 }, { "epoch": 20.4, "grad_norm": 1.2791439294815063, "learning_rate": 2e-05, "loss": 0.04217529, "step": 10200 }, { "epoch": 20.402, "grad_norm": 1.0216410160064697, "learning_rate": 2e-05, "loss": 0.03967939, "step": 10201 }, { "epoch": 20.404, "grad_norm": 1.2969735860824585, "learning_rate": 2e-05, "loss": 0.04790597, "step": 10202 }, { "epoch": 20.406, "grad_norm": 1.1163370609283447, "learning_rate": 2e-05, "loss": 0.04177451, "step": 10203 }, { "epoch": 20.408, "grad_norm": 1.3300760984420776, "learning_rate": 2e-05, "loss": 0.05363194, "step": 10204 }, { "epoch": 20.41, "grad_norm": 1.242143988609314, "learning_rate": 2e-05, "loss": 0.04320357, "step": 10205 }, { "epoch": 20.412, "grad_norm": 2.6196517944335938, "learning_rate": 2e-05, "loss": 0.05824392, "step": 10206 }, { "epoch": 20.414, "grad_norm": 1.3999351263046265, "learning_rate": 2e-05, "loss": 0.05536207, "step": 10207 }, { "epoch": 20.416, "grad_norm": 1.636181116104126, "learning_rate": 2e-05, "loss": 0.05983796, "step": 10208 }, { "epoch": 20.418, "grad_norm": 1.58070707321167, "learning_rate": 2e-05, "loss": 0.05444519, "step": 10209 }, { "epoch": 20.42, "grad_norm": 5.071904182434082, "learning_rate": 2e-05, "loss": 0.05505716, "step": 10210 }, { "epoch": 20.422, "grad_norm": 1.4000588655471802, "learning_rate": 2e-05, "loss": 0.04153286, "step": 10211 }, { "epoch": 20.424, "grad_norm": 1.121661901473999, "learning_rate": 2e-05, "loss": 0.03576649, "step": 10212 }, { "epoch": 20.426, "grad_norm": 0.8570441603660583, "learning_rate": 2e-05, "loss": 0.0261089, "step": 10213 }, { "epoch": 20.428, "grad_norm": 2.150871992111206, "learning_rate": 2e-05, "loss": 0.04892308, "step": 10214 }, { "epoch": 20.43, "grad_norm": 1.2137947082519531, "learning_rate": 2e-05, "loss": 0.03868676, "step": 10215 }, { "epoch": 20.432, "grad_norm": 4.629814147949219, "learning_rate": 2e-05, "loss": 0.04472025, "step": 10216 }, { "epoch": 20.434, "grad_norm": 1.5530465841293335, "learning_rate": 2e-05, "loss": 0.05563099, "step": 10217 }, { "epoch": 20.436, "grad_norm": 1.2164994478225708, "learning_rate": 2e-05, "loss": 0.03554422, "step": 10218 }, { "epoch": 20.438, "grad_norm": 1.203985571861267, "learning_rate": 2e-05, "loss": 0.04696197, "step": 10219 }, { "epoch": 20.44, "grad_norm": 1.067553162574768, "learning_rate": 2e-05, "loss": 0.04774018, "step": 10220 }, { "epoch": 20.442, "grad_norm": 1.0571064949035645, "learning_rate": 2e-05, "loss": 0.03765604, "step": 10221 }, { "epoch": 20.444, "grad_norm": 0.9968178868293762, "learning_rate": 2e-05, "loss": 0.0331139, "step": 10222 }, { "epoch": 20.446, "grad_norm": 2.3849425315856934, "learning_rate": 2e-05, "loss": 0.05791334, "step": 10223 }, { "epoch": 20.448, "grad_norm": 1.1549525260925293, "learning_rate": 2e-05, "loss": 0.03784906, "step": 10224 }, { "epoch": 20.45, "grad_norm": 0.9957252740859985, "learning_rate": 2e-05, "loss": 0.02337346, "step": 10225 }, { "epoch": 20.452, "grad_norm": 0.9673315286636353, "learning_rate": 2e-05, "loss": 0.0406181, "step": 10226 }, { "epoch": 20.454, "grad_norm": 1.179891586303711, "learning_rate": 2e-05, "loss": 0.05067735, "step": 10227 }, { "epoch": 20.456, "grad_norm": 1.3784382343292236, "learning_rate": 2e-05, "loss": 0.05373095, "step": 10228 }, { "epoch": 20.458, "grad_norm": 1.4991099834442139, "learning_rate": 2e-05, "loss": 0.05865605, "step": 10229 }, { "epoch": 20.46, "grad_norm": 1.3689054250717163, "learning_rate": 2e-05, "loss": 0.03278357, "step": 10230 }, { "epoch": 20.462, "grad_norm": 1.9473291635513306, "learning_rate": 2e-05, "loss": 0.07949729, "step": 10231 }, { "epoch": 20.464, "grad_norm": 1.6002066135406494, "learning_rate": 2e-05, "loss": 0.05632165, "step": 10232 }, { "epoch": 20.466, "grad_norm": 1.2916295528411865, "learning_rate": 2e-05, "loss": 0.04640568, "step": 10233 }, { "epoch": 20.468, "grad_norm": 10.343672752380371, "learning_rate": 2e-05, "loss": 0.0383925, "step": 10234 }, { "epoch": 20.47, "grad_norm": 1.1188287734985352, "learning_rate": 2e-05, "loss": 0.05433705, "step": 10235 }, { "epoch": 20.472, "grad_norm": 3.50317120552063, "learning_rate": 2e-05, "loss": 0.05275811, "step": 10236 }, { "epoch": 20.474, "grad_norm": 1.1755462884902954, "learning_rate": 2e-05, "loss": 0.04861164, "step": 10237 }, { "epoch": 20.476, "grad_norm": 2.5035364627838135, "learning_rate": 2e-05, "loss": 0.06815332, "step": 10238 }, { "epoch": 20.478, "grad_norm": 1.119718074798584, "learning_rate": 2e-05, "loss": 0.03829898, "step": 10239 }, { "epoch": 20.48, "grad_norm": 1.1700162887573242, "learning_rate": 2e-05, "loss": 0.03082298, "step": 10240 }, { "epoch": 20.482, "grad_norm": 1.1321544647216797, "learning_rate": 2e-05, "loss": 0.0511568, "step": 10241 }, { "epoch": 20.484, "grad_norm": 1.7816457748413086, "learning_rate": 2e-05, "loss": 0.04333946, "step": 10242 }, { "epoch": 20.486, "grad_norm": 0.9954708814620972, "learning_rate": 2e-05, "loss": 0.0433597, "step": 10243 }, { "epoch": 20.488, "grad_norm": 1.0179307460784912, "learning_rate": 2e-05, "loss": 0.04345551, "step": 10244 }, { "epoch": 20.49, "grad_norm": 0.7298291325569153, "learning_rate": 2e-05, "loss": 0.02046543, "step": 10245 }, { "epoch": 20.492, "grad_norm": 0.8061966896057129, "learning_rate": 2e-05, "loss": 0.03423957, "step": 10246 }, { "epoch": 20.494, "grad_norm": 2.1785058975219727, "learning_rate": 2e-05, "loss": 0.05218278, "step": 10247 }, { "epoch": 20.496, "grad_norm": 1.3597095012664795, "learning_rate": 2e-05, "loss": 0.04774138, "step": 10248 }, { "epoch": 20.498, "grad_norm": 1.2271349430084229, "learning_rate": 2e-05, "loss": 0.04160356, "step": 10249 }, { "epoch": 20.5, "grad_norm": 1.5621925592422485, "learning_rate": 2e-05, "loss": 0.06106496, "step": 10250 }, { "epoch": 20.502, "grad_norm": 1.7379794120788574, "learning_rate": 2e-05, "loss": 0.04996411, "step": 10251 }, { "epoch": 20.504, "grad_norm": 2.060039758682251, "learning_rate": 2e-05, "loss": 0.05726653, "step": 10252 }, { "epoch": 20.506, "grad_norm": 1.2527276277542114, "learning_rate": 2e-05, "loss": 0.04629599, "step": 10253 }, { "epoch": 20.508, "grad_norm": 2.2441203594207764, "learning_rate": 2e-05, "loss": 0.04669379, "step": 10254 }, { "epoch": 20.51, "grad_norm": 1.3785885572433472, "learning_rate": 2e-05, "loss": 0.05399256, "step": 10255 }, { "epoch": 20.512, "grad_norm": 1.7719539403915405, "learning_rate": 2e-05, "loss": 0.04840071, "step": 10256 }, { "epoch": 20.514, "grad_norm": 2.072676181793213, "learning_rate": 2e-05, "loss": 0.07079047, "step": 10257 }, { "epoch": 20.516, "grad_norm": 1.4549145698547363, "learning_rate": 2e-05, "loss": 0.04377753, "step": 10258 }, { "epoch": 20.518, "grad_norm": 1.7947031259536743, "learning_rate": 2e-05, "loss": 0.06556946, "step": 10259 }, { "epoch": 20.52, "grad_norm": 1.935835599899292, "learning_rate": 2e-05, "loss": 0.06388646, "step": 10260 }, { "epoch": 20.522, "grad_norm": 1.935876727104187, "learning_rate": 2e-05, "loss": 0.06333669, "step": 10261 }, { "epoch": 20.524, "grad_norm": 1.188207983970642, "learning_rate": 2e-05, "loss": 0.03658657, "step": 10262 }, { "epoch": 20.526, "grad_norm": 1.2472844123840332, "learning_rate": 2e-05, "loss": 0.04105464, "step": 10263 }, { "epoch": 20.528, "grad_norm": 0.9346619844436646, "learning_rate": 2e-05, "loss": 0.02723719, "step": 10264 }, { "epoch": 20.53, "grad_norm": 2.6355645656585693, "learning_rate": 2e-05, "loss": 0.04632908, "step": 10265 }, { "epoch": 20.532, "grad_norm": 2.4388010501861572, "learning_rate": 2e-05, "loss": 0.05882799, "step": 10266 }, { "epoch": 20.534, "grad_norm": 1.9076504707336426, "learning_rate": 2e-05, "loss": 0.06112557, "step": 10267 }, { "epoch": 20.536, "grad_norm": 1.3747074604034424, "learning_rate": 2e-05, "loss": 0.04082569, "step": 10268 }, { "epoch": 20.538, "grad_norm": 1.1217515468597412, "learning_rate": 2e-05, "loss": 0.03272224, "step": 10269 }, { "epoch": 20.54, "grad_norm": 1.0433509349822998, "learning_rate": 2e-05, "loss": 0.03788045, "step": 10270 }, { "epoch": 20.542, "grad_norm": 1.0351120233535767, "learning_rate": 2e-05, "loss": 0.03685319, "step": 10271 }, { "epoch": 20.544, "grad_norm": 1.2214738130569458, "learning_rate": 2e-05, "loss": 0.04792245, "step": 10272 }, { "epoch": 20.546, "grad_norm": 1.4052644968032837, "learning_rate": 2e-05, "loss": 0.05351058, "step": 10273 }, { "epoch": 20.548000000000002, "grad_norm": 1.0703545808792114, "learning_rate": 2e-05, "loss": 0.03894218, "step": 10274 }, { "epoch": 20.55, "grad_norm": 1.4211127758026123, "learning_rate": 2e-05, "loss": 0.04353243, "step": 10275 }, { "epoch": 20.552, "grad_norm": 0.9819540977478027, "learning_rate": 2e-05, "loss": 0.03631639, "step": 10276 }, { "epoch": 20.554, "grad_norm": 2.016446828842163, "learning_rate": 2e-05, "loss": 0.03591831, "step": 10277 }, { "epoch": 20.556, "grad_norm": 1.0522786378860474, "learning_rate": 2e-05, "loss": 0.03785239, "step": 10278 }, { "epoch": 20.558, "grad_norm": 0.8631588220596313, "learning_rate": 2e-05, "loss": 0.03133775, "step": 10279 }, { "epoch": 20.56, "grad_norm": 1.2176830768585205, "learning_rate": 2e-05, "loss": 0.04396147, "step": 10280 }, { "epoch": 20.562, "grad_norm": 1.0524828433990479, "learning_rate": 2e-05, "loss": 0.05083149, "step": 10281 }, { "epoch": 20.564, "grad_norm": 1.2772709131240845, "learning_rate": 2e-05, "loss": 0.04639094, "step": 10282 }, { "epoch": 20.566, "grad_norm": 1.0644011497497559, "learning_rate": 2e-05, "loss": 0.03369395, "step": 10283 }, { "epoch": 20.568, "grad_norm": 1.5643686056137085, "learning_rate": 2e-05, "loss": 0.05213393, "step": 10284 }, { "epoch": 20.57, "grad_norm": 1.473830223083496, "learning_rate": 2e-05, "loss": 0.04265124, "step": 10285 }, { "epoch": 20.572, "grad_norm": 3.3073060512542725, "learning_rate": 2e-05, "loss": 0.05475201, "step": 10286 }, { "epoch": 20.574, "grad_norm": 1.2723321914672852, "learning_rate": 2e-05, "loss": 0.03993881, "step": 10287 }, { "epoch": 20.576, "grad_norm": 2.2323572635650635, "learning_rate": 2e-05, "loss": 0.0508097, "step": 10288 }, { "epoch": 20.578, "grad_norm": 1.0759077072143555, "learning_rate": 2e-05, "loss": 0.04182278, "step": 10289 }, { "epoch": 20.58, "grad_norm": 2.222902774810791, "learning_rate": 2e-05, "loss": 0.04663807, "step": 10290 }, { "epoch": 20.582, "grad_norm": 1.3857841491699219, "learning_rate": 2e-05, "loss": 0.04347511, "step": 10291 }, { "epoch": 20.584, "grad_norm": 1.4425257444381714, "learning_rate": 2e-05, "loss": 0.0354899, "step": 10292 }, { "epoch": 20.586, "grad_norm": 1.1552139520645142, "learning_rate": 2e-05, "loss": 0.05228887, "step": 10293 }, { "epoch": 20.588, "grad_norm": 1.1056689023971558, "learning_rate": 2e-05, "loss": 0.05262728, "step": 10294 }, { "epoch": 20.59, "grad_norm": 1.3791316747665405, "learning_rate": 2e-05, "loss": 0.05119953, "step": 10295 }, { "epoch": 20.592, "grad_norm": 1.4285564422607422, "learning_rate": 2e-05, "loss": 0.05208554, "step": 10296 }, { "epoch": 20.594, "grad_norm": 0.8674381375312805, "learning_rate": 2e-05, "loss": 0.04094439, "step": 10297 }, { "epoch": 20.596, "grad_norm": 1.4580371379852295, "learning_rate": 2e-05, "loss": 0.04853464, "step": 10298 }, { "epoch": 20.598, "grad_norm": 1.4161983728408813, "learning_rate": 2e-05, "loss": 0.03832055, "step": 10299 }, { "epoch": 20.6, "grad_norm": 1.309073567390442, "learning_rate": 2e-05, "loss": 0.06369597, "step": 10300 }, { "epoch": 20.602, "grad_norm": 0.980151355266571, "learning_rate": 2e-05, "loss": 0.04513336, "step": 10301 }, { "epoch": 20.604, "grad_norm": 1.336606740951538, "learning_rate": 2e-05, "loss": 0.04543047, "step": 10302 }, { "epoch": 20.606, "grad_norm": 2.1489081382751465, "learning_rate": 2e-05, "loss": 0.05139912, "step": 10303 }, { "epoch": 20.608, "grad_norm": 0.9527546167373657, "learning_rate": 2e-05, "loss": 0.03579245, "step": 10304 }, { "epoch": 20.61, "grad_norm": 1.2831027507781982, "learning_rate": 2e-05, "loss": 0.05445024, "step": 10305 }, { "epoch": 20.612, "grad_norm": 1.0742995738983154, "learning_rate": 2e-05, "loss": 0.04675096, "step": 10306 }, { "epoch": 20.614, "grad_norm": 1.2579083442687988, "learning_rate": 2e-05, "loss": 0.05262338, "step": 10307 }, { "epoch": 20.616, "grad_norm": 1.375732421875, "learning_rate": 2e-05, "loss": 0.04900161, "step": 10308 }, { "epoch": 20.618, "grad_norm": 1.0515258312225342, "learning_rate": 2e-05, "loss": 0.05350669, "step": 10309 }, { "epoch": 20.62, "grad_norm": 1.0619442462921143, "learning_rate": 2e-05, "loss": 0.03946974, "step": 10310 }, { "epoch": 20.622, "grad_norm": 1.4386487007141113, "learning_rate": 2e-05, "loss": 0.0429085, "step": 10311 }, { "epoch": 20.624, "grad_norm": 1.3069201707839966, "learning_rate": 2e-05, "loss": 0.04455796, "step": 10312 }, { "epoch": 20.626, "grad_norm": 1.4408050775527954, "learning_rate": 2e-05, "loss": 0.04279725, "step": 10313 }, { "epoch": 20.628, "grad_norm": 4.129767417907715, "learning_rate": 2e-05, "loss": 0.0583929, "step": 10314 }, { "epoch": 20.63, "grad_norm": 2.063295602798462, "learning_rate": 2e-05, "loss": 0.04142103, "step": 10315 }, { "epoch": 20.632, "grad_norm": 1.0283076763153076, "learning_rate": 2e-05, "loss": 0.04039693, "step": 10316 }, { "epoch": 20.634, "grad_norm": 1.4450435638427734, "learning_rate": 2e-05, "loss": 0.05489752, "step": 10317 }, { "epoch": 20.636, "grad_norm": 1.292588472366333, "learning_rate": 2e-05, "loss": 0.02953169, "step": 10318 }, { "epoch": 20.638, "grad_norm": 1.301775336265564, "learning_rate": 2e-05, "loss": 0.04923376, "step": 10319 }, { "epoch": 20.64, "grad_norm": 1.1822822093963623, "learning_rate": 2e-05, "loss": 0.05452499, "step": 10320 }, { "epoch": 20.642, "grad_norm": 2.0157763957977295, "learning_rate": 2e-05, "loss": 0.04730494, "step": 10321 }, { "epoch": 20.644, "grad_norm": 1.2999160289764404, "learning_rate": 2e-05, "loss": 0.0413442, "step": 10322 }, { "epoch": 20.646, "grad_norm": 1.1124050617218018, "learning_rate": 2e-05, "loss": 0.04760566, "step": 10323 }, { "epoch": 20.648, "grad_norm": 1.5610244274139404, "learning_rate": 2e-05, "loss": 0.04336926, "step": 10324 }, { "epoch": 20.65, "grad_norm": 1.2809427976608276, "learning_rate": 2e-05, "loss": 0.05426908, "step": 10325 }, { "epoch": 20.652, "grad_norm": 1.1437517404556274, "learning_rate": 2e-05, "loss": 0.04141314, "step": 10326 }, { "epoch": 20.654, "grad_norm": 2.2348859310150146, "learning_rate": 2e-05, "loss": 0.06014737, "step": 10327 }, { "epoch": 20.656, "grad_norm": 3.9195809364318848, "learning_rate": 2e-05, "loss": 0.04524629, "step": 10328 }, { "epoch": 20.658, "grad_norm": 1.7762178182601929, "learning_rate": 2e-05, "loss": 0.04632588, "step": 10329 }, { "epoch": 20.66, "grad_norm": 2.312138557434082, "learning_rate": 2e-05, "loss": 0.05957634, "step": 10330 }, { "epoch": 20.662, "grad_norm": 0.806215763092041, "learning_rate": 2e-05, "loss": 0.02223288, "step": 10331 }, { "epoch": 20.664, "grad_norm": 1.3906282186508179, "learning_rate": 2e-05, "loss": 0.04223034, "step": 10332 }, { "epoch": 20.666, "grad_norm": 0.8046396970748901, "learning_rate": 2e-05, "loss": 0.02949773, "step": 10333 }, { "epoch": 20.668, "grad_norm": 1.1995511054992676, "learning_rate": 2e-05, "loss": 0.04497914, "step": 10334 }, { "epoch": 20.67, "grad_norm": 1.05391263961792, "learning_rate": 2e-05, "loss": 0.03743277, "step": 10335 }, { "epoch": 20.672, "grad_norm": 1.7558040618896484, "learning_rate": 2e-05, "loss": 0.04881262, "step": 10336 }, { "epoch": 20.674, "grad_norm": 1.1403037309646606, "learning_rate": 2e-05, "loss": 0.03673309, "step": 10337 }, { "epoch": 20.676, "grad_norm": 1.4420597553253174, "learning_rate": 2e-05, "loss": 0.04819232, "step": 10338 }, { "epoch": 20.678, "grad_norm": 1.227445125579834, "learning_rate": 2e-05, "loss": 0.04483083, "step": 10339 }, { "epoch": 20.68, "grad_norm": 1.065697193145752, "learning_rate": 2e-05, "loss": 0.04574988, "step": 10340 }, { "epoch": 20.682, "grad_norm": 0.744301438331604, "learning_rate": 2e-05, "loss": 0.01882716, "step": 10341 }, { "epoch": 20.684, "grad_norm": 0.9540907740592957, "learning_rate": 2e-05, "loss": 0.03570002, "step": 10342 }, { "epoch": 20.686, "grad_norm": 2.3299875259399414, "learning_rate": 2e-05, "loss": 0.0453724, "step": 10343 }, { "epoch": 20.688, "grad_norm": 1.336949348449707, "learning_rate": 2e-05, "loss": 0.04523324, "step": 10344 }, { "epoch": 20.69, "grad_norm": 1.4385414123535156, "learning_rate": 2e-05, "loss": 0.05023495, "step": 10345 }, { "epoch": 20.692, "grad_norm": 1.455505609512329, "learning_rate": 2e-05, "loss": 0.04999039, "step": 10346 }, { "epoch": 20.694, "grad_norm": 1.0223627090454102, "learning_rate": 2e-05, "loss": 0.03877442, "step": 10347 }, { "epoch": 20.696, "grad_norm": 2.0058321952819824, "learning_rate": 2e-05, "loss": 0.0574357, "step": 10348 }, { "epoch": 20.698, "grad_norm": 1.705620527267456, "learning_rate": 2e-05, "loss": 0.04397254, "step": 10349 }, { "epoch": 20.7, "grad_norm": 1.489851713180542, "learning_rate": 2e-05, "loss": 0.05874506, "step": 10350 }, { "epoch": 20.701999999999998, "grad_norm": 1.1559706926345825, "learning_rate": 2e-05, "loss": 0.0479423, "step": 10351 }, { "epoch": 20.704, "grad_norm": 1.028726577758789, "learning_rate": 2e-05, "loss": 0.03859405, "step": 10352 }, { "epoch": 20.706, "grad_norm": 1.0381685495376587, "learning_rate": 2e-05, "loss": 0.04677012, "step": 10353 }, { "epoch": 20.708, "grad_norm": 2.2589404582977295, "learning_rate": 2e-05, "loss": 0.04085595, "step": 10354 }, { "epoch": 20.71, "grad_norm": 1.1877682209014893, "learning_rate": 2e-05, "loss": 0.05692096, "step": 10355 }, { "epoch": 20.712, "grad_norm": 1.6057302951812744, "learning_rate": 2e-05, "loss": 0.05474461, "step": 10356 }, { "epoch": 20.714, "grad_norm": 1.311791181564331, "learning_rate": 2e-05, "loss": 0.0541933, "step": 10357 }, { "epoch": 20.716, "grad_norm": 1.1762055158615112, "learning_rate": 2e-05, "loss": 0.04137921, "step": 10358 }, { "epoch": 20.718, "grad_norm": 1.1249793767929077, "learning_rate": 2e-05, "loss": 0.04413811, "step": 10359 }, { "epoch": 20.72, "grad_norm": 3.215183734893799, "learning_rate": 2e-05, "loss": 0.05507435, "step": 10360 }, { "epoch": 20.722, "grad_norm": 1.016304850578308, "learning_rate": 2e-05, "loss": 0.04264227, "step": 10361 }, { "epoch": 20.724, "grad_norm": 1.1620252132415771, "learning_rate": 2e-05, "loss": 0.03689417, "step": 10362 }, { "epoch": 20.726, "grad_norm": 1.3868876695632935, "learning_rate": 2e-05, "loss": 0.04020111, "step": 10363 }, { "epoch": 20.728, "grad_norm": 0.8388650417327881, "learning_rate": 2e-05, "loss": 0.03484726, "step": 10364 }, { "epoch": 20.73, "grad_norm": 2.5803425312042236, "learning_rate": 2e-05, "loss": 0.05947161, "step": 10365 }, { "epoch": 20.732, "grad_norm": 2.0265052318573, "learning_rate": 2e-05, "loss": 0.04208006, "step": 10366 }, { "epoch": 20.734, "grad_norm": 1.2431007623672485, "learning_rate": 2e-05, "loss": 0.03644053, "step": 10367 }, { "epoch": 20.736, "grad_norm": 0.9912933707237244, "learning_rate": 2e-05, "loss": 0.03235281, "step": 10368 }, { "epoch": 20.738, "grad_norm": 1.2279971837997437, "learning_rate": 2e-05, "loss": 0.04702736, "step": 10369 }, { "epoch": 20.74, "grad_norm": 1.0629041194915771, "learning_rate": 2e-05, "loss": 0.03775265, "step": 10370 }, { "epoch": 20.742, "grad_norm": 2.0171046257019043, "learning_rate": 2e-05, "loss": 0.06155708, "step": 10371 }, { "epoch": 20.744, "grad_norm": 2.1277577877044678, "learning_rate": 2e-05, "loss": 0.03768395, "step": 10372 }, { "epoch": 20.746, "grad_norm": 1.5958000421524048, "learning_rate": 2e-05, "loss": 0.03573854, "step": 10373 }, { "epoch": 20.748, "grad_norm": 1.4193521738052368, "learning_rate": 2e-05, "loss": 0.03894667, "step": 10374 }, { "epoch": 20.75, "grad_norm": 1.3414616584777832, "learning_rate": 2e-05, "loss": 0.03875216, "step": 10375 }, { "epoch": 20.752, "grad_norm": 1.847235918045044, "learning_rate": 2e-05, "loss": 0.05250355, "step": 10376 }, { "epoch": 20.754, "grad_norm": 1.8481709957122803, "learning_rate": 2e-05, "loss": 0.04805972, "step": 10377 }, { "epoch": 20.756, "grad_norm": 1.0048134326934814, "learning_rate": 2e-05, "loss": 0.03031214, "step": 10378 }, { "epoch": 20.758, "grad_norm": 1.8027145862579346, "learning_rate": 2e-05, "loss": 0.04824695, "step": 10379 }, { "epoch": 20.76, "grad_norm": 1.4889280796051025, "learning_rate": 2e-05, "loss": 0.05154603, "step": 10380 }, { "epoch": 20.762, "grad_norm": 1.0634493827819824, "learning_rate": 2e-05, "loss": 0.03822739, "step": 10381 }, { "epoch": 20.764, "grad_norm": 1.5704444646835327, "learning_rate": 2e-05, "loss": 0.04982315, "step": 10382 }, { "epoch": 20.766, "grad_norm": 1.3702362775802612, "learning_rate": 2e-05, "loss": 0.04244145, "step": 10383 }, { "epoch": 20.768, "grad_norm": 1.3335888385772705, "learning_rate": 2e-05, "loss": 0.02980741, "step": 10384 }, { "epoch": 20.77, "grad_norm": 1.418221116065979, "learning_rate": 2e-05, "loss": 0.0591685, "step": 10385 }, { "epoch": 20.772, "grad_norm": 1.3817830085754395, "learning_rate": 2e-05, "loss": 0.05077572, "step": 10386 }, { "epoch": 20.774, "grad_norm": 1.2640894651412964, "learning_rate": 2e-05, "loss": 0.0400922, "step": 10387 }, { "epoch": 20.776, "grad_norm": 1.0491241216659546, "learning_rate": 2e-05, "loss": 0.04051751, "step": 10388 }, { "epoch": 20.778, "grad_norm": 1.5691514015197754, "learning_rate": 2e-05, "loss": 0.04427885, "step": 10389 }, { "epoch": 20.78, "grad_norm": 0.8628635406494141, "learning_rate": 2e-05, "loss": 0.03044099, "step": 10390 }, { "epoch": 20.782, "grad_norm": 1.5972529649734497, "learning_rate": 2e-05, "loss": 0.04842422, "step": 10391 }, { "epoch": 20.784, "grad_norm": 2.384840965270996, "learning_rate": 2e-05, "loss": 0.06167642, "step": 10392 }, { "epoch": 20.786, "grad_norm": 2.0006861686706543, "learning_rate": 2e-05, "loss": 0.05404559, "step": 10393 }, { "epoch": 20.788, "grad_norm": 2.31377911567688, "learning_rate": 2e-05, "loss": 0.04951285, "step": 10394 }, { "epoch": 20.79, "grad_norm": 1.5807206630706787, "learning_rate": 2e-05, "loss": 0.04938896, "step": 10395 }, { "epoch": 20.792, "grad_norm": 1.0856667757034302, "learning_rate": 2e-05, "loss": 0.0457892, "step": 10396 }, { "epoch": 20.794, "grad_norm": 1.307835340499878, "learning_rate": 2e-05, "loss": 0.05461257, "step": 10397 }, { "epoch": 20.796, "grad_norm": 1.3883715867996216, "learning_rate": 2e-05, "loss": 0.04010581, "step": 10398 }, { "epoch": 20.798000000000002, "grad_norm": 1.3887972831726074, "learning_rate": 2e-05, "loss": 0.0401758, "step": 10399 }, { "epoch": 20.8, "grad_norm": 1.785482406616211, "learning_rate": 2e-05, "loss": 0.0616926, "step": 10400 }, { "epoch": 20.802, "grad_norm": 1.0838273763656616, "learning_rate": 2e-05, "loss": 0.03876107, "step": 10401 }, { "epoch": 20.804, "grad_norm": 1.1273860931396484, "learning_rate": 2e-05, "loss": 0.04004445, "step": 10402 }, { "epoch": 20.806, "grad_norm": 1.7788662910461426, "learning_rate": 2e-05, "loss": 0.05170891, "step": 10403 }, { "epoch": 20.808, "grad_norm": 1.6286396980285645, "learning_rate": 2e-05, "loss": 0.04615801, "step": 10404 }, { "epoch": 20.81, "grad_norm": 1.4941986799240112, "learning_rate": 2e-05, "loss": 0.05066349, "step": 10405 }, { "epoch": 20.812, "grad_norm": 1.0226727724075317, "learning_rate": 2e-05, "loss": 0.03011133, "step": 10406 }, { "epoch": 20.814, "grad_norm": 1.511220097541809, "learning_rate": 2e-05, "loss": 0.05082799, "step": 10407 }, { "epoch": 20.816, "grad_norm": 1.02682363986969, "learning_rate": 2e-05, "loss": 0.03503739, "step": 10408 }, { "epoch": 20.818, "grad_norm": 2.4997785091400146, "learning_rate": 2e-05, "loss": 0.05403354, "step": 10409 }, { "epoch": 20.82, "grad_norm": 1.3160008192062378, "learning_rate": 2e-05, "loss": 0.03732576, "step": 10410 }, { "epoch": 20.822, "grad_norm": 1.072137713432312, "learning_rate": 2e-05, "loss": 0.0361542, "step": 10411 }, { "epoch": 20.824, "grad_norm": 1.2923551797866821, "learning_rate": 2e-05, "loss": 0.04200709, "step": 10412 }, { "epoch": 20.826, "grad_norm": 2.5604605674743652, "learning_rate": 2e-05, "loss": 0.06329618, "step": 10413 }, { "epoch": 20.828, "grad_norm": 1.103190302848816, "learning_rate": 2e-05, "loss": 0.04076958, "step": 10414 }, { "epoch": 20.83, "grad_norm": 1.3748561143875122, "learning_rate": 2e-05, "loss": 0.05478231, "step": 10415 }, { "epoch": 20.832, "grad_norm": 1.6953434944152832, "learning_rate": 2e-05, "loss": 0.04061558, "step": 10416 }, { "epoch": 20.834, "grad_norm": 1.332276463508606, "learning_rate": 2e-05, "loss": 0.04034182, "step": 10417 }, { "epoch": 20.836, "grad_norm": 1.2709466218948364, "learning_rate": 2e-05, "loss": 0.04360856, "step": 10418 }, { "epoch": 20.838, "grad_norm": 1.1232945919036865, "learning_rate": 2e-05, "loss": 0.03885805, "step": 10419 }, { "epoch": 20.84, "grad_norm": 1.5421833992004395, "learning_rate": 2e-05, "loss": 0.05012187, "step": 10420 }, { "epoch": 20.842, "grad_norm": 2.1459829807281494, "learning_rate": 2e-05, "loss": 0.04013477, "step": 10421 }, { "epoch": 20.844, "grad_norm": 1.1022701263427734, "learning_rate": 2e-05, "loss": 0.04102813, "step": 10422 }, { "epoch": 20.846, "grad_norm": 1.8244222402572632, "learning_rate": 2e-05, "loss": 0.06234596, "step": 10423 }, { "epoch": 20.848, "grad_norm": 1.0963854789733887, "learning_rate": 2e-05, "loss": 0.04060552, "step": 10424 }, { "epoch": 20.85, "grad_norm": 1.193650722503662, "learning_rate": 2e-05, "loss": 0.0435686, "step": 10425 }, { "epoch": 20.852, "grad_norm": 1.2625453472137451, "learning_rate": 2e-05, "loss": 0.05742463, "step": 10426 }, { "epoch": 20.854, "grad_norm": 1.1247082948684692, "learning_rate": 2e-05, "loss": 0.03828746, "step": 10427 }, { "epoch": 20.856, "grad_norm": 1.121609091758728, "learning_rate": 2e-05, "loss": 0.0424174, "step": 10428 }, { "epoch": 20.858, "grad_norm": 1.144251823425293, "learning_rate": 2e-05, "loss": 0.05209781, "step": 10429 }, { "epoch": 20.86, "grad_norm": 1.5560983419418335, "learning_rate": 2e-05, "loss": 0.06155263, "step": 10430 }, { "epoch": 20.862, "grad_norm": 0.9360548853874207, "learning_rate": 2e-05, "loss": 0.03697401, "step": 10431 }, { "epoch": 20.864, "grad_norm": 1.2462891340255737, "learning_rate": 2e-05, "loss": 0.05151362, "step": 10432 }, { "epoch": 20.866, "grad_norm": 0.9053067564964294, "learning_rate": 2e-05, "loss": 0.02829038, "step": 10433 }, { "epoch": 20.868, "grad_norm": 1.0403835773468018, "learning_rate": 2e-05, "loss": 0.03240875, "step": 10434 }, { "epoch": 20.87, "grad_norm": 1.0964161157608032, "learning_rate": 2e-05, "loss": 0.03703195, "step": 10435 }, { "epoch": 20.872, "grad_norm": 1.1594023704528809, "learning_rate": 2e-05, "loss": 0.05074485, "step": 10436 }, { "epoch": 20.874, "grad_norm": 2.033534049987793, "learning_rate": 2e-05, "loss": 0.03843035, "step": 10437 }, { "epoch": 20.876, "grad_norm": 1.2430881261825562, "learning_rate": 2e-05, "loss": 0.03258603, "step": 10438 }, { "epoch": 20.878, "grad_norm": 1.0736937522888184, "learning_rate": 2e-05, "loss": 0.03902845, "step": 10439 }, { "epoch": 20.88, "grad_norm": 1.3153738975524902, "learning_rate": 2e-05, "loss": 0.0298208, "step": 10440 }, { "epoch": 20.882, "grad_norm": 1.0635802745819092, "learning_rate": 2e-05, "loss": 0.04371457, "step": 10441 }, { "epoch": 20.884, "grad_norm": 1.1820414066314697, "learning_rate": 2e-05, "loss": 0.03823504, "step": 10442 }, { "epoch": 20.886, "grad_norm": 1.439071774482727, "learning_rate": 2e-05, "loss": 0.0368807, "step": 10443 }, { "epoch": 20.888, "grad_norm": 1.9791032075881958, "learning_rate": 2e-05, "loss": 0.04375573, "step": 10444 }, { "epoch": 20.89, "grad_norm": 1.2357045412063599, "learning_rate": 2e-05, "loss": 0.05310776, "step": 10445 }, { "epoch": 20.892, "grad_norm": 1.0571391582489014, "learning_rate": 2e-05, "loss": 0.04662876, "step": 10446 }, { "epoch": 20.894, "grad_norm": 1.5010313987731934, "learning_rate": 2e-05, "loss": 0.04568609, "step": 10447 }, { "epoch": 20.896, "grad_norm": 1.1134576797485352, "learning_rate": 2e-05, "loss": 0.03897135, "step": 10448 }, { "epoch": 20.898, "grad_norm": 1.8384913206100464, "learning_rate": 2e-05, "loss": 0.04566251, "step": 10449 }, { "epoch": 20.9, "grad_norm": 1.8514708280563354, "learning_rate": 2e-05, "loss": 0.04538105, "step": 10450 }, { "epoch": 20.902, "grad_norm": 0.9536843299865723, "learning_rate": 2e-05, "loss": 0.02933218, "step": 10451 }, { "epoch": 20.904, "grad_norm": 1.3290810585021973, "learning_rate": 2e-05, "loss": 0.03876988, "step": 10452 }, { "epoch": 20.906, "grad_norm": 1.192558765411377, "learning_rate": 2e-05, "loss": 0.04925163, "step": 10453 }, { "epoch": 20.908, "grad_norm": 1.5571587085723877, "learning_rate": 2e-05, "loss": 0.05063906, "step": 10454 }, { "epoch": 20.91, "grad_norm": 0.9681788682937622, "learning_rate": 2e-05, "loss": 0.02861215, "step": 10455 }, { "epoch": 20.912, "grad_norm": 1.9657407999038696, "learning_rate": 2e-05, "loss": 0.04332226, "step": 10456 }, { "epoch": 20.914, "grad_norm": 2.038175106048584, "learning_rate": 2e-05, "loss": 0.05232367, "step": 10457 }, { "epoch": 20.916, "grad_norm": 1.6845239400863647, "learning_rate": 2e-05, "loss": 0.06234216, "step": 10458 }, { "epoch": 20.918, "grad_norm": 1.113455891609192, "learning_rate": 2e-05, "loss": 0.04771005, "step": 10459 }, { "epoch": 20.92, "grad_norm": 1.283761739730835, "learning_rate": 2e-05, "loss": 0.04820241, "step": 10460 }, { "epoch": 20.922, "grad_norm": 1.0466747283935547, "learning_rate": 2e-05, "loss": 0.03933599, "step": 10461 }, { "epoch": 20.924, "grad_norm": 1.923121452331543, "learning_rate": 2e-05, "loss": 0.04695834, "step": 10462 }, { "epoch": 20.926, "grad_norm": 1.6274025440216064, "learning_rate": 2e-05, "loss": 0.0557045, "step": 10463 }, { "epoch": 20.928, "grad_norm": 1.1221683025360107, "learning_rate": 2e-05, "loss": 0.03770256, "step": 10464 }, { "epoch": 20.93, "grad_norm": 1.9070851802825928, "learning_rate": 2e-05, "loss": 0.06086807, "step": 10465 }, { "epoch": 20.932, "grad_norm": 1.37982177734375, "learning_rate": 2e-05, "loss": 0.06581369, "step": 10466 }, { "epoch": 20.934, "grad_norm": 1.507738709449768, "learning_rate": 2e-05, "loss": 0.06398027, "step": 10467 }, { "epoch": 20.936, "grad_norm": 1.2770816087722778, "learning_rate": 2e-05, "loss": 0.05247059, "step": 10468 }, { "epoch": 20.938, "grad_norm": 1.213557243347168, "learning_rate": 2e-05, "loss": 0.06019966, "step": 10469 }, { "epoch": 20.94, "grad_norm": 1.6094050407409668, "learning_rate": 2e-05, "loss": 0.0516229, "step": 10470 }, { "epoch": 20.942, "grad_norm": 1.8595128059387207, "learning_rate": 2e-05, "loss": 0.05755534, "step": 10471 }, { "epoch": 20.944, "grad_norm": 1.3475713729858398, "learning_rate": 2e-05, "loss": 0.04932076, "step": 10472 }, { "epoch": 20.946, "grad_norm": 2.1639888286590576, "learning_rate": 2e-05, "loss": 0.04102977, "step": 10473 }, { "epoch": 20.948, "grad_norm": 1.3874578475952148, "learning_rate": 2e-05, "loss": 0.04046158, "step": 10474 }, { "epoch": 20.95, "grad_norm": 0.8573673963546753, "learning_rate": 2e-05, "loss": 0.02958693, "step": 10475 }, { "epoch": 20.951999999999998, "grad_norm": 1.7305490970611572, "learning_rate": 2e-05, "loss": 0.03931931, "step": 10476 }, { "epoch": 20.954, "grad_norm": 1.2163013219833374, "learning_rate": 2e-05, "loss": 0.04262955, "step": 10477 }, { "epoch": 20.956, "grad_norm": 0.8612142205238342, "learning_rate": 2e-05, "loss": 0.03597734, "step": 10478 }, { "epoch": 20.958, "grad_norm": 1.361814022064209, "learning_rate": 2e-05, "loss": 0.04435162, "step": 10479 }, { "epoch": 20.96, "grad_norm": 1.504372239112854, "learning_rate": 2e-05, "loss": 0.04584415, "step": 10480 }, { "epoch": 20.962, "grad_norm": 2.293856382369995, "learning_rate": 2e-05, "loss": 0.06242904, "step": 10481 }, { "epoch": 20.964, "grad_norm": 1.2983638048171997, "learning_rate": 2e-05, "loss": 0.06085057, "step": 10482 }, { "epoch": 20.966, "grad_norm": 1.3499860763549805, "learning_rate": 2e-05, "loss": 0.03652674, "step": 10483 }, { "epoch": 20.968, "grad_norm": 1.5795481204986572, "learning_rate": 2e-05, "loss": 0.0482913, "step": 10484 }, { "epoch": 20.97, "grad_norm": 1.5023541450500488, "learning_rate": 2e-05, "loss": 0.05397575, "step": 10485 }, { "epoch": 20.972, "grad_norm": 1.396065592765808, "learning_rate": 2e-05, "loss": 0.04347632, "step": 10486 }, { "epoch": 20.974, "grad_norm": 1.753361701965332, "learning_rate": 2e-05, "loss": 0.04213675, "step": 10487 }, { "epoch": 20.976, "grad_norm": 5.5056939125061035, "learning_rate": 2e-05, "loss": 0.04313315, "step": 10488 }, { "epoch": 20.978, "grad_norm": 2.6996207237243652, "learning_rate": 2e-05, "loss": 0.06716975, "step": 10489 }, { "epoch": 20.98, "grad_norm": 1.197003960609436, "learning_rate": 2e-05, "loss": 0.04371919, "step": 10490 }, { "epoch": 20.982, "grad_norm": 1.0028985738754272, "learning_rate": 2e-05, "loss": 0.0400428, "step": 10491 }, { "epoch": 20.984, "grad_norm": 1.2397624254226685, "learning_rate": 2e-05, "loss": 0.04347303, "step": 10492 }, { "epoch": 20.986, "grad_norm": 1.605710506439209, "learning_rate": 2e-05, "loss": 0.03323404, "step": 10493 }, { "epoch": 20.988, "grad_norm": 1.335616946220398, "learning_rate": 2e-05, "loss": 0.03030888, "step": 10494 }, { "epoch": 20.99, "grad_norm": 1.0594899654388428, "learning_rate": 2e-05, "loss": 0.04696837, "step": 10495 }, { "epoch": 20.992, "grad_norm": 1.4366145133972168, "learning_rate": 2e-05, "loss": 0.03944476, "step": 10496 }, { "epoch": 20.994, "grad_norm": 2.1102654933929443, "learning_rate": 2e-05, "loss": 0.057108, "step": 10497 }, { "epoch": 20.996, "grad_norm": 3.692108392715454, "learning_rate": 2e-05, "loss": 0.05202651, "step": 10498 }, { "epoch": 20.998, "grad_norm": 1.7679089307785034, "learning_rate": 2e-05, "loss": 0.04077272, "step": 10499 }, { "epoch": 21.0, "grad_norm": 1.3703136444091797, "learning_rate": 2e-05, "loss": 0.04178644, "step": 10500 }, { "epoch": 21.0, "eval_performance": { "AngleClassification_1": 0.994, "AngleClassification_2": 0.99, "AngleClassification_3": 0.9700598802395209, "Equal_1": 0.996, "Equal_2": 0.9700598802395209, "Equal_3": 0.9780439121756487, "LineComparison_1": 1.0, "LineComparison_2": 0.998003992015968, "LineComparison_3": 0.9920159680638723, "Parallel_1": 0.9859719438877755, "Parallel_2": 0.9979959919839679, "Parallel_3": 0.978, "Perpendicular_1": 0.998, "Perpendicular_2": 0.992, "Perpendicular_3": 0.8466933867735471, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.998, "PointLiesOnCircle_3": 0.9887999999999999, "PointLiesOnLine_1": 0.9919839679358717, "PointLiesOnLine_2": 0.9899799599198397, "PointLiesOnLine_3": 0.9760479041916168 }, "eval_runtime": 242.0296, "eval_samples_per_second": 43.383, "eval_steps_per_second": 0.868, "step": 10500 }, { "epoch": 21.002, "grad_norm": 1.462356686592102, "learning_rate": 2e-05, "loss": 0.05049226, "step": 10501 }, { "epoch": 21.004, "grad_norm": 1.1890729665756226, "learning_rate": 2e-05, "loss": 0.05106752, "step": 10502 }, { "epoch": 21.006, "grad_norm": 1.2369667291641235, "learning_rate": 2e-05, "loss": 0.04082203, "step": 10503 }, { "epoch": 21.008, "grad_norm": 1.1810232400894165, "learning_rate": 2e-05, "loss": 0.03376776, "step": 10504 }, { "epoch": 21.01, "grad_norm": 1.1723861694335938, "learning_rate": 2e-05, "loss": 0.04720213, "step": 10505 }, { "epoch": 21.012, "grad_norm": 2.131460189819336, "learning_rate": 2e-05, "loss": 0.05448139, "step": 10506 }, { "epoch": 21.014, "grad_norm": 2.392620801925659, "learning_rate": 2e-05, "loss": 0.06358086, "step": 10507 }, { "epoch": 21.016, "grad_norm": 0.9861345887184143, "learning_rate": 2e-05, "loss": 0.04469083, "step": 10508 }, { "epoch": 21.018, "grad_norm": 1.1304010152816772, "learning_rate": 2e-05, "loss": 0.05413827, "step": 10509 }, { "epoch": 21.02, "grad_norm": 0.9932830333709717, "learning_rate": 2e-05, "loss": 0.03468421, "step": 10510 }, { "epoch": 21.022, "grad_norm": 1.242247223854065, "learning_rate": 2e-05, "loss": 0.03753576, "step": 10511 }, { "epoch": 21.024, "grad_norm": 1.8675990104675293, "learning_rate": 2e-05, "loss": 0.06061586, "step": 10512 }, { "epoch": 21.026, "grad_norm": 1.9113482236862183, "learning_rate": 2e-05, "loss": 0.0689573, "step": 10513 }, { "epoch": 21.028, "grad_norm": 1.560994267463684, "learning_rate": 2e-05, "loss": 0.05749905, "step": 10514 }, { "epoch": 21.03, "grad_norm": 1.5214526653289795, "learning_rate": 2e-05, "loss": 0.05094494, "step": 10515 }, { "epoch": 21.032, "grad_norm": 2.2572882175445557, "learning_rate": 2e-05, "loss": 0.06049348, "step": 10516 }, { "epoch": 21.034, "grad_norm": 1.076747179031372, "learning_rate": 2e-05, "loss": 0.04059365, "step": 10517 }, { "epoch": 21.036, "grad_norm": 1.1050325632095337, "learning_rate": 2e-05, "loss": 0.03558519, "step": 10518 }, { "epoch": 21.038, "grad_norm": 1.8987985849380493, "learning_rate": 2e-05, "loss": 0.04842281, "step": 10519 }, { "epoch": 21.04, "grad_norm": 1.242336392402649, "learning_rate": 2e-05, "loss": 0.04280468, "step": 10520 }, { "epoch": 21.042, "grad_norm": 1.5794707536697388, "learning_rate": 2e-05, "loss": 0.07130768, "step": 10521 }, { "epoch": 21.044, "grad_norm": 1.1983901262283325, "learning_rate": 2e-05, "loss": 0.05210111, "step": 10522 }, { "epoch": 21.046, "grad_norm": 1.4267812967300415, "learning_rate": 2e-05, "loss": 0.07043134, "step": 10523 }, { "epoch": 21.048, "grad_norm": 1.1391783952713013, "learning_rate": 2e-05, "loss": 0.04264867, "step": 10524 }, { "epoch": 21.05, "grad_norm": 1.4363746643066406, "learning_rate": 2e-05, "loss": 0.03185604, "step": 10525 }, { "epoch": 21.052, "grad_norm": 1.8698248863220215, "learning_rate": 2e-05, "loss": 0.06181779, "step": 10526 }, { "epoch": 21.054, "grad_norm": 1.1637581586837769, "learning_rate": 2e-05, "loss": 0.04072328, "step": 10527 }, { "epoch": 21.056, "grad_norm": 1.959799885749817, "learning_rate": 2e-05, "loss": 0.04672805, "step": 10528 }, { "epoch": 21.058, "grad_norm": 1.7608394622802734, "learning_rate": 2e-05, "loss": 0.07064647, "step": 10529 }, { "epoch": 21.06, "grad_norm": 1.7372740507125854, "learning_rate": 2e-05, "loss": 0.04768047, "step": 10530 }, { "epoch": 21.062, "grad_norm": 1.513545036315918, "learning_rate": 2e-05, "loss": 0.0485164, "step": 10531 }, { "epoch": 21.064, "grad_norm": 1.2370104789733887, "learning_rate": 2e-05, "loss": 0.04482142, "step": 10532 }, { "epoch": 21.066, "grad_norm": 1.5725091695785522, "learning_rate": 2e-05, "loss": 0.05075742, "step": 10533 }, { "epoch": 21.068, "grad_norm": 1.2082266807556152, "learning_rate": 2e-05, "loss": 0.04183413, "step": 10534 }, { "epoch": 21.07, "grad_norm": 1.1127504110336304, "learning_rate": 2e-05, "loss": 0.04747891, "step": 10535 }, { "epoch": 21.072, "grad_norm": 1.1064636707305908, "learning_rate": 2e-05, "loss": 0.0549633, "step": 10536 }, { "epoch": 21.074, "grad_norm": 1.278546929359436, "learning_rate": 2e-05, "loss": 0.04664842, "step": 10537 }, { "epoch": 21.076, "grad_norm": 1.3650281429290771, "learning_rate": 2e-05, "loss": 0.02511859, "step": 10538 }, { "epoch": 21.078, "grad_norm": 1.9991543292999268, "learning_rate": 2e-05, "loss": 0.05309486, "step": 10539 }, { "epoch": 21.08, "grad_norm": 0.9923328757286072, "learning_rate": 2e-05, "loss": 0.04307661, "step": 10540 }, { "epoch": 21.082, "grad_norm": 1.18087637424469, "learning_rate": 2e-05, "loss": 0.0424362, "step": 10541 }, { "epoch": 21.084, "grad_norm": 1.5038344860076904, "learning_rate": 2e-05, "loss": 0.06554523, "step": 10542 }, { "epoch": 21.086, "grad_norm": 1.736120343208313, "learning_rate": 2e-05, "loss": 0.05815983, "step": 10543 }, { "epoch": 21.088, "grad_norm": 1.3450461626052856, "learning_rate": 2e-05, "loss": 0.04903487, "step": 10544 }, { "epoch": 21.09, "grad_norm": 1.4321377277374268, "learning_rate": 2e-05, "loss": 0.0716904, "step": 10545 }, { "epoch": 21.092, "grad_norm": 1.7467728853225708, "learning_rate": 2e-05, "loss": 0.05331498, "step": 10546 }, { "epoch": 21.094, "grad_norm": 1.6884418725967407, "learning_rate": 2e-05, "loss": 0.0416181, "step": 10547 }, { "epoch": 21.096, "grad_norm": 2.433647632598877, "learning_rate": 2e-05, "loss": 0.05498656, "step": 10548 }, { "epoch": 21.098, "grad_norm": 0.9459535479545593, "learning_rate": 2e-05, "loss": 0.04383996, "step": 10549 }, { "epoch": 21.1, "grad_norm": 0.9566987156867981, "learning_rate": 2e-05, "loss": 0.04180296, "step": 10550 }, { "epoch": 21.102, "grad_norm": 3.6449708938598633, "learning_rate": 2e-05, "loss": 0.07155284, "step": 10551 }, { "epoch": 21.104, "grad_norm": 1.4156569242477417, "learning_rate": 2e-05, "loss": 0.05265646, "step": 10552 }, { "epoch": 21.106, "grad_norm": 1.7648773193359375, "learning_rate": 2e-05, "loss": 0.06461636, "step": 10553 }, { "epoch": 21.108, "grad_norm": 2.773676872253418, "learning_rate": 2e-05, "loss": 0.04887477, "step": 10554 }, { "epoch": 21.11, "grad_norm": 1.637283205986023, "learning_rate": 2e-05, "loss": 0.05045804, "step": 10555 }, { "epoch": 21.112, "grad_norm": 1.501587986946106, "learning_rate": 2e-05, "loss": 0.0528647, "step": 10556 }, { "epoch": 21.114, "grad_norm": 1.029062271118164, "learning_rate": 2e-05, "loss": 0.04682366, "step": 10557 }, { "epoch": 21.116, "grad_norm": 2.760324716567993, "learning_rate": 2e-05, "loss": 0.06920011, "step": 10558 }, { "epoch": 21.118, "grad_norm": 1.5845695734024048, "learning_rate": 2e-05, "loss": 0.06164956, "step": 10559 }, { "epoch": 21.12, "grad_norm": 1.2537925243377686, "learning_rate": 2e-05, "loss": 0.05061475, "step": 10560 }, { "epoch": 21.122, "grad_norm": 1.1121909618377686, "learning_rate": 2e-05, "loss": 0.04680887, "step": 10561 }, { "epoch": 21.124, "grad_norm": 1.1195684671401978, "learning_rate": 2e-05, "loss": 0.04420564, "step": 10562 }, { "epoch": 21.126, "grad_norm": 1.1377873420715332, "learning_rate": 2e-05, "loss": 0.04047245, "step": 10563 }, { "epoch": 21.128, "grad_norm": 1.222047209739685, "learning_rate": 2e-05, "loss": 0.06022634, "step": 10564 }, { "epoch": 21.13, "grad_norm": 1.5862013101577759, "learning_rate": 2e-05, "loss": 0.06138531, "step": 10565 }, { "epoch": 21.132, "grad_norm": 1.9791460037231445, "learning_rate": 2e-05, "loss": 0.05450207, "step": 10566 }, { "epoch": 21.134, "grad_norm": 1.4456321001052856, "learning_rate": 2e-05, "loss": 0.03760494, "step": 10567 }, { "epoch": 21.136, "grad_norm": 1.0595258474349976, "learning_rate": 2e-05, "loss": 0.0450093, "step": 10568 }, { "epoch": 21.138, "grad_norm": 1.009607195854187, "learning_rate": 2e-05, "loss": 0.04127701, "step": 10569 }, { "epoch": 21.14, "grad_norm": 0.8797770142555237, "learning_rate": 2e-05, "loss": 0.03502529, "step": 10570 }, { "epoch": 21.142, "grad_norm": 2.1534171104431152, "learning_rate": 2e-05, "loss": 0.048948, "step": 10571 }, { "epoch": 21.144, "grad_norm": 1.185837745666504, "learning_rate": 2e-05, "loss": 0.04408889, "step": 10572 }, { "epoch": 21.146, "grad_norm": 1.5296651124954224, "learning_rate": 2e-05, "loss": 0.06487472, "step": 10573 }, { "epoch": 21.148, "grad_norm": 1.4371334314346313, "learning_rate": 2e-05, "loss": 0.0573732, "step": 10574 }, { "epoch": 21.15, "grad_norm": 1.3668755292892456, "learning_rate": 2e-05, "loss": 0.06311549, "step": 10575 }, { "epoch": 21.152, "grad_norm": 2.295741319656372, "learning_rate": 2e-05, "loss": 0.05279358, "step": 10576 }, { "epoch": 21.154, "grad_norm": 1.5927609205245972, "learning_rate": 2e-05, "loss": 0.04977706, "step": 10577 }, { "epoch": 21.156, "grad_norm": 1.2296701669692993, "learning_rate": 2e-05, "loss": 0.04173926, "step": 10578 }, { "epoch": 21.158, "grad_norm": 0.9735904932022095, "learning_rate": 2e-05, "loss": 0.04261548, "step": 10579 }, { "epoch": 21.16, "grad_norm": 1.2517967224121094, "learning_rate": 2e-05, "loss": 0.04657619, "step": 10580 }, { "epoch": 21.162, "grad_norm": 1.7209504842758179, "learning_rate": 2e-05, "loss": 0.05133269, "step": 10581 }, { "epoch": 21.164, "grad_norm": 1.026035189628601, "learning_rate": 2e-05, "loss": 0.03889134, "step": 10582 }, { "epoch": 21.166, "grad_norm": 1.2690342664718628, "learning_rate": 2e-05, "loss": 0.0364273, "step": 10583 }, { "epoch": 21.168, "grad_norm": 1.3065950870513916, "learning_rate": 2e-05, "loss": 0.0615424, "step": 10584 }, { "epoch": 21.17, "grad_norm": 1.2293459177017212, "learning_rate": 2e-05, "loss": 0.04358068, "step": 10585 }, { "epoch": 21.172, "grad_norm": 1.9252980947494507, "learning_rate": 2e-05, "loss": 0.05533506, "step": 10586 }, { "epoch": 21.174, "grad_norm": 2.424349069595337, "learning_rate": 2e-05, "loss": 0.07094898, "step": 10587 }, { "epoch": 21.176, "grad_norm": 1.049418330192566, "learning_rate": 2e-05, "loss": 0.05184286, "step": 10588 }, { "epoch": 21.178, "grad_norm": 1.1546443700790405, "learning_rate": 2e-05, "loss": 0.0416768, "step": 10589 }, { "epoch": 21.18, "grad_norm": 1.4959067106246948, "learning_rate": 2e-05, "loss": 0.03667594, "step": 10590 }, { "epoch": 21.182, "grad_norm": 1.7127101421356201, "learning_rate": 2e-05, "loss": 0.06447674, "step": 10591 }, { "epoch": 21.184, "grad_norm": 0.9761877059936523, "learning_rate": 2e-05, "loss": 0.03954772, "step": 10592 }, { "epoch": 21.186, "grad_norm": 1.3575623035430908, "learning_rate": 2e-05, "loss": 0.05794598, "step": 10593 }, { "epoch": 21.188, "grad_norm": 1.0766328573226929, "learning_rate": 2e-05, "loss": 0.04239533, "step": 10594 }, { "epoch": 21.19, "grad_norm": 2.491546630859375, "learning_rate": 2e-05, "loss": 0.05523719, "step": 10595 }, { "epoch": 21.192, "grad_norm": 1.4443989992141724, "learning_rate": 2e-05, "loss": 0.0524526, "step": 10596 }, { "epoch": 21.194, "grad_norm": 1.1527374982833862, "learning_rate": 2e-05, "loss": 0.03966731, "step": 10597 }, { "epoch": 21.196, "grad_norm": 1.0531830787658691, "learning_rate": 2e-05, "loss": 0.0432022, "step": 10598 }, { "epoch": 21.198, "grad_norm": 1.2543634176254272, "learning_rate": 2e-05, "loss": 0.05207051, "step": 10599 }, { "epoch": 21.2, "grad_norm": 1.4031164646148682, "learning_rate": 2e-05, "loss": 0.05572195, "step": 10600 }, { "epoch": 21.202, "grad_norm": 1.1671595573425293, "learning_rate": 2e-05, "loss": 0.0546497, "step": 10601 }, { "epoch": 21.204, "grad_norm": 1.4751501083374023, "learning_rate": 2e-05, "loss": 0.0426508, "step": 10602 }, { "epoch": 21.206, "grad_norm": 1.119997501373291, "learning_rate": 2e-05, "loss": 0.03689012, "step": 10603 }, { "epoch": 21.208, "grad_norm": 1.1726877689361572, "learning_rate": 2e-05, "loss": 0.05807318, "step": 10604 }, { "epoch": 21.21, "grad_norm": 1.4020177125930786, "learning_rate": 2e-05, "loss": 0.05618124, "step": 10605 }, { "epoch": 21.212, "grad_norm": 1.4401888847351074, "learning_rate": 2e-05, "loss": 0.05572372, "step": 10606 }, { "epoch": 21.214, "grad_norm": 0.9991408586502075, "learning_rate": 2e-05, "loss": 0.0452058, "step": 10607 }, { "epoch": 21.216, "grad_norm": 1.1121331453323364, "learning_rate": 2e-05, "loss": 0.03917393, "step": 10608 }, { "epoch": 21.218, "grad_norm": 0.9031201601028442, "learning_rate": 2e-05, "loss": 0.03409135, "step": 10609 }, { "epoch": 21.22, "grad_norm": 1.7865961790084839, "learning_rate": 2e-05, "loss": 0.05350084, "step": 10610 }, { "epoch": 21.222, "grad_norm": 1.4278464317321777, "learning_rate": 2e-05, "loss": 0.05273437, "step": 10611 }, { "epoch": 21.224, "grad_norm": 1.824474811553955, "learning_rate": 2e-05, "loss": 0.05080405, "step": 10612 }, { "epoch": 21.226, "grad_norm": 0.9497618079185486, "learning_rate": 2e-05, "loss": 0.04737395, "step": 10613 }, { "epoch": 21.228, "grad_norm": 1.9952632188796997, "learning_rate": 2e-05, "loss": 0.05211278, "step": 10614 }, { "epoch": 21.23, "grad_norm": 1.0210273265838623, "learning_rate": 2e-05, "loss": 0.03585964, "step": 10615 }, { "epoch": 21.232, "grad_norm": 1.1996471881866455, "learning_rate": 2e-05, "loss": 0.04059733, "step": 10616 }, { "epoch": 21.234, "grad_norm": 1.1930419206619263, "learning_rate": 2e-05, "loss": 0.0579964, "step": 10617 }, { "epoch": 21.236, "grad_norm": 1.5147624015808105, "learning_rate": 2e-05, "loss": 0.05640074, "step": 10618 }, { "epoch": 21.238, "grad_norm": 1.9012514352798462, "learning_rate": 2e-05, "loss": 0.07998705, "step": 10619 }, { "epoch": 21.24, "grad_norm": 1.1354387998580933, "learning_rate": 2e-05, "loss": 0.05712777, "step": 10620 }, { "epoch": 21.242, "grad_norm": 1.0657048225402832, "learning_rate": 2e-05, "loss": 0.03866805, "step": 10621 }, { "epoch": 21.244, "grad_norm": 1.4294742345809937, "learning_rate": 2e-05, "loss": 0.054425, "step": 10622 }, { "epoch": 21.246, "grad_norm": 1.3071699142456055, "learning_rate": 2e-05, "loss": 0.05525178, "step": 10623 }, { "epoch": 21.248, "grad_norm": 1.4722974300384521, "learning_rate": 2e-05, "loss": 0.06333463, "step": 10624 }, { "epoch": 21.25, "grad_norm": 1.655042290687561, "learning_rate": 2e-05, "loss": 0.04925643, "step": 10625 }, { "epoch": 21.252, "grad_norm": 1.0304545164108276, "learning_rate": 2e-05, "loss": 0.04096917, "step": 10626 }, { "epoch": 21.254, "grad_norm": 0.9811447858810425, "learning_rate": 2e-05, "loss": 0.04028199, "step": 10627 }, { "epoch": 21.256, "grad_norm": 1.7948307991027832, "learning_rate": 2e-05, "loss": 0.04732938, "step": 10628 }, { "epoch": 21.258, "grad_norm": 1.5040260553359985, "learning_rate": 2e-05, "loss": 0.04940037, "step": 10629 }, { "epoch": 21.26, "grad_norm": 1.1473209857940674, "learning_rate": 2e-05, "loss": 0.04660755, "step": 10630 }, { "epoch": 21.262, "grad_norm": 1.3235304355621338, "learning_rate": 2e-05, "loss": 0.03822428, "step": 10631 }, { "epoch": 21.264, "grad_norm": 1.4603519439697266, "learning_rate": 2e-05, "loss": 0.05476466, "step": 10632 }, { "epoch": 21.266, "grad_norm": 1.351792573928833, "learning_rate": 2e-05, "loss": 0.04963805, "step": 10633 }, { "epoch": 21.268, "grad_norm": 1.1041845083236694, "learning_rate": 2e-05, "loss": 0.0541786, "step": 10634 }, { "epoch": 21.27, "grad_norm": 1.3906861543655396, "learning_rate": 2e-05, "loss": 0.05100389, "step": 10635 }, { "epoch": 21.272, "grad_norm": 1.8753777742385864, "learning_rate": 2e-05, "loss": 0.04221532, "step": 10636 }, { "epoch": 21.274, "grad_norm": 1.187674641609192, "learning_rate": 2e-05, "loss": 0.04054579, "step": 10637 }, { "epoch": 21.276, "grad_norm": 1.9785590171813965, "learning_rate": 2e-05, "loss": 0.04755298, "step": 10638 }, { "epoch": 21.278, "grad_norm": 1.0015933513641357, "learning_rate": 2e-05, "loss": 0.04106369, "step": 10639 }, { "epoch": 21.28, "grad_norm": 2.8883860111236572, "learning_rate": 2e-05, "loss": 0.06277049, "step": 10640 }, { "epoch": 21.282, "grad_norm": 1.4691795110702515, "learning_rate": 2e-05, "loss": 0.04394303, "step": 10641 }, { "epoch": 21.284, "grad_norm": 1.2196604013442993, "learning_rate": 2e-05, "loss": 0.05366348, "step": 10642 }, { "epoch": 21.286, "grad_norm": 1.2930591106414795, "learning_rate": 2e-05, "loss": 0.05264851, "step": 10643 }, { "epoch": 21.288, "grad_norm": 1.211942434310913, "learning_rate": 2e-05, "loss": 0.0502063, "step": 10644 }, { "epoch": 21.29, "grad_norm": 1.1400648355484009, "learning_rate": 2e-05, "loss": 0.03204205, "step": 10645 }, { "epoch": 21.292, "grad_norm": 1.2991210222244263, "learning_rate": 2e-05, "loss": 0.04952317, "step": 10646 }, { "epoch": 21.294, "grad_norm": 1.1846965551376343, "learning_rate": 2e-05, "loss": 0.03298917, "step": 10647 }, { "epoch": 21.296, "grad_norm": 2.160949230194092, "learning_rate": 2e-05, "loss": 0.05455207, "step": 10648 }, { "epoch": 21.298, "grad_norm": 1.0781298875808716, "learning_rate": 2e-05, "loss": 0.04344229, "step": 10649 }, { "epoch": 21.3, "grad_norm": 1.2741973400115967, "learning_rate": 2e-05, "loss": 0.04083031, "step": 10650 }, { "epoch": 21.302, "grad_norm": 2.3289153575897217, "learning_rate": 2e-05, "loss": 0.05353005, "step": 10651 }, { "epoch": 21.304, "grad_norm": 1.4217759370803833, "learning_rate": 2e-05, "loss": 0.05329046, "step": 10652 }, { "epoch": 21.306, "grad_norm": 2.7043356895446777, "learning_rate": 2e-05, "loss": 0.05646773, "step": 10653 }, { "epoch": 21.308, "grad_norm": 1.777420997619629, "learning_rate": 2e-05, "loss": 0.03485641, "step": 10654 }, { "epoch": 21.31, "grad_norm": 1.568239688873291, "learning_rate": 2e-05, "loss": 0.03850968, "step": 10655 }, { "epoch": 21.312, "grad_norm": 1.7135896682739258, "learning_rate": 2e-05, "loss": 0.05973526, "step": 10656 }, { "epoch": 21.314, "grad_norm": 1.1795032024383545, "learning_rate": 2e-05, "loss": 0.03139373, "step": 10657 }, { "epoch": 21.316, "grad_norm": 2.166992664337158, "learning_rate": 2e-05, "loss": 0.05277683, "step": 10658 }, { "epoch": 21.318, "grad_norm": 1.2157166004180908, "learning_rate": 2e-05, "loss": 0.04410652, "step": 10659 }, { "epoch": 21.32, "grad_norm": 1.1267826557159424, "learning_rate": 2e-05, "loss": 0.0439358, "step": 10660 }, { "epoch": 21.322, "grad_norm": 1.082690954208374, "learning_rate": 2e-05, "loss": 0.0480907, "step": 10661 }, { "epoch": 21.324, "grad_norm": 1.557603359222412, "learning_rate": 2e-05, "loss": 0.05776751, "step": 10662 }, { "epoch": 21.326, "grad_norm": 1.3641945123672485, "learning_rate": 2e-05, "loss": 0.05607562, "step": 10663 }, { "epoch": 21.328, "grad_norm": 1.0702548027038574, "learning_rate": 2e-05, "loss": 0.03815268, "step": 10664 }, { "epoch": 21.33, "grad_norm": 0.9517786502838135, "learning_rate": 2e-05, "loss": 0.0358672, "step": 10665 }, { "epoch": 21.332, "grad_norm": 1.5815719366073608, "learning_rate": 2e-05, "loss": 0.05650991, "step": 10666 }, { "epoch": 21.334, "grad_norm": 1.1903462409973145, "learning_rate": 2e-05, "loss": 0.0521509, "step": 10667 }, { "epoch": 21.336, "grad_norm": 1.2470815181732178, "learning_rate": 2e-05, "loss": 0.04924322, "step": 10668 }, { "epoch": 21.338, "grad_norm": 1.0096725225448608, "learning_rate": 2e-05, "loss": 0.03073388, "step": 10669 }, { "epoch": 21.34, "grad_norm": 1.834978699684143, "learning_rate": 2e-05, "loss": 0.04468485, "step": 10670 }, { "epoch": 21.342, "grad_norm": 1.3140544891357422, "learning_rate": 2e-05, "loss": 0.04242922, "step": 10671 }, { "epoch": 21.344, "grad_norm": 1.2096999883651733, "learning_rate": 2e-05, "loss": 0.05891763, "step": 10672 }, { "epoch": 21.346, "grad_norm": 1.1214110851287842, "learning_rate": 2e-05, "loss": 0.06100915, "step": 10673 }, { "epoch": 21.348, "grad_norm": 1.1962796449661255, "learning_rate": 2e-05, "loss": 0.04508556, "step": 10674 }, { "epoch": 21.35, "grad_norm": 0.8824537396430969, "learning_rate": 2e-05, "loss": 0.03424706, "step": 10675 }, { "epoch": 21.352, "grad_norm": 1.7408071756362915, "learning_rate": 2e-05, "loss": 0.05153247, "step": 10676 }, { "epoch": 21.354, "grad_norm": 1.6729189157485962, "learning_rate": 2e-05, "loss": 0.04065928, "step": 10677 }, { "epoch": 21.356, "grad_norm": 1.539340853691101, "learning_rate": 2e-05, "loss": 0.05090845, "step": 10678 }, { "epoch": 21.358, "grad_norm": 1.6319481134414673, "learning_rate": 2e-05, "loss": 0.04200333, "step": 10679 }, { "epoch": 21.36, "grad_norm": 0.9873043894767761, "learning_rate": 2e-05, "loss": 0.03633, "step": 10680 }, { "epoch": 21.362, "grad_norm": 0.9711951613426208, "learning_rate": 2e-05, "loss": 0.04266242, "step": 10681 }, { "epoch": 21.364, "grad_norm": 1.160173773765564, "learning_rate": 2e-05, "loss": 0.05191543, "step": 10682 }, { "epoch": 21.366, "grad_norm": 1.3234270811080933, "learning_rate": 2e-05, "loss": 0.04084229, "step": 10683 }, { "epoch": 21.368, "grad_norm": 1.0652961730957031, "learning_rate": 2e-05, "loss": 0.04505602, "step": 10684 }, { "epoch": 21.37, "grad_norm": 1.6228474378585815, "learning_rate": 2e-05, "loss": 0.03944923, "step": 10685 }, { "epoch": 21.372, "grad_norm": 1.8732964992523193, "learning_rate": 2e-05, "loss": 0.05642599, "step": 10686 }, { "epoch": 21.374, "grad_norm": 0.7637419700622559, "learning_rate": 2e-05, "loss": 0.02391596, "step": 10687 }, { "epoch": 21.376, "grad_norm": 1.6537290811538696, "learning_rate": 2e-05, "loss": 0.0404052, "step": 10688 }, { "epoch": 21.378, "grad_norm": 0.9807186126708984, "learning_rate": 2e-05, "loss": 0.03436504, "step": 10689 }, { "epoch": 21.38, "grad_norm": 1.0753425359725952, "learning_rate": 2e-05, "loss": 0.05040091, "step": 10690 }, { "epoch": 21.382, "grad_norm": 2.0605592727661133, "learning_rate": 2e-05, "loss": 0.04176111, "step": 10691 }, { "epoch": 21.384, "grad_norm": 1.139441728591919, "learning_rate": 2e-05, "loss": 0.04633481, "step": 10692 }, { "epoch": 21.386, "grad_norm": 1.4741154909133911, "learning_rate": 2e-05, "loss": 0.04474012, "step": 10693 }, { "epoch": 21.388, "grad_norm": 1.269351601600647, "learning_rate": 2e-05, "loss": 0.05082484, "step": 10694 }, { "epoch": 21.39, "grad_norm": 0.9330033659934998, "learning_rate": 2e-05, "loss": 0.03949191, "step": 10695 }, { "epoch": 21.392, "grad_norm": 1.4127687215805054, "learning_rate": 2e-05, "loss": 0.06799394, "step": 10696 }, { "epoch": 21.394, "grad_norm": 1.27243971824646, "learning_rate": 2e-05, "loss": 0.05329065, "step": 10697 }, { "epoch": 21.396, "grad_norm": 4.482356071472168, "learning_rate": 2e-05, "loss": 0.06485481, "step": 10698 }, { "epoch": 21.398, "grad_norm": 1.146079182624817, "learning_rate": 2e-05, "loss": 0.05040262, "step": 10699 }, { "epoch": 21.4, "grad_norm": 0.9180232286453247, "learning_rate": 2e-05, "loss": 0.04055592, "step": 10700 }, { "epoch": 21.402, "grad_norm": 4.0807013511657715, "learning_rate": 2e-05, "loss": 0.04825997, "step": 10701 }, { "epoch": 21.404, "grad_norm": 1.5421088933944702, "learning_rate": 2e-05, "loss": 0.05332397, "step": 10702 }, { "epoch": 21.406, "grad_norm": 1.1146546602249146, "learning_rate": 2e-05, "loss": 0.04784375, "step": 10703 }, { "epoch": 21.408, "grad_norm": 4.651499271392822, "learning_rate": 2e-05, "loss": 0.06635147, "step": 10704 }, { "epoch": 21.41, "grad_norm": 2.12015962600708, "learning_rate": 2e-05, "loss": 0.06577589, "step": 10705 }, { "epoch": 21.412, "grad_norm": 1.3901772499084473, "learning_rate": 2e-05, "loss": 0.06080957, "step": 10706 }, { "epoch": 21.414, "grad_norm": 2.089585542678833, "learning_rate": 2e-05, "loss": 0.05386135, "step": 10707 }, { "epoch": 21.416, "grad_norm": 1.3305190801620483, "learning_rate": 2e-05, "loss": 0.06091367, "step": 10708 }, { "epoch": 21.418, "grad_norm": 1.0781517028808594, "learning_rate": 2e-05, "loss": 0.04775723, "step": 10709 }, { "epoch": 21.42, "grad_norm": 1.3431874513626099, "learning_rate": 2e-05, "loss": 0.04476028, "step": 10710 }, { "epoch": 21.422, "grad_norm": 1.3377794027328491, "learning_rate": 2e-05, "loss": 0.03685569, "step": 10711 }, { "epoch": 21.424, "grad_norm": 2.0752112865448, "learning_rate": 2e-05, "loss": 0.05354298, "step": 10712 }, { "epoch": 21.426, "grad_norm": 1.1806585788726807, "learning_rate": 2e-05, "loss": 0.05194972, "step": 10713 }, { "epoch": 21.428, "grad_norm": 1.1303317546844482, "learning_rate": 2e-05, "loss": 0.04121489, "step": 10714 }, { "epoch": 21.43, "grad_norm": 1.8700817823410034, "learning_rate": 2e-05, "loss": 0.06301868, "step": 10715 }, { "epoch": 21.432, "grad_norm": 1.181406855583191, "learning_rate": 2e-05, "loss": 0.03996438, "step": 10716 }, { "epoch": 21.434, "grad_norm": 1.2867814302444458, "learning_rate": 2e-05, "loss": 0.05031547, "step": 10717 }, { "epoch": 21.436, "grad_norm": 1.2296504974365234, "learning_rate": 2e-05, "loss": 0.05210525, "step": 10718 }, { "epoch": 21.438, "grad_norm": 1.2030439376831055, "learning_rate": 2e-05, "loss": 0.04983662, "step": 10719 }, { "epoch": 21.44, "grad_norm": 1.563281774520874, "learning_rate": 2e-05, "loss": 0.05288072, "step": 10720 }, { "epoch": 21.442, "grad_norm": 1.9170570373535156, "learning_rate": 2e-05, "loss": 0.06569774, "step": 10721 }, { "epoch": 21.444, "grad_norm": 1.6285518407821655, "learning_rate": 2e-05, "loss": 0.04179981, "step": 10722 }, { "epoch": 21.446, "grad_norm": 2.915325403213501, "learning_rate": 2e-05, "loss": 0.05017271, "step": 10723 }, { "epoch": 21.448, "grad_norm": 2.275808095932007, "learning_rate": 2e-05, "loss": 0.04769781, "step": 10724 }, { "epoch": 21.45, "grad_norm": 1.0752995014190674, "learning_rate": 2e-05, "loss": 0.04658436, "step": 10725 }, { "epoch": 21.452, "grad_norm": 1.1656360626220703, "learning_rate": 2e-05, "loss": 0.0411046, "step": 10726 }, { "epoch": 21.454, "grad_norm": 3.0095293521881104, "learning_rate": 2e-05, "loss": 0.04792668, "step": 10727 }, { "epoch": 21.456, "grad_norm": 2.100032091140747, "learning_rate": 2e-05, "loss": 0.04103816, "step": 10728 }, { "epoch": 21.458, "grad_norm": 1.425531029701233, "learning_rate": 2e-05, "loss": 0.0531222, "step": 10729 }, { "epoch": 21.46, "grad_norm": 1.2464319467544556, "learning_rate": 2e-05, "loss": 0.0381246, "step": 10730 }, { "epoch": 21.462, "grad_norm": 2.3038737773895264, "learning_rate": 2e-05, "loss": 0.06451148, "step": 10731 }, { "epoch": 21.464, "grad_norm": 1.0131616592407227, "learning_rate": 2e-05, "loss": 0.04211922, "step": 10732 }, { "epoch": 21.466, "grad_norm": 1.54676353931427, "learning_rate": 2e-05, "loss": 0.04370299, "step": 10733 }, { "epoch": 21.468, "grad_norm": 1.9857676029205322, "learning_rate": 2e-05, "loss": 0.06327908, "step": 10734 }, { "epoch": 21.47, "grad_norm": 1.5637359619140625, "learning_rate": 2e-05, "loss": 0.06312453, "step": 10735 }, { "epoch": 21.472, "grad_norm": 1.2545725107192993, "learning_rate": 2e-05, "loss": 0.03768359, "step": 10736 }, { "epoch": 21.474, "grad_norm": 1.2682950496673584, "learning_rate": 2e-05, "loss": 0.04753257, "step": 10737 }, { "epoch": 21.476, "grad_norm": 1.4683916568756104, "learning_rate": 2e-05, "loss": 0.04093602, "step": 10738 }, { "epoch": 21.478, "grad_norm": 1.864395260810852, "learning_rate": 2e-05, "loss": 0.0702012, "step": 10739 }, { "epoch": 21.48, "grad_norm": 1.4675545692443848, "learning_rate": 2e-05, "loss": 0.04307876, "step": 10740 }, { "epoch": 21.482, "grad_norm": 1.5357180833816528, "learning_rate": 2e-05, "loss": 0.05860517, "step": 10741 }, { "epoch": 21.484, "grad_norm": 1.5946459770202637, "learning_rate": 2e-05, "loss": 0.05215637, "step": 10742 }, { "epoch": 21.486, "grad_norm": 1.2073023319244385, "learning_rate": 2e-05, "loss": 0.04685083, "step": 10743 }, { "epoch": 21.488, "grad_norm": 2.062615394592285, "learning_rate": 2e-05, "loss": 0.0652276, "step": 10744 }, { "epoch": 21.49, "grad_norm": 1.4196233749389648, "learning_rate": 2e-05, "loss": 0.04821397, "step": 10745 }, { "epoch": 21.492, "grad_norm": 1.0786569118499756, "learning_rate": 2e-05, "loss": 0.04083278, "step": 10746 }, { "epoch": 21.494, "grad_norm": 2.8055970668792725, "learning_rate": 2e-05, "loss": 0.04834145, "step": 10747 }, { "epoch": 21.496, "grad_norm": 1.0388076305389404, "learning_rate": 2e-05, "loss": 0.03615007, "step": 10748 }, { "epoch": 21.498, "grad_norm": 1.2732515335083008, "learning_rate": 2e-05, "loss": 0.03172494, "step": 10749 }, { "epoch": 21.5, "grad_norm": 1.8116390705108643, "learning_rate": 2e-05, "loss": 0.03191656, "step": 10750 }, { "epoch": 21.502, "grad_norm": 1.3637638092041016, "learning_rate": 2e-05, "loss": 0.04614132, "step": 10751 }, { "epoch": 21.504, "grad_norm": 1.4618096351623535, "learning_rate": 2e-05, "loss": 0.06338081, "step": 10752 }, { "epoch": 21.506, "grad_norm": 1.139530897140503, "learning_rate": 2e-05, "loss": 0.04477368, "step": 10753 }, { "epoch": 21.508, "grad_norm": 1.5060710906982422, "learning_rate": 2e-05, "loss": 0.0608925, "step": 10754 }, { "epoch": 21.51, "grad_norm": 1.5198105573654175, "learning_rate": 2e-05, "loss": 0.04475032, "step": 10755 }, { "epoch": 21.512, "grad_norm": 1.1496227979660034, "learning_rate": 2e-05, "loss": 0.05887908, "step": 10756 }, { "epoch": 21.514, "grad_norm": 2.3476431369781494, "learning_rate": 2e-05, "loss": 0.05738109, "step": 10757 }, { "epoch": 21.516, "grad_norm": 1.5028834342956543, "learning_rate": 2e-05, "loss": 0.06443106, "step": 10758 }, { "epoch": 21.518, "grad_norm": 1.1287204027175903, "learning_rate": 2e-05, "loss": 0.04088497, "step": 10759 }, { "epoch": 21.52, "grad_norm": 1.3323323726654053, "learning_rate": 2e-05, "loss": 0.04499546, "step": 10760 }, { "epoch": 21.522, "grad_norm": 0.9581408500671387, "learning_rate": 2e-05, "loss": 0.03405827, "step": 10761 }, { "epoch": 21.524, "grad_norm": 1.2987138032913208, "learning_rate": 2e-05, "loss": 0.0462433, "step": 10762 }, { "epoch": 21.526, "grad_norm": 1.1162278652191162, "learning_rate": 2e-05, "loss": 0.04361096, "step": 10763 }, { "epoch": 21.528, "grad_norm": 1.6630140542984009, "learning_rate": 2e-05, "loss": 0.07264411, "step": 10764 }, { "epoch": 21.53, "grad_norm": 1.405669927597046, "learning_rate": 2e-05, "loss": 0.04709651, "step": 10765 }, { "epoch": 21.532, "grad_norm": 2.1564292907714844, "learning_rate": 2e-05, "loss": 0.04548428, "step": 10766 }, { "epoch": 21.534, "grad_norm": 1.4108883142471313, "learning_rate": 2e-05, "loss": 0.03512915, "step": 10767 }, { "epoch": 21.536, "grad_norm": 1.49359130859375, "learning_rate": 2e-05, "loss": 0.05225707, "step": 10768 }, { "epoch": 21.538, "grad_norm": 0.9672752022743225, "learning_rate": 2e-05, "loss": 0.03731929, "step": 10769 }, { "epoch": 21.54, "grad_norm": 1.511135458946228, "learning_rate": 2e-05, "loss": 0.06269777, "step": 10770 }, { "epoch": 21.542, "grad_norm": 1.0327112674713135, "learning_rate": 2e-05, "loss": 0.0403932, "step": 10771 }, { "epoch": 21.544, "grad_norm": 2.080780029296875, "learning_rate": 2e-05, "loss": 0.05038399, "step": 10772 }, { "epoch": 21.546, "grad_norm": 1.427672266960144, "learning_rate": 2e-05, "loss": 0.04966066, "step": 10773 }, { "epoch": 21.548000000000002, "grad_norm": 1.0006537437438965, "learning_rate": 2e-05, "loss": 0.02850555, "step": 10774 }, { "epoch": 21.55, "grad_norm": 1.6588624715805054, "learning_rate": 2e-05, "loss": 0.05049262, "step": 10775 }, { "epoch": 21.552, "grad_norm": 2.063532829284668, "learning_rate": 2e-05, "loss": 0.05635822, "step": 10776 }, { "epoch": 21.554, "grad_norm": 1.1709933280944824, "learning_rate": 2e-05, "loss": 0.03599269, "step": 10777 }, { "epoch": 21.556, "grad_norm": 1.1479626893997192, "learning_rate": 2e-05, "loss": 0.04424399, "step": 10778 }, { "epoch": 21.558, "grad_norm": 0.8640039563179016, "learning_rate": 2e-05, "loss": 0.03641316, "step": 10779 }, { "epoch": 21.56, "grad_norm": 1.5043388605117798, "learning_rate": 2e-05, "loss": 0.06287505, "step": 10780 }, { "epoch": 21.562, "grad_norm": 1.2115741968154907, "learning_rate": 2e-05, "loss": 0.04813265, "step": 10781 }, { "epoch": 21.564, "grad_norm": 1.55413818359375, "learning_rate": 2e-05, "loss": 0.0523724, "step": 10782 }, { "epoch": 21.566, "grad_norm": 1.6939338445663452, "learning_rate": 2e-05, "loss": 0.04972598, "step": 10783 }, { "epoch": 21.568, "grad_norm": 1.1721928119659424, "learning_rate": 2e-05, "loss": 0.04931511, "step": 10784 }, { "epoch": 21.57, "grad_norm": 1.24051833152771, "learning_rate": 2e-05, "loss": 0.05350111, "step": 10785 }, { "epoch": 21.572, "grad_norm": 1.2953356504440308, "learning_rate": 2e-05, "loss": 0.04935947, "step": 10786 }, { "epoch": 21.574, "grad_norm": 1.5951873064041138, "learning_rate": 2e-05, "loss": 0.07113877, "step": 10787 }, { "epoch": 21.576, "grad_norm": 1.4852906465530396, "learning_rate": 2e-05, "loss": 0.04671808, "step": 10788 }, { "epoch": 21.578, "grad_norm": 1.1027790307998657, "learning_rate": 2e-05, "loss": 0.0404595, "step": 10789 }, { "epoch": 21.58, "grad_norm": 1.1362584829330444, "learning_rate": 2e-05, "loss": 0.03537664, "step": 10790 }, { "epoch": 21.582, "grad_norm": 0.9323720932006836, "learning_rate": 2e-05, "loss": 0.04099556, "step": 10791 }, { "epoch": 21.584, "grad_norm": 1.0940784215927124, "learning_rate": 2e-05, "loss": 0.03483219, "step": 10792 }, { "epoch": 21.586, "grad_norm": 1.3247867822647095, "learning_rate": 2e-05, "loss": 0.06219739, "step": 10793 }, { "epoch": 21.588, "grad_norm": 1.0772418975830078, "learning_rate": 2e-05, "loss": 0.03814975, "step": 10794 }, { "epoch": 21.59, "grad_norm": 2.0017001628875732, "learning_rate": 2e-05, "loss": 0.05003048, "step": 10795 }, { "epoch": 21.592, "grad_norm": 3.4007842540740967, "learning_rate": 2e-05, "loss": 0.0767404, "step": 10796 }, { "epoch": 21.594, "grad_norm": 1.0665138959884644, "learning_rate": 2e-05, "loss": 0.04155235, "step": 10797 }, { "epoch": 21.596, "grad_norm": 1.6279805898666382, "learning_rate": 2e-05, "loss": 0.06509952, "step": 10798 }, { "epoch": 21.598, "grad_norm": 1.2621570825576782, "learning_rate": 2e-05, "loss": 0.05558634, "step": 10799 }, { "epoch": 21.6, "grad_norm": 1.0886354446411133, "learning_rate": 2e-05, "loss": 0.04162983, "step": 10800 }, { "epoch": 21.602, "grad_norm": 1.6637775897979736, "learning_rate": 2e-05, "loss": 0.06448497, "step": 10801 }, { "epoch": 21.604, "grad_norm": 1.436872124671936, "learning_rate": 2e-05, "loss": 0.06009813, "step": 10802 }, { "epoch": 21.606, "grad_norm": 1.527389407157898, "learning_rate": 2e-05, "loss": 0.05477608, "step": 10803 }, { "epoch": 21.608, "grad_norm": 2.3143742084503174, "learning_rate": 2e-05, "loss": 0.05368875, "step": 10804 }, { "epoch": 21.61, "grad_norm": 1.0676593780517578, "learning_rate": 2e-05, "loss": 0.04899472, "step": 10805 }, { "epoch": 21.612, "grad_norm": 1.1917104721069336, "learning_rate": 2e-05, "loss": 0.03964073, "step": 10806 }, { "epoch": 21.614, "grad_norm": 1.3936713933944702, "learning_rate": 2e-05, "loss": 0.04972361, "step": 10807 }, { "epoch": 21.616, "grad_norm": 1.148393988609314, "learning_rate": 2e-05, "loss": 0.04126555, "step": 10808 }, { "epoch": 21.618, "grad_norm": 1.179006576538086, "learning_rate": 2e-05, "loss": 0.05176573, "step": 10809 }, { "epoch": 21.62, "grad_norm": 1.3082034587860107, "learning_rate": 2e-05, "loss": 0.05216352, "step": 10810 }, { "epoch": 21.622, "grad_norm": 1.8960274457931519, "learning_rate": 2e-05, "loss": 0.05216168, "step": 10811 }, { "epoch": 21.624, "grad_norm": 0.9456546902656555, "learning_rate": 2e-05, "loss": 0.03780545, "step": 10812 }, { "epoch": 21.626, "grad_norm": 2.7292959690093994, "learning_rate": 2e-05, "loss": 0.04640452, "step": 10813 }, { "epoch": 21.628, "grad_norm": 1.14462411403656, "learning_rate": 2e-05, "loss": 0.05038172, "step": 10814 }, { "epoch": 21.63, "grad_norm": 1.1362615823745728, "learning_rate": 2e-05, "loss": 0.05208543, "step": 10815 }, { "epoch": 21.632, "grad_norm": 1.6947184801101685, "learning_rate": 2e-05, "loss": 0.05242987, "step": 10816 }, { "epoch": 21.634, "grad_norm": 1.591141939163208, "learning_rate": 2e-05, "loss": 0.07459897, "step": 10817 }, { "epoch": 21.636, "grad_norm": 1.2862956523895264, "learning_rate": 2e-05, "loss": 0.04276252, "step": 10818 }, { "epoch": 21.638, "grad_norm": 1.3754534721374512, "learning_rate": 2e-05, "loss": 0.04172885, "step": 10819 }, { "epoch": 21.64, "grad_norm": 1.357790231704712, "learning_rate": 2e-05, "loss": 0.05107214, "step": 10820 }, { "epoch": 21.642, "grad_norm": 1.0642123222351074, "learning_rate": 2e-05, "loss": 0.03948425, "step": 10821 }, { "epoch": 21.644, "grad_norm": 1.9813474416732788, "learning_rate": 2e-05, "loss": 0.05870941, "step": 10822 }, { "epoch": 21.646, "grad_norm": 1.0153528451919556, "learning_rate": 2e-05, "loss": 0.06308308, "step": 10823 }, { "epoch": 21.648, "grad_norm": 1.2893948554992676, "learning_rate": 2e-05, "loss": 0.05264116, "step": 10824 }, { "epoch": 21.65, "grad_norm": 1.7543838024139404, "learning_rate": 2e-05, "loss": 0.06136283, "step": 10825 }, { "epoch": 21.652, "grad_norm": 1.3445348739624023, "learning_rate": 2e-05, "loss": 0.05309365, "step": 10826 }, { "epoch": 21.654, "grad_norm": 1.296164870262146, "learning_rate": 2e-05, "loss": 0.03538866, "step": 10827 }, { "epoch": 21.656, "grad_norm": 1.0181268453598022, "learning_rate": 2e-05, "loss": 0.03438111, "step": 10828 }, { "epoch": 21.658, "grad_norm": 1.5132625102996826, "learning_rate": 2e-05, "loss": 0.05754954, "step": 10829 }, { "epoch": 21.66, "grad_norm": 1.1303023099899292, "learning_rate": 2e-05, "loss": 0.04443492, "step": 10830 }, { "epoch": 21.662, "grad_norm": 2.5929861068725586, "learning_rate": 2e-05, "loss": 0.06279647, "step": 10831 }, { "epoch": 21.664, "grad_norm": 1.8088085651397705, "learning_rate": 2e-05, "loss": 0.05454122, "step": 10832 }, { "epoch": 21.666, "grad_norm": 1.0593384504318237, "learning_rate": 2e-05, "loss": 0.04819227, "step": 10833 }, { "epoch": 21.668, "grad_norm": 1.9504371881484985, "learning_rate": 2e-05, "loss": 0.05102558, "step": 10834 }, { "epoch": 21.67, "grad_norm": 1.332598090171814, "learning_rate": 2e-05, "loss": 0.04735455, "step": 10835 }, { "epoch": 21.672, "grad_norm": 1.222787857055664, "learning_rate": 2e-05, "loss": 0.03774611, "step": 10836 }, { "epoch": 21.674, "grad_norm": 2.1958541870117188, "learning_rate": 2e-05, "loss": 0.04284629, "step": 10837 }, { "epoch": 21.676, "grad_norm": 1.383122444152832, "learning_rate": 2e-05, "loss": 0.05444495, "step": 10838 }, { "epoch": 21.678, "grad_norm": 1.6830791234970093, "learning_rate": 2e-05, "loss": 0.04696085, "step": 10839 }, { "epoch": 21.68, "grad_norm": 1.126076340675354, "learning_rate": 2e-05, "loss": 0.0394025, "step": 10840 }, { "epoch": 21.682, "grad_norm": 3.7565646171569824, "learning_rate": 2e-05, "loss": 0.04956517, "step": 10841 }, { "epoch": 21.684, "grad_norm": 1.0499671697616577, "learning_rate": 2e-05, "loss": 0.04113387, "step": 10842 }, { "epoch": 21.686, "grad_norm": 1.130775809288025, "learning_rate": 2e-05, "loss": 0.05019506, "step": 10843 }, { "epoch": 21.688, "grad_norm": 1.1295976638793945, "learning_rate": 2e-05, "loss": 0.03581254, "step": 10844 }, { "epoch": 21.69, "grad_norm": 1.6295576095581055, "learning_rate": 2e-05, "loss": 0.04524476, "step": 10845 }, { "epoch": 21.692, "grad_norm": 1.0274832248687744, "learning_rate": 2e-05, "loss": 0.03942137, "step": 10846 }, { "epoch": 21.694, "grad_norm": 1.5693303346633911, "learning_rate": 2e-05, "loss": 0.06840248, "step": 10847 }, { "epoch": 21.696, "grad_norm": 1.2781906127929688, "learning_rate": 2e-05, "loss": 0.06332578, "step": 10848 }, { "epoch": 21.698, "grad_norm": 1.1110953092575073, "learning_rate": 2e-05, "loss": 0.04137561, "step": 10849 }, { "epoch": 21.7, "grad_norm": 1.0088920593261719, "learning_rate": 2e-05, "loss": 0.04673091, "step": 10850 }, { "epoch": 21.701999999999998, "grad_norm": 1.2185165882110596, "learning_rate": 2e-05, "loss": 0.04827049, "step": 10851 }, { "epoch": 21.704, "grad_norm": 1.4220683574676514, "learning_rate": 2e-05, "loss": 0.04494026, "step": 10852 }, { "epoch": 21.706, "grad_norm": 1.5694994926452637, "learning_rate": 2e-05, "loss": 0.05738157, "step": 10853 }, { "epoch": 21.708, "grad_norm": 1.4578444957733154, "learning_rate": 2e-05, "loss": 0.05446623, "step": 10854 }, { "epoch": 21.71, "grad_norm": 1.6768006086349487, "learning_rate": 2e-05, "loss": 0.04381708, "step": 10855 }, { "epoch": 21.712, "grad_norm": 2.0263848304748535, "learning_rate": 2e-05, "loss": 0.06331879, "step": 10856 }, { "epoch": 21.714, "grad_norm": 1.229150652885437, "learning_rate": 2e-05, "loss": 0.04257523, "step": 10857 }, { "epoch": 21.716, "grad_norm": 1.7271251678466797, "learning_rate": 2e-05, "loss": 0.05899847, "step": 10858 }, { "epoch": 21.718, "grad_norm": 1.3719837665557861, "learning_rate": 2e-05, "loss": 0.03831611, "step": 10859 }, { "epoch": 21.72, "grad_norm": 2.190244197845459, "learning_rate": 2e-05, "loss": 0.06620126, "step": 10860 }, { "epoch": 21.722, "grad_norm": 1.3871395587921143, "learning_rate": 2e-05, "loss": 0.04863811, "step": 10861 }, { "epoch": 21.724, "grad_norm": 1.6706187725067139, "learning_rate": 2e-05, "loss": 0.04381331, "step": 10862 }, { "epoch": 21.726, "grad_norm": 0.9542924761772156, "learning_rate": 2e-05, "loss": 0.04143474, "step": 10863 }, { "epoch": 21.728, "grad_norm": 1.539429783821106, "learning_rate": 2e-05, "loss": 0.07104433, "step": 10864 }, { "epoch": 21.73, "grad_norm": 1.6637413501739502, "learning_rate": 2e-05, "loss": 0.04539631, "step": 10865 }, { "epoch": 21.732, "grad_norm": 1.422980546951294, "learning_rate": 2e-05, "loss": 0.07502291, "step": 10866 }, { "epoch": 21.734, "grad_norm": 1.6763197183609009, "learning_rate": 2e-05, "loss": 0.05456452, "step": 10867 }, { "epoch": 21.736, "grad_norm": 1.1481200456619263, "learning_rate": 2e-05, "loss": 0.03864836, "step": 10868 }, { "epoch": 21.738, "grad_norm": 1.497483491897583, "learning_rate": 2e-05, "loss": 0.03810099, "step": 10869 }, { "epoch": 21.74, "grad_norm": 1.053067684173584, "learning_rate": 2e-05, "loss": 0.04674568, "step": 10870 }, { "epoch": 21.742, "grad_norm": 1.0314161777496338, "learning_rate": 2e-05, "loss": 0.04480613, "step": 10871 }, { "epoch": 21.744, "grad_norm": 1.0678924322128296, "learning_rate": 2e-05, "loss": 0.04423539, "step": 10872 }, { "epoch": 21.746, "grad_norm": 1.354935884475708, "learning_rate": 2e-05, "loss": 0.044518, "step": 10873 }, { "epoch": 21.748, "grad_norm": 1.6646746397018433, "learning_rate": 2e-05, "loss": 0.03271763, "step": 10874 }, { "epoch": 21.75, "grad_norm": 1.0096784830093384, "learning_rate": 2e-05, "loss": 0.03809992, "step": 10875 }, { "epoch": 21.752, "grad_norm": 1.764803171157837, "learning_rate": 2e-05, "loss": 0.05012478, "step": 10876 }, { "epoch": 21.754, "grad_norm": 2.3023688793182373, "learning_rate": 2e-05, "loss": 0.05050313, "step": 10877 }, { "epoch": 21.756, "grad_norm": 1.1686108112335205, "learning_rate": 2e-05, "loss": 0.04730064, "step": 10878 }, { "epoch": 21.758, "grad_norm": 2.2827768325805664, "learning_rate": 2e-05, "loss": 0.05123483, "step": 10879 }, { "epoch": 21.76, "grad_norm": 1.111997127532959, "learning_rate": 2e-05, "loss": 0.04711284, "step": 10880 }, { "epoch": 21.762, "grad_norm": 1.7018520832061768, "learning_rate": 2e-05, "loss": 0.0456171, "step": 10881 }, { "epoch": 21.764, "grad_norm": 1.3569676876068115, "learning_rate": 2e-05, "loss": 0.04593941, "step": 10882 }, { "epoch": 21.766, "grad_norm": 1.3863903284072876, "learning_rate": 2e-05, "loss": 0.05311755, "step": 10883 }, { "epoch": 21.768, "grad_norm": 1.2897217273712158, "learning_rate": 2e-05, "loss": 0.05233236, "step": 10884 }, { "epoch": 21.77, "grad_norm": 1.732940435409546, "learning_rate": 2e-05, "loss": 0.04600258, "step": 10885 }, { "epoch": 21.772, "grad_norm": 1.3293756246566772, "learning_rate": 2e-05, "loss": 0.04925299, "step": 10886 }, { "epoch": 21.774, "grad_norm": 1.0012142658233643, "learning_rate": 2e-05, "loss": 0.04227997, "step": 10887 }, { "epoch": 21.776, "grad_norm": 1.422357439994812, "learning_rate": 2e-05, "loss": 0.04068128, "step": 10888 }, { "epoch": 21.778, "grad_norm": 0.9533287882804871, "learning_rate": 2e-05, "loss": 0.03588799, "step": 10889 }, { "epoch": 21.78, "grad_norm": 1.362334966659546, "learning_rate": 2e-05, "loss": 0.0448611, "step": 10890 }, { "epoch": 21.782, "grad_norm": 0.9369307160377502, "learning_rate": 2e-05, "loss": 0.04087372, "step": 10891 }, { "epoch": 21.784, "grad_norm": 1.2135220766067505, "learning_rate": 2e-05, "loss": 0.047553, "step": 10892 }, { "epoch": 21.786, "grad_norm": 0.9788151383399963, "learning_rate": 2e-05, "loss": 0.03620833, "step": 10893 }, { "epoch": 21.788, "grad_norm": 1.2716730833053589, "learning_rate": 2e-05, "loss": 0.03031558, "step": 10894 }, { "epoch": 21.79, "grad_norm": 1.116315245628357, "learning_rate": 2e-05, "loss": 0.04358283, "step": 10895 }, { "epoch": 21.792, "grad_norm": 0.9045530557632446, "learning_rate": 2e-05, "loss": 0.03885446, "step": 10896 }, { "epoch": 21.794, "grad_norm": 1.1230559349060059, "learning_rate": 2e-05, "loss": 0.04369999, "step": 10897 }, { "epoch": 21.796, "grad_norm": 0.982038676738739, "learning_rate": 2e-05, "loss": 0.04148884, "step": 10898 }, { "epoch": 21.798000000000002, "grad_norm": 1.258028507232666, "learning_rate": 2e-05, "loss": 0.05553634, "step": 10899 }, { "epoch": 21.8, "grad_norm": 1.2691971063613892, "learning_rate": 2e-05, "loss": 0.05354936, "step": 10900 }, { "epoch": 21.802, "grad_norm": 1.2180734872817993, "learning_rate": 2e-05, "loss": 0.04251622, "step": 10901 }, { "epoch": 21.804, "grad_norm": 2.1461055278778076, "learning_rate": 2e-05, "loss": 0.04220914, "step": 10902 }, { "epoch": 21.806, "grad_norm": 1.2941149473190308, "learning_rate": 2e-05, "loss": 0.04063112, "step": 10903 }, { "epoch": 21.808, "grad_norm": 1.5833343267440796, "learning_rate": 2e-05, "loss": 0.07265215, "step": 10904 }, { "epoch": 21.81, "grad_norm": 1.2763941287994385, "learning_rate": 2e-05, "loss": 0.05399381, "step": 10905 }, { "epoch": 21.812, "grad_norm": 1.1573187112808228, "learning_rate": 2e-05, "loss": 0.03452818, "step": 10906 }, { "epoch": 21.814, "grad_norm": 1.0586984157562256, "learning_rate": 2e-05, "loss": 0.04295314, "step": 10907 }, { "epoch": 21.816, "grad_norm": 1.6733862161636353, "learning_rate": 2e-05, "loss": 0.0511687, "step": 10908 }, { "epoch": 21.818, "grad_norm": 1.0059165954589844, "learning_rate": 2e-05, "loss": 0.03328501, "step": 10909 }, { "epoch": 21.82, "grad_norm": 1.6145689487457275, "learning_rate": 2e-05, "loss": 0.03465503, "step": 10910 }, { "epoch": 21.822, "grad_norm": 1.1521812677383423, "learning_rate": 2e-05, "loss": 0.055716, "step": 10911 }, { "epoch": 21.824, "grad_norm": 1.399356722831726, "learning_rate": 2e-05, "loss": 0.04399845, "step": 10912 }, { "epoch": 21.826, "grad_norm": 1.8220099210739136, "learning_rate": 2e-05, "loss": 0.04525939, "step": 10913 }, { "epoch": 21.828, "grad_norm": 0.8245351314544678, "learning_rate": 2e-05, "loss": 0.02732522, "step": 10914 }, { "epoch": 21.83, "grad_norm": 2.3092358112335205, "learning_rate": 2e-05, "loss": 0.05459534, "step": 10915 }, { "epoch": 21.832, "grad_norm": 1.2672592401504517, "learning_rate": 2e-05, "loss": 0.06113293, "step": 10916 }, { "epoch": 21.834, "grad_norm": 1.6747488975524902, "learning_rate": 2e-05, "loss": 0.06096224, "step": 10917 }, { "epoch": 21.836, "grad_norm": 1.7285672426223755, "learning_rate": 2e-05, "loss": 0.03563772, "step": 10918 }, { "epoch": 21.838, "grad_norm": 1.3562101125717163, "learning_rate": 2e-05, "loss": 0.0579962, "step": 10919 }, { "epoch": 21.84, "grad_norm": 1.3072152137756348, "learning_rate": 2e-05, "loss": 0.04561493, "step": 10920 }, { "epoch": 21.842, "grad_norm": 1.24988853931427, "learning_rate": 2e-05, "loss": 0.05130323, "step": 10921 }, { "epoch": 21.844, "grad_norm": 1.6222392320632935, "learning_rate": 2e-05, "loss": 0.04971826, "step": 10922 }, { "epoch": 21.846, "grad_norm": 1.030404806137085, "learning_rate": 2e-05, "loss": 0.04562754, "step": 10923 }, { "epoch": 21.848, "grad_norm": 1.1271477937698364, "learning_rate": 2e-05, "loss": 0.04258311, "step": 10924 }, { "epoch": 21.85, "grad_norm": 1.6309809684753418, "learning_rate": 2e-05, "loss": 0.05682607, "step": 10925 }, { "epoch": 21.852, "grad_norm": 1.4102809429168701, "learning_rate": 2e-05, "loss": 0.06260353, "step": 10926 }, { "epoch": 21.854, "grad_norm": 1.2079343795776367, "learning_rate": 2e-05, "loss": 0.04622602, "step": 10927 }, { "epoch": 21.856, "grad_norm": 1.3406726121902466, "learning_rate": 2e-05, "loss": 0.0426941, "step": 10928 }, { "epoch": 21.858, "grad_norm": 1.5345282554626465, "learning_rate": 2e-05, "loss": 0.07938831, "step": 10929 }, { "epoch": 21.86, "grad_norm": 1.1951526403427124, "learning_rate": 2e-05, "loss": 0.05457199, "step": 10930 }, { "epoch": 21.862, "grad_norm": 1.6740220785140991, "learning_rate": 2e-05, "loss": 0.04766332, "step": 10931 }, { "epoch": 21.864, "grad_norm": 1.5154451131820679, "learning_rate": 2e-05, "loss": 0.05034049, "step": 10932 }, { "epoch": 21.866, "grad_norm": 2.218768358230591, "learning_rate": 2e-05, "loss": 0.05671722, "step": 10933 }, { "epoch": 21.868, "grad_norm": 1.1361143589019775, "learning_rate": 2e-05, "loss": 0.05206899, "step": 10934 }, { "epoch": 21.87, "grad_norm": 1.7193797826766968, "learning_rate": 2e-05, "loss": 0.06362911, "step": 10935 }, { "epoch": 21.872, "grad_norm": 1.128735899925232, "learning_rate": 2e-05, "loss": 0.05590115, "step": 10936 }, { "epoch": 21.874, "grad_norm": 1.033158540725708, "learning_rate": 2e-05, "loss": 0.03183669, "step": 10937 }, { "epoch": 21.876, "grad_norm": 1.2122427225112915, "learning_rate": 2e-05, "loss": 0.04212833, "step": 10938 }, { "epoch": 21.878, "grad_norm": 1.3224132061004639, "learning_rate": 2e-05, "loss": 0.04563277, "step": 10939 }, { "epoch": 21.88, "grad_norm": 2.310457229614258, "learning_rate": 2e-05, "loss": 0.05610622, "step": 10940 }, { "epoch": 21.882, "grad_norm": 1.8583682775497437, "learning_rate": 2e-05, "loss": 0.04325613, "step": 10941 }, { "epoch": 21.884, "grad_norm": 1.2522855997085571, "learning_rate": 2e-05, "loss": 0.03730422, "step": 10942 }, { "epoch": 21.886, "grad_norm": 1.5563654899597168, "learning_rate": 2e-05, "loss": 0.04918503, "step": 10943 }, { "epoch": 21.888, "grad_norm": 1.0447373390197754, "learning_rate": 2e-05, "loss": 0.04041155, "step": 10944 }, { "epoch": 21.89, "grad_norm": 1.3849196434020996, "learning_rate": 2e-05, "loss": 0.06611056, "step": 10945 }, { "epoch": 21.892, "grad_norm": 1.6392872333526611, "learning_rate": 2e-05, "loss": 0.03978346, "step": 10946 }, { "epoch": 21.894, "grad_norm": 1.6267399787902832, "learning_rate": 2e-05, "loss": 0.04866889, "step": 10947 }, { "epoch": 21.896, "grad_norm": 1.8090860843658447, "learning_rate": 2e-05, "loss": 0.0593785, "step": 10948 }, { "epoch": 21.898, "grad_norm": 1.860074758529663, "learning_rate": 2e-05, "loss": 0.06724711, "step": 10949 }, { "epoch": 21.9, "grad_norm": 1.6484698057174683, "learning_rate": 2e-05, "loss": 0.06605899, "step": 10950 }, { "epoch": 21.902, "grad_norm": 1.1395916938781738, "learning_rate": 2e-05, "loss": 0.05008196, "step": 10951 }, { "epoch": 21.904, "grad_norm": 2.14670991897583, "learning_rate": 2e-05, "loss": 0.06410623, "step": 10952 }, { "epoch": 21.906, "grad_norm": 1.0113487243652344, "learning_rate": 2e-05, "loss": 0.04664245, "step": 10953 }, { "epoch": 21.908, "grad_norm": 1.0322539806365967, "learning_rate": 2e-05, "loss": 0.0400438, "step": 10954 }, { "epoch": 21.91, "grad_norm": 0.9443525671958923, "learning_rate": 2e-05, "loss": 0.03930948, "step": 10955 }, { "epoch": 21.912, "grad_norm": 2.4295783042907715, "learning_rate": 2e-05, "loss": 0.05779032, "step": 10956 }, { "epoch": 21.914, "grad_norm": 1.4361931085586548, "learning_rate": 2e-05, "loss": 0.051619, "step": 10957 }, { "epoch": 21.916, "grad_norm": 1.0598607063293457, "learning_rate": 2e-05, "loss": 0.04545189, "step": 10958 }, { "epoch": 21.918, "grad_norm": 1.5345853567123413, "learning_rate": 2e-05, "loss": 0.04772504, "step": 10959 }, { "epoch": 21.92, "grad_norm": 1.5085455179214478, "learning_rate": 2e-05, "loss": 0.06190134, "step": 10960 }, { "epoch": 21.922, "grad_norm": 1.9674993753433228, "learning_rate": 2e-05, "loss": 0.06687959, "step": 10961 }, { "epoch": 21.924, "grad_norm": 0.9924759268760681, "learning_rate": 2e-05, "loss": 0.03278512, "step": 10962 }, { "epoch": 21.926, "grad_norm": 1.6792593002319336, "learning_rate": 2e-05, "loss": 0.04734295, "step": 10963 }, { "epoch": 21.928, "grad_norm": 1.8099673986434937, "learning_rate": 2e-05, "loss": 0.04413921, "step": 10964 }, { "epoch": 21.93, "grad_norm": 1.0037261247634888, "learning_rate": 2e-05, "loss": 0.03000687, "step": 10965 }, { "epoch": 21.932, "grad_norm": 1.3202835321426392, "learning_rate": 2e-05, "loss": 0.05544813, "step": 10966 }, { "epoch": 21.934, "grad_norm": 1.1978645324707031, "learning_rate": 2e-05, "loss": 0.04972622, "step": 10967 }, { "epoch": 21.936, "grad_norm": 1.3868579864501953, "learning_rate": 2e-05, "loss": 0.03316442, "step": 10968 }, { "epoch": 21.938, "grad_norm": 0.976194441318512, "learning_rate": 2e-05, "loss": 0.03515359, "step": 10969 }, { "epoch": 21.94, "grad_norm": 1.1796832084655762, "learning_rate": 2e-05, "loss": 0.04262147, "step": 10970 }, { "epoch": 21.942, "grad_norm": 1.1597833633422852, "learning_rate": 2e-05, "loss": 0.05109448, "step": 10971 }, { "epoch": 21.944, "grad_norm": 1.3826942443847656, "learning_rate": 2e-05, "loss": 0.03746489, "step": 10972 }, { "epoch": 21.946, "grad_norm": 1.715853214263916, "learning_rate": 2e-05, "loss": 0.04782753, "step": 10973 }, { "epoch": 21.948, "grad_norm": 1.2093501091003418, "learning_rate": 2e-05, "loss": 0.04726864, "step": 10974 }, { "epoch": 21.95, "grad_norm": 1.207438349723816, "learning_rate": 2e-05, "loss": 0.03904613, "step": 10975 }, { "epoch": 21.951999999999998, "grad_norm": 1.121230125427246, "learning_rate": 2e-05, "loss": 0.04150212, "step": 10976 }, { "epoch": 21.954, "grad_norm": 1.3426141738891602, "learning_rate": 2e-05, "loss": 0.04944028, "step": 10977 }, { "epoch": 21.956, "grad_norm": 2.515803575515747, "learning_rate": 2e-05, "loss": 0.05761576, "step": 10978 }, { "epoch": 21.958, "grad_norm": 1.4491621255874634, "learning_rate": 2e-05, "loss": 0.06188128, "step": 10979 }, { "epoch": 21.96, "grad_norm": 1.2092634439468384, "learning_rate": 2e-05, "loss": 0.06259933, "step": 10980 }, { "epoch": 21.962, "grad_norm": 1.0891218185424805, "learning_rate": 2e-05, "loss": 0.04704313, "step": 10981 }, { "epoch": 21.964, "grad_norm": 1.0597481727600098, "learning_rate": 2e-05, "loss": 0.02693278, "step": 10982 }, { "epoch": 21.966, "grad_norm": 0.8099601864814758, "learning_rate": 2e-05, "loss": 0.02368376, "step": 10983 }, { "epoch": 21.968, "grad_norm": 1.3354405164718628, "learning_rate": 2e-05, "loss": 0.05511855, "step": 10984 }, { "epoch": 21.97, "grad_norm": 1.8093352317810059, "learning_rate": 2e-05, "loss": 0.04049457, "step": 10985 }, { "epoch": 21.972, "grad_norm": 1.0945965051651, "learning_rate": 2e-05, "loss": 0.05107197, "step": 10986 }, { "epoch": 21.974, "grad_norm": 2.620250940322876, "learning_rate": 2e-05, "loss": 0.04253346, "step": 10987 }, { "epoch": 21.976, "grad_norm": 2.089404344558716, "learning_rate": 2e-05, "loss": 0.05536151, "step": 10988 }, { "epoch": 21.978, "grad_norm": 1.1685433387756348, "learning_rate": 2e-05, "loss": 0.03540859, "step": 10989 }, { "epoch": 21.98, "grad_norm": 1.8869960308074951, "learning_rate": 2e-05, "loss": 0.04953475, "step": 10990 }, { "epoch": 21.982, "grad_norm": 0.9517631530761719, "learning_rate": 2e-05, "loss": 0.03841473, "step": 10991 }, { "epoch": 21.984, "grad_norm": 1.0570545196533203, "learning_rate": 2e-05, "loss": 0.04897962, "step": 10992 }, { "epoch": 21.986, "grad_norm": 1.2882373332977295, "learning_rate": 2e-05, "loss": 0.05040508, "step": 10993 }, { "epoch": 21.988, "grad_norm": 1.749763011932373, "learning_rate": 2e-05, "loss": 0.04004006, "step": 10994 }, { "epoch": 21.99, "grad_norm": 1.2804243564605713, "learning_rate": 2e-05, "loss": 0.04072858, "step": 10995 }, { "epoch": 21.992, "grad_norm": 1.1499717235565186, "learning_rate": 2e-05, "loss": 0.04644898, "step": 10996 }, { "epoch": 21.994, "grad_norm": 1.7657212018966675, "learning_rate": 2e-05, "loss": 0.05860261, "step": 10997 }, { "epoch": 21.996, "grad_norm": 0.9009549021720886, "learning_rate": 2e-05, "loss": 0.03138966, "step": 10998 }, { "epoch": 21.998, "grad_norm": 1.6469109058380127, "learning_rate": 2e-05, "loss": 0.04713497, "step": 10999 }, { "epoch": 22.0, "grad_norm": 1.7399426698684692, "learning_rate": 2e-05, "loss": 0.04571211, "step": 11000 }, { "epoch": 22.0, "eval_performance": { "AngleClassification_1": 0.998, "AngleClassification_2": 0.996, "AngleClassification_3": 0.9700598802395209, "Equal_1": 0.992, "Equal_2": 0.9720558882235529, "Equal_3": 0.9780439121756487, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.9920159680638723, "Parallel_1": 0.9859719438877755, "Parallel_2": 0.9959919839679359, "Parallel_3": 0.988, "Perpendicular_1": 0.998, "Perpendicular_2": 0.98, "Perpendicular_3": 0.8657314629258517, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 1.0, "PointLiesOnCircle_3": 0.9932000000000001, "PointLiesOnLine_1": 0.9919839679358717, "PointLiesOnLine_2": 1.0, "PointLiesOnLine_3": 0.9780439121756487 }, "eval_runtime": 226.4035, "eval_samples_per_second": 46.377, "eval_steps_per_second": 0.928, "step": 11000 }, { "epoch": 22.002, "grad_norm": 1.935760259628296, "learning_rate": 2e-05, "loss": 0.04039805, "step": 11001 }, { "epoch": 22.004, "grad_norm": 1.729925274848938, "learning_rate": 2e-05, "loss": 0.04348754, "step": 11002 }, { "epoch": 22.006, "grad_norm": 1.3928816318511963, "learning_rate": 2e-05, "loss": 0.04797959, "step": 11003 }, { "epoch": 22.008, "grad_norm": 3.0192599296569824, "learning_rate": 2e-05, "loss": 0.04947034, "step": 11004 }, { "epoch": 22.01, "grad_norm": 3.3571126461029053, "learning_rate": 2e-05, "loss": 0.05177777, "step": 11005 }, { "epoch": 22.012, "grad_norm": 1.587371826171875, "learning_rate": 2e-05, "loss": 0.05374225, "step": 11006 }, { "epoch": 22.014, "grad_norm": 1.3989698886871338, "learning_rate": 2e-05, "loss": 0.0425331, "step": 11007 }, { "epoch": 22.016, "grad_norm": 1.620147705078125, "learning_rate": 2e-05, "loss": 0.05342864, "step": 11008 }, { "epoch": 22.018, "grad_norm": 1.040038824081421, "learning_rate": 2e-05, "loss": 0.04247821, "step": 11009 }, { "epoch": 22.02, "grad_norm": 1.378523588180542, "learning_rate": 2e-05, "loss": 0.05350643, "step": 11010 }, { "epoch": 22.022, "grad_norm": 0.9320111274719238, "learning_rate": 2e-05, "loss": 0.03401447, "step": 11011 }, { "epoch": 22.024, "grad_norm": 0.9269822835922241, "learning_rate": 2e-05, "loss": 0.04305762, "step": 11012 }, { "epoch": 22.026, "grad_norm": 1.6538724899291992, "learning_rate": 2e-05, "loss": 0.05331837, "step": 11013 }, { "epoch": 22.028, "grad_norm": 1.2550455331802368, "learning_rate": 2e-05, "loss": 0.04774629, "step": 11014 }, { "epoch": 22.03, "grad_norm": 1.8850221633911133, "learning_rate": 2e-05, "loss": 0.04961438, "step": 11015 }, { "epoch": 22.032, "grad_norm": 0.9145897626876831, "learning_rate": 2e-05, "loss": 0.03717394, "step": 11016 }, { "epoch": 22.034, "grad_norm": 1.0213181972503662, "learning_rate": 2e-05, "loss": 0.03861319, "step": 11017 }, { "epoch": 22.036, "grad_norm": 1.159011960029602, "learning_rate": 2e-05, "loss": 0.03287625, "step": 11018 }, { "epoch": 22.038, "grad_norm": 1.1738529205322266, "learning_rate": 2e-05, "loss": 0.03955688, "step": 11019 }, { "epoch": 22.04, "grad_norm": 1.6557778120040894, "learning_rate": 2e-05, "loss": 0.04527711, "step": 11020 }, { "epoch": 22.042, "grad_norm": 1.2668328285217285, "learning_rate": 2e-05, "loss": 0.06192822, "step": 11021 }, { "epoch": 22.044, "grad_norm": 1.0749953985214233, "learning_rate": 2e-05, "loss": 0.0389572, "step": 11022 }, { "epoch": 22.046, "grad_norm": 1.850034475326538, "learning_rate": 2e-05, "loss": 0.05764248, "step": 11023 }, { "epoch": 22.048, "grad_norm": 1.4813024997711182, "learning_rate": 2e-05, "loss": 0.06234466, "step": 11024 }, { "epoch": 22.05, "grad_norm": 0.9873860478401184, "learning_rate": 2e-05, "loss": 0.04163551, "step": 11025 }, { "epoch": 22.052, "grad_norm": 1.1059823036193848, "learning_rate": 2e-05, "loss": 0.04430852, "step": 11026 }, { "epoch": 22.054, "grad_norm": 1.6120470762252808, "learning_rate": 2e-05, "loss": 0.05113713, "step": 11027 }, { "epoch": 22.056, "grad_norm": 2.478240728378296, "learning_rate": 2e-05, "loss": 0.05589008, "step": 11028 }, { "epoch": 22.058, "grad_norm": 1.3711963891983032, "learning_rate": 2e-05, "loss": 0.05275831, "step": 11029 }, { "epoch": 22.06, "grad_norm": 1.391385793685913, "learning_rate": 2e-05, "loss": 0.05214929, "step": 11030 }, { "epoch": 22.062, "grad_norm": 1.1093858480453491, "learning_rate": 2e-05, "loss": 0.03810849, "step": 11031 }, { "epoch": 22.064, "grad_norm": 2.73942494392395, "learning_rate": 2e-05, "loss": 0.05958373, "step": 11032 }, { "epoch": 22.066, "grad_norm": 1.4206058979034424, "learning_rate": 2e-05, "loss": 0.04502981, "step": 11033 }, { "epoch": 22.068, "grad_norm": 1.3584966659545898, "learning_rate": 2e-05, "loss": 0.04773968, "step": 11034 }, { "epoch": 22.07, "grad_norm": 1.1971324682235718, "learning_rate": 2e-05, "loss": 0.0451732, "step": 11035 }, { "epoch": 22.072, "grad_norm": 1.4459877014160156, "learning_rate": 2e-05, "loss": 0.05190061, "step": 11036 }, { "epoch": 22.074, "grad_norm": 3.07961106300354, "learning_rate": 2e-05, "loss": 0.07771592, "step": 11037 }, { "epoch": 22.076, "grad_norm": 1.0631959438323975, "learning_rate": 2e-05, "loss": 0.03867687, "step": 11038 }, { "epoch": 22.078, "grad_norm": 1.6710801124572754, "learning_rate": 2e-05, "loss": 0.04370876, "step": 11039 }, { "epoch": 22.08, "grad_norm": 1.7581576108932495, "learning_rate": 2e-05, "loss": 0.05494396, "step": 11040 }, { "epoch": 22.082, "grad_norm": 1.2223470211029053, "learning_rate": 2e-05, "loss": 0.05481771, "step": 11041 }, { "epoch": 22.084, "grad_norm": 1.671237587928772, "learning_rate": 2e-05, "loss": 0.04445202, "step": 11042 }, { "epoch": 22.086, "grad_norm": 0.9967159032821655, "learning_rate": 2e-05, "loss": 0.04678237, "step": 11043 }, { "epoch": 22.088, "grad_norm": 1.241431474685669, "learning_rate": 2e-05, "loss": 0.05459955, "step": 11044 }, { "epoch": 22.09, "grad_norm": 1.017122507095337, "learning_rate": 2e-05, "loss": 0.02829801, "step": 11045 }, { "epoch": 22.092, "grad_norm": 1.6178524494171143, "learning_rate": 2e-05, "loss": 0.04392669, "step": 11046 }, { "epoch": 22.094, "grad_norm": 1.0610991716384888, "learning_rate": 2e-05, "loss": 0.037874, "step": 11047 }, { "epoch": 22.096, "grad_norm": 1.202981948852539, "learning_rate": 2e-05, "loss": 0.06172846, "step": 11048 }, { "epoch": 22.098, "grad_norm": 0.9979387521743774, "learning_rate": 2e-05, "loss": 0.04054443, "step": 11049 }, { "epoch": 22.1, "grad_norm": 1.689109206199646, "learning_rate": 2e-05, "loss": 0.05622686, "step": 11050 }, { "epoch": 22.102, "grad_norm": 1.26865553855896, "learning_rate": 2e-05, "loss": 0.04523615, "step": 11051 }, { "epoch": 22.104, "grad_norm": 0.9446829557418823, "learning_rate": 2e-05, "loss": 0.03537866, "step": 11052 }, { "epoch": 22.106, "grad_norm": 1.5860592126846313, "learning_rate": 2e-05, "loss": 0.03632271, "step": 11053 }, { "epoch": 22.108, "grad_norm": 1.5904357433319092, "learning_rate": 2e-05, "loss": 0.06361252, "step": 11054 }, { "epoch": 22.11, "grad_norm": 1.2690680027008057, "learning_rate": 2e-05, "loss": 0.04704365, "step": 11055 }, { "epoch": 22.112, "grad_norm": 0.9691974520683289, "learning_rate": 2e-05, "loss": 0.03102996, "step": 11056 }, { "epoch": 22.114, "grad_norm": 1.0802628993988037, "learning_rate": 2e-05, "loss": 0.03536987, "step": 11057 }, { "epoch": 22.116, "grad_norm": 2.7259693145751953, "learning_rate": 2e-05, "loss": 0.06912223, "step": 11058 }, { "epoch": 22.118, "grad_norm": 1.0998468399047852, "learning_rate": 2e-05, "loss": 0.04484056, "step": 11059 }, { "epoch": 22.12, "grad_norm": 3.512763261795044, "learning_rate": 2e-05, "loss": 0.05085948, "step": 11060 }, { "epoch": 22.122, "grad_norm": 1.017800211906433, "learning_rate": 2e-05, "loss": 0.05098267, "step": 11061 }, { "epoch": 22.124, "grad_norm": 0.9071416854858398, "learning_rate": 2e-05, "loss": 0.02892078, "step": 11062 }, { "epoch": 22.126, "grad_norm": 1.695021629333496, "learning_rate": 2e-05, "loss": 0.06086297, "step": 11063 }, { "epoch": 22.128, "grad_norm": 1.6080007553100586, "learning_rate": 2e-05, "loss": 0.05113797, "step": 11064 }, { "epoch": 22.13, "grad_norm": 1.7706447839736938, "learning_rate": 2e-05, "loss": 0.04474971, "step": 11065 }, { "epoch": 22.132, "grad_norm": 2.455507755279541, "learning_rate": 2e-05, "loss": 0.05328702, "step": 11066 }, { "epoch": 22.134, "grad_norm": 1.513025164604187, "learning_rate": 2e-05, "loss": 0.05276646, "step": 11067 }, { "epoch": 22.136, "grad_norm": 3.423887252807617, "learning_rate": 2e-05, "loss": 0.04866498, "step": 11068 }, { "epoch": 22.138, "grad_norm": 1.0251408815383911, "learning_rate": 2e-05, "loss": 0.03739586, "step": 11069 }, { "epoch": 22.14, "grad_norm": 1.5795085430145264, "learning_rate": 2e-05, "loss": 0.06240051, "step": 11070 }, { "epoch": 22.142, "grad_norm": 1.9535906314849854, "learning_rate": 2e-05, "loss": 0.0656325, "step": 11071 }, { "epoch": 22.144, "grad_norm": 1.7015113830566406, "learning_rate": 2e-05, "loss": 0.04866337, "step": 11072 }, { "epoch": 22.146, "grad_norm": 0.8736777901649475, "learning_rate": 2e-05, "loss": 0.03521525, "step": 11073 }, { "epoch": 22.148, "grad_norm": 1.3966928720474243, "learning_rate": 2e-05, "loss": 0.04495484, "step": 11074 }, { "epoch": 22.15, "grad_norm": 1.309707760810852, "learning_rate": 2e-05, "loss": 0.05940238, "step": 11075 }, { "epoch": 22.152, "grad_norm": 1.0975288152694702, "learning_rate": 2e-05, "loss": 0.04613318, "step": 11076 }, { "epoch": 22.154, "grad_norm": 2.721820592880249, "learning_rate": 2e-05, "loss": 0.04357797, "step": 11077 }, { "epoch": 22.156, "grad_norm": 2.6884758472442627, "learning_rate": 2e-05, "loss": 0.05115152, "step": 11078 }, { "epoch": 22.158, "grad_norm": 2.625033378601074, "learning_rate": 2e-05, "loss": 0.07806052, "step": 11079 }, { "epoch": 22.16, "grad_norm": 1.2966443300247192, "learning_rate": 2e-05, "loss": 0.04672267, "step": 11080 }, { "epoch": 22.162, "grad_norm": 1.6097663640975952, "learning_rate": 2e-05, "loss": 0.04457952, "step": 11081 }, { "epoch": 22.164, "grad_norm": 1.030554175376892, "learning_rate": 2e-05, "loss": 0.04810381, "step": 11082 }, { "epoch": 22.166, "grad_norm": 1.702781319618225, "learning_rate": 2e-05, "loss": 0.04801179, "step": 11083 }, { "epoch": 22.168, "grad_norm": 1.03678297996521, "learning_rate": 2e-05, "loss": 0.05808982, "step": 11084 }, { "epoch": 22.17, "grad_norm": 0.8880029320716858, "learning_rate": 2e-05, "loss": 0.02563297, "step": 11085 }, { "epoch": 22.172, "grad_norm": 1.8329299688339233, "learning_rate": 2e-05, "loss": 0.05821917, "step": 11086 }, { "epoch": 22.174, "grad_norm": 1.047378659248352, "learning_rate": 2e-05, "loss": 0.03271423, "step": 11087 }, { "epoch": 22.176, "grad_norm": 1.2597047090530396, "learning_rate": 2e-05, "loss": 0.04456035, "step": 11088 }, { "epoch": 22.178, "grad_norm": 1.36540949344635, "learning_rate": 2e-05, "loss": 0.05128956, "step": 11089 }, { "epoch": 22.18, "grad_norm": 1.0094643831253052, "learning_rate": 2e-05, "loss": 0.04086157, "step": 11090 }, { "epoch": 22.182, "grad_norm": 1.1568093299865723, "learning_rate": 2e-05, "loss": 0.04216065, "step": 11091 }, { "epoch": 22.184, "grad_norm": 1.5784229040145874, "learning_rate": 2e-05, "loss": 0.04853484, "step": 11092 }, { "epoch": 22.186, "grad_norm": 1.3022431135177612, "learning_rate": 2e-05, "loss": 0.05129544, "step": 11093 }, { "epoch": 22.188, "grad_norm": 1.2289499044418335, "learning_rate": 2e-05, "loss": 0.04040147, "step": 11094 }, { "epoch": 22.19, "grad_norm": 1.2581167221069336, "learning_rate": 2e-05, "loss": 0.04087251, "step": 11095 }, { "epoch": 22.192, "grad_norm": 1.4437354803085327, "learning_rate": 2e-05, "loss": 0.03741935, "step": 11096 }, { "epoch": 22.194, "grad_norm": 0.9804332852363586, "learning_rate": 2e-05, "loss": 0.04441322, "step": 11097 }, { "epoch": 22.196, "grad_norm": 1.0826776027679443, "learning_rate": 2e-05, "loss": 0.04588358, "step": 11098 }, { "epoch": 22.198, "grad_norm": 1.3598271608352661, "learning_rate": 2e-05, "loss": 0.03236514, "step": 11099 }, { "epoch": 22.2, "grad_norm": 1.1172486543655396, "learning_rate": 2e-05, "loss": 0.03859956, "step": 11100 }, { "epoch": 22.202, "grad_norm": 1.064820408821106, "learning_rate": 2e-05, "loss": 0.0424341, "step": 11101 }, { "epoch": 22.204, "grad_norm": 1.3667649030685425, "learning_rate": 2e-05, "loss": 0.06930325, "step": 11102 }, { "epoch": 22.206, "grad_norm": 1.2815967798233032, "learning_rate": 2e-05, "loss": 0.03202675, "step": 11103 }, { "epoch": 22.208, "grad_norm": 1.3072644472122192, "learning_rate": 2e-05, "loss": 0.06806996, "step": 11104 }, { "epoch": 22.21, "grad_norm": 1.215680718421936, "learning_rate": 2e-05, "loss": 0.05417985, "step": 11105 }, { "epoch": 22.212, "grad_norm": 2.0547776222229004, "learning_rate": 2e-05, "loss": 0.05925012, "step": 11106 }, { "epoch": 22.214, "grad_norm": 1.4285054206848145, "learning_rate": 2e-05, "loss": 0.04930936, "step": 11107 }, { "epoch": 22.216, "grad_norm": 1.4046630859375, "learning_rate": 2e-05, "loss": 0.05431837, "step": 11108 }, { "epoch": 22.218, "grad_norm": 1.1555949449539185, "learning_rate": 2e-05, "loss": 0.05411812, "step": 11109 }, { "epoch": 22.22, "grad_norm": 1.4010975360870361, "learning_rate": 2e-05, "loss": 0.04974956, "step": 11110 }, { "epoch": 22.222, "grad_norm": 1.4039974212646484, "learning_rate": 2e-05, "loss": 0.06029295, "step": 11111 }, { "epoch": 22.224, "grad_norm": 0.8453831076622009, "learning_rate": 2e-05, "loss": 0.03084135, "step": 11112 }, { "epoch": 22.226, "grad_norm": 1.0024372339248657, "learning_rate": 2e-05, "loss": 0.03826562, "step": 11113 }, { "epoch": 22.228, "grad_norm": 1.1104563474655151, "learning_rate": 2e-05, "loss": 0.0350478, "step": 11114 }, { "epoch": 22.23, "grad_norm": 1.2982219457626343, "learning_rate": 2e-05, "loss": 0.05836608, "step": 11115 }, { "epoch": 22.232, "grad_norm": 1.3631715774536133, "learning_rate": 2e-05, "loss": 0.0533788, "step": 11116 }, { "epoch": 22.234, "grad_norm": 1.2830848693847656, "learning_rate": 2e-05, "loss": 0.06203049, "step": 11117 }, { "epoch": 22.236, "grad_norm": 1.7681833505630493, "learning_rate": 2e-05, "loss": 0.03540882, "step": 11118 }, { "epoch": 22.238, "grad_norm": 1.1788599491119385, "learning_rate": 2e-05, "loss": 0.04617755, "step": 11119 }, { "epoch": 22.24, "grad_norm": 0.7942465543746948, "learning_rate": 2e-05, "loss": 0.02479826, "step": 11120 }, { "epoch": 22.242, "grad_norm": 1.8044131994247437, "learning_rate": 2e-05, "loss": 0.05589304, "step": 11121 }, { "epoch": 22.244, "grad_norm": 1.6039032936096191, "learning_rate": 2e-05, "loss": 0.05433145, "step": 11122 }, { "epoch": 22.246, "grad_norm": 1.1072659492492676, "learning_rate": 2e-05, "loss": 0.05166779, "step": 11123 }, { "epoch": 22.248, "grad_norm": 2.0531651973724365, "learning_rate": 2e-05, "loss": 0.04209354, "step": 11124 }, { "epoch": 22.25, "grad_norm": 1.3364015817642212, "learning_rate": 2e-05, "loss": 0.04050201, "step": 11125 }, { "epoch": 22.252, "grad_norm": 1.7827415466308594, "learning_rate": 2e-05, "loss": 0.04026122, "step": 11126 }, { "epoch": 22.254, "grad_norm": 1.2651749849319458, "learning_rate": 2e-05, "loss": 0.06153499, "step": 11127 }, { "epoch": 22.256, "grad_norm": 1.4194684028625488, "learning_rate": 2e-05, "loss": 0.05961439, "step": 11128 }, { "epoch": 22.258, "grad_norm": 1.597408413887024, "learning_rate": 2e-05, "loss": 0.05282076, "step": 11129 }, { "epoch": 22.26, "grad_norm": 2.9087939262390137, "learning_rate": 2e-05, "loss": 0.05277985, "step": 11130 }, { "epoch": 22.262, "grad_norm": 1.388075351715088, "learning_rate": 2e-05, "loss": 0.05548573, "step": 11131 }, { "epoch": 22.264, "grad_norm": 1.0936086177825928, "learning_rate": 2e-05, "loss": 0.04672196, "step": 11132 }, { "epoch": 22.266, "grad_norm": 1.4347175359725952, "learning_rate": 2e-05, "loss": 0.06305401, "step": 11133 }, { "epoch": 22.268, "grad_norm": 1.683622121810913, "learning_rate": 2e-05, "loss": 0.04272688, "step": 11134 }, { "epoch": 22.27, "grad_norm": 1.3902970552444458, "learning_rate": 2e-05, "loss": 0.03159359, "step": 11135 }, { "epoch": 22.272, "grad_norm": 1.1875675916671753, "learning_rate": 2e-05, "loss": 0.04473014, "step": 11136 }, { "epoch": 22.274, "grad_norm": 1.2046570777893066, "learning_rate": 2e-05, "loss": 0.04370328, "step": 11137 }, { "epoch": 22.276, "grad_norm": 1.563090443611145, "learning_rate": 2e-05, "loss": 0.04196423, "step": 11138 }, { "epoch": 22.278, "grad_norm": 1.1478464603424072, "learning_rate": 2e-05, "loss": 0.04743166, "step": 11139 }, { "epoch": 22.28, "grad_norm": 1.5553689002990723, "learning_rate": 2e-05, "loss": 0.03396824, "step": 11140 }, { "epoch": 22.282, "grad_norm": 1.1259592771530151, "learning_rate": 2e-05, "loss": 0.04865803, "step": 11141 }, { "epoch": 22.284, "grad_norm": 1.0936449766159058, "learning_rate": 2e-05, "loss": 0.04534347, "step": 11142 }, { "epoch": 22.286, "grad_norm": 1.8274104595184326, "learning_rate": 2e-05, "loss": 0.0487612, "step": 11143 }, { "epoch": 22.288, "grad_norm": 1.5041760206222534, "learning_rate": 2e-05, "loss": 0.04928614, "step": 11144 }, { "epoch": 22.29, "grad_norm": 1.0196545124053955, "learning_rate": 2e-05, "loss": 0.04669318, "step": 11145 }, { "epoch": 22.292, "grad_norm": 1.218065857887268, "learning_rate": 2e-05, "loss": 0.0528445, "step": 11146 }, { "epoch": 22.294, "grad_norm": 2.150017499923706, "learning_rate": 2e-05, "loss": 0.04658528, "step": 11147 }, { "epoch": 22.296, "grad_norm": 1.1517325639724731, "learning_rate": 2e-05, "loss": 0.03993255, "step": 11148 }, { "epoch": 22.298, "grad_norm": 1.0617889165878296, "learning_rate": 2e-05, "loss": 0.05029877, "step": 11149 }, { "epoch": 22.3, "grad_norm": 1.3276013135910034, "learning_rate": 2e-05, "loss": 0.047178, "step": 11150 }, { "epoch": 22.302, "grad_norm": 1.0658388137817383, "learning_rate": 2e-05, "loss": 0.04065624, "step": 11151 }, { "epoch": 22.304, "grad_norm": 2.4880316257476807, "learning_rate": 2e-05, "loss": 0.05593013, "step": 11152 }, { "epoch": 22.306, "grad_norm": 1.0430607795715332, "learning_rate": 2e-05, "loss": 0.04028732, "step": 11153 }, { "epoch": 22.308, "grad_norm": 2.278242826461792, "learning_rate": 2e-05, "loss": 0.04597948, "step": 11154 }, { "epoch": 22.31, "grad_norm": 0.884898841381073, "learning_rate": 2e-05, "loss": 0.03191692, "step": 11155 }, { "epoch": 22.312, "grad_norm": 1.5687628984451294, "learning_rate": 2e-05, "loss": 0.04811794, "step": 11156 }, { "epoch": 22.314, "grad_norm": 0.8942727446556091, "learning_rate": 2e-05, "loss": 0.02991883, "step": 11157 }, { "epoch": 22.316, "grad_norm": 1.0042197704315186, "learning_rate": 2e-05, "loss": 0.03692505, "step": 11158 }, { "epoch": 22.318, "grad_norm": 1.1697272062301636, "learning_rate": 2e-05, "loss": 0.0426156, "step": 11159 }, { "epoch": 22.32, "grad_norm": 1.189145803451538, "learning_rate": 2e-05, "loss": 0.04834644, "step": 11160 }, { "epoch": 22.322, "grad_norm": 1.0281339883804321, "learning_rate": 2e-05, "loss": 0.04019223, "step": 11161 }, { "epoch": 22.324, "grad_norm": 0.9581253528594971, "learning_rate": 2e-05, "loss": 0.03984208, "step": 11162 }, { "epoch": 22.326, "grad_norm": 1.1453323364257812, "learning_rate": 2e-05, "loss": 0.04484681, "step": 11163 }, { "epoch": 22.328, "grad_norm": 2.0202462673187256, "learning_rate": 2e-05, "loss": 0.04616878, "step": 11164 }, { "epoch": 22.33, "grad_norm": 1.2088522911071777, "learning_rate": 2e-05, "loss": 0.04525774, "step": 11165 }, { "epoch": 22.332, "grad_norm": 4.262028217315674, "learning_rate": 2e-05, "loss": 0.06251769, "step": 11166 }, { "epoch": 22.334, "grad_norm": 1.0222514867782593, "learning_rate": 2e-05, "loss": 0.04027661, "step": 11167 }, { "epoch": 22.336, "grad_norm": 1.2390207052230835, "learning_rate": 2e-05, "loss": 0.0467163, "step": 11168 }, { "epoch": 22.338, "grad_norm": 1.3372920751571655, "learning_rate": 2e-05, "loss": 0.05568913, "step": 11169 }, { "epoch": 22.34, "grad_norm": 0.9658917188644409, "learning_rate": 2e-05, "loss": 0.03143013, "step": 11170 }, { "epoch": 22.342, "grad_norm": 1.0128836631774902, "learning_rate": 2e-05, "loss": 0.04203345, "step": 11171 }, { "epoch": 22.344, "grad_norm": 1.9480278491973877, "learning_rate": 2e-05, "loss": 0.03550385, "step": 11172 }, { "epoch": 22.346, "grad_norm": 1.201077938079834, "learning_rate": 2e-05, "loss": 0.05699923, "step": 11173 }, { "epoch": 22.348, "grad_norm": 1.6389007568359375, "learning_rate": 2e-05, "loss": 0.04556689, "step": 11174 }, { "epoch": 22.35, "grad_norm": 1.007587194442749, "learning_rate": 2e-05, "loss": 0.04380955, "step": 11175 }, { "epoch": 22.352, "grad_norm": 1.4060540199279785, "learning_rate": 2e-05, "loss": 0.06061304, "step": 11176 }, { "epoch": 22.354, "grad_norm": 1.0752365589141846, "learning_rate": 2e-05, "loss": 0.04984618, "step": 11177 }, { "epoch": 22.356, "grad_norm": 1.4179226160049438, "learning_rate": 2e-05, "loss": 0.05122635, "step": 11178 }, { "epoch": 22.358, "grad_norm": 1.2161222696304321, "learning_rate": 2e-05, "loss": 0.04443014, "step": 11179 }, { "epoch": 22.36, "grad_norm": 1.0068039894104004, "learning_rate": 2e-05, "loss": 0.04299643, "step": 11180 }, { "epoch": 22.362, "grad_norm": 1.4668631553649902, "learning_rate": 2e-05, "loss": 0.05090029, "step": 11181 }, { "epoch": 22.364, "grad_norm": 1.4759671688079834, "learning_rate": 2e-05, "loss": 0.04892571, "step": 11182 }, { "epoch": 22.366, "grad_norm": 1.1552584171295166, "learning_rate": 2e-05, "loss": 0.04284762, "step": 11183 }, { "epoch": 22.368, "grad_norm": 1.4301639795303345, "learning_rate": 2e-05, "loss": 0.04588412, "step": 11184 }, { "epoch": 22.37, "grad_norm": 0.8851962089538574, "learning_rate": 2e-05, "loss": 0.03863471, "step": 11185 }, { "epoch": 22.372, "grad_norm": 0.9694459438323975, "learning_rate": 2e-05, "loss": 0.03979157, "step": 11186 }, { "epoch": 22.374, "grad_norm": 1.5153483152389526, "learning_rate": 2e-05, "loss": 0.03654516, "step": 11187 }, { "epoch": 22.376, "grad_norm": 1.5796185731887817, "learning_rate": 2e-05, "loss": 0.04834542, "step": 11188 }, { "epoch": 22.378, "grad_norm": 1.2904592752456665, "learning_rate": 2e-05, "loss": 0.05003019, "step": 11189 }, { "epoch": 22.38, "grad_norm": 1.4257230758666992, "learning_rate": 2e-05, "loss": 0.0520186, "step": 11190 }, { "epoch": 22.382, "grad_norm": 1.1525768041610718, "learning_rate": 2e-05, "loss": 0.0521295, "step": 11191 }, { "epoch": 22.384, "grad_norm": 1.2500125169754028, "learning_rate": 2e-05, "loss": 0.05156438, "step": 11192 }, { "epoch": 22.386, "grad_norm": 1.0971205234527588, "learning_rate": 2e-05, "loss": 0.04962741, "step": 11193 }, { "epoch": 22.388, "grad_norm": 1.2623246908187866, "learning_rate": 2e-05, "loss": 0.05229411, "step": 11194 }, { "epoch": 22.39, "grad_norm": 1.3581517934799194, "learning_rate": 2e-05, "loss": 0.04757297, "step": 11195 }, { "epoch": 22.392, "grad_norm": 1.973177194595337, "learning_rate": 2e-05, "loss": 0.0477929, "step": 11196 }, { "epoch": 22.394, "grad_norm": 1.2530797719955444, "learning_rate": 2e-05, "loss": 0.04155411, "step": 11197 }, { "epoch": 22.396, "grad_norm": 1.097910761833191, "learning_rate": 2e-05, "loss": 0.04435322, "step": 11198 }, { "epoch": 22.398, "grad_norm": 1.0793019533157349, "learning_rate": 2e-05, "loss": 0.04141264, "step": 11199 }, { "epoch": 22.4, "grad_norm": 1.178741216659546, "learning_rate": 2e-05, "loss": 0.0349354, "step": 11200 }, { "epoch": 22.402, "grad_norm": 0.7700680494308472, "learning_rate": 2e-05, "loss": 0.02276843, "step": 11201 }, { "epoch": 22.404, "grad_norm": 1.1633824110031128, "learning_rate": 2e-05, "loss": 0.0542013, "step": 11202 }, { "epoch": 22.406, "grad_norm": 0.9481919407844543, "learning_rate": 2e-05, "loss": 0.04108407, "step": 11203 }, { "epoch": 22.408, "grad_norm": 1.5173581838607788, "learning_rate": 2e-05, "loss": 0.04660044, "step": 11204 }, { "epoch": 22.41, "grad_norm": 0.9978870153427124, "learning_rate": 2e-05, "loss": 0.0420578, "step": 11205 }, { "epoch": 22.412, "grad_norm": 1.3910508155822754, "learning_rate": 2e-05, "loss": 0.05603454, "step": 11206 }, { "epoch": 22.414, "grad_norm": 1.0581121444702148, "learning_rate": 2e-05, "loss": 0.03749997, "step": 11207 }, { "epoch": 22.416, "grad_norm": 1.807260274887085, "learning_rate": 2e-05, "loss": 0.05125757, "step": 11208 }, { "epoch": 22.418, "grad_norm": 1.7451093196868896, "learning_rate": 2e-05, "loss": 0.05212605, "step": 11209 }, { "epoch": 22.42, "grad_norm": 1.1237857341766357, "learning_rate": 2e-05, "loss": 0.03593703, "step": 11210 }, { "epoch": 22.422, "grad_norm": 1.2452716827392578, "learning_rate": 2e-05, "loss": 0.04008294, "step": 11211 }, { "epoch": 22.424, "grad_norm": 1.5367351770401, "learning_rate": 2e-05, "loss": 0.03283983, "step": 11212 }, { "epoch": 22.426, "grad_norm": 1.7981481552124023, "learning_rate": 2e-05, "loss": 0.03905568, "step": 11213 }, { "epoch": 22.428, "grad_norm": 1.2273157835006714, "learning_rate": 2e-05, "loss": 0.06320807, "step": 11214 }, { "epoch": 22.43, "grad_norm": 1.124375343322754, "learning_rate": 2e-05, "loss": 0.044132, "step": 11215 }, { "epoch": 22.432, "grad_norm": 4.146836757659912, "learning_rate": 2e-05, "loss": 0.05401932, "step": 11216 }, { "epoch": 22.434, "grad_norm": 1.6984989643096924, "learning_rate": 2e-05, "loss": 0.05831388, "step": 11217 }, { "epoch": 22.436, "grad_norm": 1.3385144472122192, "learning_rate": 2e-05, "loss": 0.04252683, "step": 11218 }, { "epoch": 22.438, "grad_norm": 1.135701298713684, "learning_rate": 2e-05, "loss": 0.04262855, "step": 11219 }, { "epoch": 22.44, "grad_norm": 1.0818277597427368, "learning_rate": 2e-05, "loss": 0.04118967, "step": 11220 }, { "epoch": 22.442, "grad_norm": 1.0221068859100342, "learning_rate": 2e-05, "loss": 0.03991093, "step": 11221 }, { "epoch": 22.444, "grad_norm": 1.1591988801956177, "learning_rate": 2e-05, "loss": 0.04749956, "step": 11222 }, { "epoch": 22.446, "grad_norm": 1.4463655948638916, "learning_rate": 2e-05, "loss": 0.05898266, "step": 11223 }, { "epoch": 22.448, "grad_norm": 1.0147346258163452, "learning_rate": 2e-05, "loss": 0.03914528, "step": 11224 }, { "epoch": 22.45, "grad_norm": 2.412890911102295, "learning_rate": 2e-05, "loss": 0.06601059, "step": 11225 }, { "epoch": 22.452, "grad_norm": 1.8312084674835205, "learning_rate": 2e-05, "loss": 0.06209372, "step": 11226 }, { "epoch": 22.454, "grad_norm": 1.5064696073532104, "learning_rate": 2e-05, "loss": 0.04767104, "step": 11227 }, { "epoch": 22.456, "grad_norm": 1.5853573083877563, "learning_rate": 2e-05, "loss": 0.05212314, "step": 11228 }, { "epoch": 22.458, "grad_norm": 3.271353244781494, "learning_rate": 2e-05, "loss": 0.05404135, "step": 11229 }, { "epoch": 22.46, "grad_norm": 0.950188159942627, "learning_rate": 2e-05, "loss": 0.04307424, "step": 11230 }, { "epoch": 22.462, "grad_norm": 1.4316763877868652, "learning_rate": 2e-05, "loss": 0.03547331, "step": 11231 }, { "epoch": 22.464, "grad_norm": 1.3197733163833618, "learning_rate": 2e-05, "loss": 0.05826892, "step": 11232 }, { "epoch": 22.466, "grad_norm": 1.016535997390747, "learning_rate": 2e-05, "loss": 0.04592193, "step": 11233 }, { "epoch": 22.468, "grad_norm": 2.066204786300659, "learning_rate": 2e-05, "loss": 0.05223761, "step": 11234 }, { "epoch": 22.47, "grad_norm": 1.3129621744155884, "learning_rate": 2e-05, "loss": 0.05153164, "step": 11235 }, { "epoch": 22.472, "grad_norm": 1.5808768272399902, "learning_rate": 2e-05, "loss": 0.04139033, "step": 11236 }, { "epoch": 22.474, "grad_norm": 1.104193925857544, "learning_rate": 2e-05, "loss": 0.04869783, "step": 11237 }, { "epoch": 22.476, "grad_norm": 0.9962319135665894, "learning_rate": 2e-05, "loss": 0.03923816, "step": 11238 }, { "epoch": 22.478, "grad_norm": 0.9615497589111328, "learning_rate": 2e-05, "loss": 0.04201663, "step": 11239 }, { "epoch": 22.48, "grad_norm": 2.339108467102051, "learning_rate": 2e-05, "loss": 0.07047258, "step": 11240 }, { "epoch": 22.482, "grad_norm": 1.3772393465042114, "learning_rate": 2e-05, "loss": 0.03775259, "step": 11241 }, { "epoch": 22.484, "grad_norm": 0.8356649875640869, "learning_rate": 2e-05, "loss": 0.03486666, "step": 11242 }, { "epoch": 22.486, "grad_norm": 1.8160996437072754, "learning_rate": 2e-05, "loss": 0.04783738, "step": 11243 }, { "epoch": 22.488, "grad_norm": 1.566346287727356, "learning_rate": 2e-05, "loss": 0.04367648, "step": 11244 }, { "epoch": 22.49, "grad_norm": 1.509894847869873, "learning_rate": 2e-05, "loss": 0.04414636, "step": 11245 }, { "epoch": 22.492, "grad_norm": 1.6763086318969727, "learning_rate": 2e-05, "loss": 0.05617331, "step": 11246 }, { "epoch": 22.494, "grad_norm": 1.9927916526794434, "learning_rate": 2e-05, "loss": 0.06488167, "step": 11247 }, { "epoch": 22.496, "grad_norm": 1.1888140439987183, "learning_rate": 2e-05, "loss": 0.06056303, "step": 11248 }, { "epoch": 22.498, "grad_norm": 1.3410838842391968, "learning_rate": 2e-05, "loss": 0.03829567, "step": 11249 }, { "epoch": 22.5, "grad_norm": 2.9376325607299805, "learning_rate": 2e-05, "loss": 0.05869907, "step": 11250 }, { "epoch": 22.502, "grad_norm": 2.4430620670318604, "learning_rate": 2e-05, "loss": 0.04974097, "step": 11251 }, { "epoch": 22.504, "grad_norm": 1.0205100774765015, "learning_rate": 2e-05, "loss": 0.03839823, "step": 11252 }, { "epoch": 22.506, "grad_norm": 1.1806446313858032, "learning_rate": 2e-05, "loss": 0.03811061, "step": 11253 }, { "epoch": 22.508, "grad_norm": 1.2597897052764893, "learning_rate": 2e-05, "loss": 0.05731502, "step": 11254 }, { "epoch": 22.51, "grad_norm": 1.3902602195739746, "learning_rate": 2e-05, "loss": 0.0482579, "step": 11255 }, { "epoch": 22.512, "grad_norm": 1.2300817966461182, "learning_rate": 2e-05, "loss": 0.03932236, "step": 11256 }, { "epoch": 22.514, "grad_norm": 1.4585747718811035, "learning_rate": 2e-05, "loss": 0.05475751, "step": 11257 }, { "epoch": 22.516, "grad_norm": 1.0700310468673706, "learning_rate": 2e-05, "loss": 0.03310098, "step": 11258 }, { "epoch": 22.518, "grad_norm": 1.2809871435165405, "learning_rate": 2e-05, "loss": 0.05043205, "step": 11259 }, { "epoch": 22.52, "grad_norm": 0.992548942565918, "learning_rate": 2e-05, "loss": 0.04890532, "step": 11260 }, { "epoch": 22.522, "grad_norm": 1.677968144416809, "learning_rate": 2e-05, "loss": 0.06289562, "step": 11261 }, { "epoch": 22.524, "grad_norm": 1.2811237573623657, "learning_rate": 2e-05, "loss": 0.03953015, "step": 11262 }, { "epoch": 22.526, "grad_norm": 1.222488284111023, "learning_rate": 2e-05, "loss": 0.04750559, "step": 11263 }, { "epoch": 22.528, "grad_norm": 1.314456582069397, "learning_rate": 2e-05, "loss": 0.04124779, "step": 11264 }, { "epoch": 22.53, "grad_norm": 1.0283966064453125, "learning_rate": 2e-05, "loss": 0.04295792, "step": 11265 }, { "epoch": 22.532, "grad_norm": 1.4707634449005127, "learning_rate": 2e-05, "loss": 0.04803476, "step": 11266 }, { "epoch": 22.534, "grad_norm": 0.9261155128479004, "learning_rate": 2e-05, "loss": 0.04424895, "step": 11267 }, { "epoch": 22.536, "grad_norm": 1.6212893724441528, "learning_rate": 2e-05, "loss": 0.03988468, "step": 11268 }, { "epoch": 22.538, "grad_norm": 1.6285959482192993, "learning_rate": 2e-05, "loss": 0.04195971, "step": 11269 }, { "epoch": 22.54, "grad_norm": 1.1415143013000488, "learning_rate": 2e-05, "loss": 0.04952957, "step": 11270 }, { "epoch": 22.542, "grad_norm": 1.723099946975708, "learning_rate": 2e-05, "loss": 0.04793706, "step": 11271 }, { "epoch": 22.544, "grad_norm": 0.8945137858390808, "learning_rate": 2e-05, "loss": 0.03014116, "step": 11272 }, { "epoch": 22.546, "grad_norm": 1.2609267234802246, "learning_rate": 2e-05, "loss": 0.06327485, "step": 11273 }, { "epoch": 22.548000000000002, "grad_norm": 1.42205810546875, "learning_rate": 2e-05, "loss": 0.07144931, "step": 11274 }, { "epoch": 22.55, "grad_norm": 2.167952299118042, "learning_rate": 2e-05, "loss": 0.0404605, "step": 11275 }, { "epoch": 22.552, "grad_norm": 2.3600051403045654, "learning_rate": 2e-05, "loss": 0.0627309, "step": 11276 }, { "epoch": 22.554, "grad_norm": 1.2906250953674316, "learning_rate": 2e-05, "loss": 0.04399919, "step": 11277 }, { "epoch": 22.556, "grad_norm": 2.2019996643066406, "learning_rate": 2e-05, "loss": 0.04619977, "step": 11278 }, { "epoch": 22.558, "grad_norm": 0.883650004863739, "learning_rate": 2e-05, "loss": 0.03224398, "step": 11279 }, { "epoch": 22.56, "grad_norm": 1.681480050086975, "learning_rate": 2e-05, "loss": 0.0416001, "step": 11280 }, { "epoch": 22.562, "grad_norm": 0.9726937413215637, "learning_rate": 2e-05, "loss": 0.03095537, "step": 11281 }, { "epoch": 22.564, "grad_norm": 5.695174694061279, "learning_rate": 2e-05, "loss": 0.05156635, "step": 11282 }, { "epoch": 22.566, "grad_norm": 1.8662149906158447, "learning_rate": 2e-05, "loss": 0.06729166, "step": 11283 }, { "epoch": 22.568, "grad_norm": 1.7864165306091309, "learning_rate": 2e-05, "loss": 0.04472494, "step": 11284 }, { "epoch": 22.57, "grad_norm": 1.8035988807678223, "learning_rate": 2e-05, "loss": 0.06171039, "step": 11285 }, { "epoch": 22.572, "grad_norm": 1.1095938682556152, "learning_rate": 2e-05, "loss": 0.0470986, "step": 11286 }, { "epoch": 22.574, "grad_norm": 1.7826234102249146, "learning_rate": 2e-05, "loss": 0.05883911, "step": 11287 }, { "epoch": 22.576, "grad_norm": 1.5495597124099731, "learning_rate": 2e-05, "loss": 0.05044777, "step": 11288 }, { "epoch": 22.578, "grad_norm": 1.5816668272018433, "learning_rate": 2e-05, "loss": 0.0499214, "step": 11289 }, { "epoch": 22.58, "grad_norm": 1.217455506324768, "learning_rate": 2e-05, "loss": 0.0448308, "step": 11290 }, { "epoch": 22.582, "grad_norm": 1.0450185537338257, "learning_rate": 2e-05, "loss": 0.05334631, "step": 11291 }, { "epoch": 22.584, "grad_norm": 1.3096561431884766, "learning_rate": 2e-05, "loss": 0.04120782, "step": 11292 }, { "epoch": 22.586, "grad_norm": 1.7983711957931519, "learning_rate": 2e-05, "loss": 0.07007837, "step": 11293 }, { "epoch": 22.588, "grad_norm": 2.4795901775360107, "learning_rate": 2e-05, "loss": 0.05331931, "step": 11294 }, { "epoch": 22.59, "grad_norm": 1.3445086479187012, "learning_rate": 2e-05, "loss": 0.05260248, "step": 11295 }, { "epoch": 22.592, "grad_norm": 1.2150129079818726, "learning_rate": 2e-05, "loss": 0.04383817, "step": 11296 }, { "epoch": 22.594, "grad_norm": 2.317598342895508, "learning_rate": 2e-05, "loss": 0.045393, "step": 11297 }, { "epoch": 22.596, "grad_norm": 1.7175372838974, "learning_rate": 2e-05, "loss": 0.05940254, "step": 11298 }, { "epoch": 22.598, "grad_norm": 1.3424409627914429, "learning_rate": 2e-05, "loss": 0.05914028, "step": 11299 }, { "epoch": 22.6, "grad_norm": 0.9762082695960999, "learning_rate": 2e-05, "loss": 0.03965374, "step": 11300 }, { "epoch": 22.602, "grad_norm": 2.227386951446533, "learning_rate": 2e-05, "loss": 0.05952931, "step": 11301 }, { "epoch": 22.604, "grad_norm": 2.2554845809936523, "learning_rate": 2e-05, "loss": 0.05298226, "step": 11302 }, { "epoch": 22.606, "grad_norm": 1.5867938995361328, "learning_rate": 2e-05, "loss": 0.0593065, "step": 11303 }, { "epoch": 22.608, "grad_norm": 1.2409868240356445, "learning_rate": 2e-05, "loss": 0.0532985, "step": 11304 }, { "epoch": 22.61, "grad_norm": 1.1701220273971558, "learning_rate": 2e-05, "loss": 0.04675061, "step": 11305 }, { "epoch": 22.612, "grad_norm": 1.4538002014160156, "learning_rate": 2e-05, "loss": 0.03411378, "step": 11306 }, { "epoch": 22.614, "grad_norm": 1.265302300453186, "learning_rate": 2e-05, "loss": 0.04163972, "step": 11307 }, { "epoch": 22.616, "grad_norm": 1.2093217372894287, "learning_rate": 2e-05, "loss": 0.04240926, "step": 11308 }, { "epoch": 22.618, "grad_norm": 0.9568122029304504, "learning_rate": 2e-05, "loss": 0.03643463, "step": 11309 }, { "epoch": 22.62, "grad_norm": 1.7461224794387817, "learning_rate": 2e-05, "loss": 0.06344056, "step": 11310 }, { "epoch": 22.622, "grad_norm": 0.8804039359092712, "learning_rate": 2e-05, "loss": 0.03555988, "step": 11311 }, { "epoch": 22.624, "grad_norm": 1.1765679121017456, "learning_rate": 2e-05, "loss": 0.04863975, "step": 11312 }, { "epoch": 22.626, "grad_norm": 1.5991400480270386, "learning_rate": 2e-05, "loss": 0.0656317, "step": 11313 }, { "epoch": 22.628, "grad_norm": 0.9540390372276306, "learning_rate": 2e-05, "loss": 0.04416461, "step": 11314 }, { "epoch": 22.63, "grad_norm": 2.332988977432251, "learning_rate": 2e-05, "loss": 0.0473526, "step": 11315 }, { "epoch": 22.632, "grad_norm": 1.6032054424285889, "learning_rate": 2e-05, "loss": 0.0433956, "step": 11316 }, { "epoch": 22.634, "grad_norm": 1.5610928535461426, "learning_rate": 2e-05, "loss": 0.05073221, "step": 11317 }, { "epoch": 22.636, "grad_norm": 1.028322696685791, "learning_rate": 2e-05, "loss": 0.04589737, "step": 11318 }, { "epoch": 22.638, "grad_norm": 1.856195330619812, "learning_rate": 2e-05, "loss": 0.05912708, "step": 11319 }, { "epoch": 22.64, "grad_norm": 1.0746749639511108, "learning_rate": 2e-05, "loss": 0.04217374, "step": 11320 }, { "epoch": 22.642, "grad_norm": 1.9546751976013184, "learning_rate": 2e-05, "loss": 0.03752425, "step": 11321 }, { "epoch": 22.644, "grad_norm": 0.8943085074424744, "learning_rate": 2e-05, "loss": 0.04021142, "step": 11322 }, { "epoch": 22.646, "grad_norm": 1.4297624826431274, "learning_rate": 2e-05, "loss": 0.0436245, "step": 11323 }, { "epoch": 22.648, "grad_norm": 0.9395558834075928, "learning_rate": 2e-05, "loss": 0.03864255, "step": 11324 }, { "epoch": 22.65, "grad_norm": 1.1229286193847656, "learning_rate": 2e-05, "loss": 0.04691697, "step": 11325 }, { "epoch": 22.652, "grad_norm": 2.0542104244232178, "learning_rate": 2e-05, "loss": 0.0425556, "step": 11326 }, { "epoch": 22.654, "grad_norm": 1.3759492635726929, "learning_rate": 2e-05, "loss": 0.05112073, "step": 11327 }, { "epoch": 22.656, "grad_norm": 1.428395390510559, "learning_rate": 2e-05, "loss": 0.05420773, "step": 11328 }, { "epoch": 22.658, "grad_norm": 3.3277676105499268, "learning_rate": 2e-05, "loss": 0.06010792, "step": 11329 }, { "epoch": 22.66, "grad_norm": 1.1325587034225464, "learning_rate": 2e-05, "loss": 0.05574021, "step": 11330 }, { "epoch": 22.662, "grad_norm": 2.413510322570801, "learning_rate": 2e-05, "loss": 0.04270217, "step": 11331 }, { "epoch": 22.664, "grad_norm": 1.581922173500061, "learning_rate": 2e-05, "loss": 0.04196506, "step": 11332 }, { "epoch": 22.666, "grad_norm": 1.715911865234375, "learning_rate": 2e-05, "loss": 0.0484398, "step": 11333 }, { "epoch": 22.668, "grad_norm": 1.3386316299438477, "learning_rate": 2e-05, "loss": 0.04054473, "step": 11334 }, { "epoch": 22.67, "grad_norm": 1.3010609149932861, "learning_rate": 2e-05, "loss": 0.05318974, "step": 11335 }, { "epoch": 22.672, "grad_norm": 1.148655891418457, "learning_rate": 2e-05, "loss": 0.05669343, "step": 11336 }, { "epoch": 22.674, "grad_norm": 1.2497706413269043, "learning_rate": 2e-05, "loss": 0.03447173, "step": 11337 }, { "epoch": 22.676, "grad_norm": 1.5564143657684326, "learning_rate": 2e-05, "loss": 0.05818712, "step": 11338 }, { "epoch": 22.678, "grad_norm": 1.333359956741333, "learning_rate": 2e-05, "loss": 0.06120655, "step": 11339 }, { "epoch": 22.68, "grad_norm": 1.028952956199646, "learning_rate": 2e-05, "loss": 0.03525584, "step": 11340 }, { "epoch": 22.682, "grad_norm": 1.0384706258773804, "learning_rate": 2e-05, "loss": 0.05063495, "step": 11341 }, { "epoch": 22.684, "grad_norm": 1.161934494972229, "learning_rate": 2e-05, "loss": 0.04656558, "step": 11342 }, { "epoch": 22.686, "grad_norm": 1.9336819648742676, "learning_rate": 2e-05, "loss": 0.05889698, "step": 11343 }, { "epoch": 22.688, "grad_norm": 1.1754485368728638, "learning_rate": 2e-05, "loss": 0.04346108, "step": 11344 }, { "epoch": 22.69, "grad_norm": 1.1147347688674927, "learning_rate": 2e-05, "loss": 0.05715887, "step": 11345 }, { "epoch": 22.692, "grad_norm": 1.520220160484314, "learning_rate": 2e-05, "loss": 0.06286645, "step": 11346 }, { "epoch": 22.694, "grad_norm": 1.3758745193481445, "learning_rate": 2e-05, "loss": 0.03801577, "step": 11347 }, { "epoch": 22.696, "grad_norm": 1.4507955312728882, "learning_rate": 2e-05, "loss": 0.04826813, "step": 11348 }, { "epoch": 22.698, "grad_norm": 3.3915886878967285, "learning_rate": 2e-05, "loss": 0.05802152, "step": 11349 }, { "epoch": 22.7, "grad_norm": 1.1307430267333984, "learning_rate": 2e-05, "loss": 0.05724068, "step": 11350 }, { "epoch": 22.701999999999998, "grad_norm": 1.0051422119140625, "learning_rate": 2e-05, "loss": 0.03366059, "step": 11351 }, { "epoch": 22.704, "grad_norm": 1.6272162199020386, "learning_rate": 2e-05, "loss": 0.0499667, "step": 11352 }, { "epoch": 22.706, "grad_norm": 1.028485655784607, "learning_rate": 2e-05, "loss": 0.04679807, "step": 11353 }, { "epoch": 22.708, "grad_norm": 1.0300134420394897, "learning_rate": 2e-05, "loss": 0.04558141, "step": 11354 }, { "epoch": 22.71, "grad_norm": 1.814501404762268, "learning_rate": 2e-05, "loss": 0.06453112, "step": 11355 }, { "epoch": 22.712, "grad_norm": 0.9794653654098511, "learning_rate": 2e-05, "loss": 0.03579765, "step": 11356 }, { "epoch": 22.714, "grad_norm": 1.3388313055038452, "learning_rate": 2e-05, "loss": 0.05922037, "step": 11357 }, { "epoch": 22.716, "grad_norm": 1.2953592538833618, "learning_rate": 2e-05, "loss": 0.04218416, "step": 11358 }, { "epoch": 22.718, "grad_norm": 0.8578874468803406, "learning_rate": 2e-05, "loss": 0.03297038, "step": 11359 }, { "epoch": 22.72, "grad_norm": 1.0272828340530396, "learning_rate": 2e-05, "loss": 0.03724246, "step": 11360 }, { "epoch": 22.722, "grad_norm": 1.2599496841430664, "learning_rate": 2e-05, "loss": 0.04356437, "step": 11361 }, { "epoch": 22.724, "grad_norm": 1.1682437658309937, "learning_rate": 2e-05, "loss": 0.05652201, "step": 11362 }, { "epoch": 22.726, "grad_norm": 1.5914980173110962, "learning_rate": 2e-05, "loss": 0.0434085, "step": 11363 }, { "epoch": 22.728, "grad_norm": 1.011545181274414, "learning_rate": 2e-05, "loss": 0.02298982, "step": 11364 }, { "epoch": 22.73, "grad_norm": 1.488634467124939, "learning_rate": 2e-05, "loss": 0.04813875, "step": 11365 }, { "epoch": 22.732, "grad_norm": 1.4329646825790405, "learning_rate": 2e-05, "loss": 0.04607438, "step": 11366 }, { "epoch": 22.734, "grad_norm": 1.1745033264160156, "learning_rate": 2e-05, "loss": 0.04736361, "step": 11367 }, { "epoch": 22.736, "grad_norm": 1.7856942415237427, "learning_rate": 2e-05, "loss": 0.06851799, "step": 11368 }, { "epoch": 22.738, "grad_norm": 1.6058284044265747, "learning_rate": 2e-05, "loss": 0.05773376, "step": 11369 }, { "epoch": 22.74, "grad_norm": 1.0595792531967163, "learning_rate": 2e-05, "loss": 0.03921947, "step": 11370 }, { "epoch": 22.742, "grad_norm": 2.631887912750244, "learning_rate": 2e-05, "loss": 0.048835, "step": 11371 }, { "epoch": 22.744, "grad_norm": 2.225297451019287, "learning_rate": 2e-05, "loss": 0.05954102, "step": 11372 }, { "epoch": 22.746, "grad_norm": 1.1826605796813965, "learning_rate": 2e-05, "loss": 0.0434887, "step": 11373 }, { "epoch": 22.748, "grad_norm": 1.1224970817565918, "learning_rate": 2e-05, "loss": 0.04280729, "step": 11374 }, { "epoch": 22.75, "grad_norm": 1.5083030462265015, "learning_rate": 2e-05, "loss": 0.03788081, "step": 11375 }, { "epoch": 22.752, "grad_norm": 1.428519368171692, "learning_rate": 2e-05, "loss": 0.0628354, "step": 11376 }, { "epoch": 22.754, "grad_norm": 0.9323002099990845, "learning_rate": 2e-05, "loss": 0.03480873, "step": 11377 }, { "epoch": 22.756, "grad_norm": 1.294869303703308, "learning_rate": 2e-05, "loss": 0.05386038, "step": 11378 }, { "epoch": 22.758, "grad_norm": 1.188521146774292, "learning_rate": 2e-05, "loss": 0.0495003, "step": 11379 }, { "epoch": 22.76, "grad_norm": 1.137667179107666, "learning_rate": 2e-05, "loss": 0.04593113, "step": 11380 }, { "epoch": 22.762, "grad_norm": 1.3861621618270874, "learning_rate": 2e-05, "loss": 0.05198872, "step": 11381 }, { "epoch": 22.764, "grad_norm": 1.252461552619934, "learning_rate": 2e-05, "loss": 0.05113174, "step": 11382 }, { "epoch": 22.766, "grad_norm": 2.794287919998169, "learning_rate": 2e-05, "loss": 0.05880801, "step": 11383 }, { "epoch": 22.768, "grad_norm": 1.926071286201477, "learning_rate": 2e-05, "loss": 0.06563574, "step": 11384 }, { "epoch": 22.77, "grad_norm": 1.3863534927368164, "learning_rate": 2e-05, "loss": 0.04758198, "step": 11385 }, { "epoch": 22.772, "grad_norm": 1.2674082517623901, "learning_rate": 2e-05, "loss": 0.04836858, "step": 11386 }, { "epoch": 22.774, "grad_norm": 1.2133840322494507, "learning_rate": 2e-05, "loss": 0.04101015, "step": 11387 }, { "epoch": 22.776, "grad_norm": 2.0674915313720703, "learning_rate": 2e-05, "loss": 0.05079395, "step": 11388 }, { "epoch": 22.778, "grad_norm": 1.3434568643569946, "learning_rate": 2e-05, "loss": 0.05766287, "step": 11389 }, { "epoch": 22.78, "grad_norm": 1.2926524877548218, "learning_rate": 2e-05, "loss": 0.04099448, "step": 11390 }, { "epoch": 22.782, "grad_norm": 1.6340913772583008, "learning_rate": 2e-05, "loss": 0.04330701, "step": 11391 }, { "epoch": 22.784, "grad_norm": 1.2272448539733887, "learning_rate": 2e-05, "loss": 0.03834049, "step": 11392 }, { "epoch": 22.786, "grad_norm": 1.040420651435852, "learning_rate": 2e-05, "loss": 0.0404409, "step": 11393 }, { "epoch": 22.788, "grad_norm": 1.2369282245635986, "learning_rate": 2e-05, "loss": 0.03675054, "step": 11394 }, { "epoch": 22.79, "grad_norm": 1.3325989246368408, "learning_rate": 2e-05, "loss": 0.06250765, "step": 11395 }, { "epoch": 22.792, "grad_norm": 1.061942219734192, "learning_rate": 2e-05, "loss": 0.0585088, "step": 11396 }, { "epoch": 22.794, "grad_norm": 1.2768962383270264, "learning_rate": 2e-05, "loss": 0.05231911, "step": 11397 }, { "epoch": 22.796, "grad_norm": 1.19620943069458, "learning_rate": 2e-05, "loss": 0.06196873, "step": 11398 }, { "epoch": 22.798000000000002, "grad_norm": 2.1286544799804688, "learning_rate": 2e-05, "loss": 0.05616631, "step": 11399 }, { "epoch": 22.8, "grad_norm": 2.932293176651001, "learning_rate": 2e-05, "loss": 0.06682356, "step": 11400 }, { "epoch": 22.802, "grad_norm": 1.0911834239959717, "learning_rate": 2e-05, "loss": 0.04658094, "step": 11401 }, { "epoch": 22.804, "grad_norm": 0.9460340142250061, "learning_rate": 2e-05, "loss": 0.03477623, "step": 11402 }, { "epoch": 22.806, "grad_norm": 1.1509696245193481, "learning_rate": 2e-05, "loss": 0.04269531, "step": 11403 }, { "epoch": 22.808, "grad_norm": 1.5344792604446411, "learning_rate": 2e-05, "loss": 0.04405046, "step": 11404 }, { "epoch": 22.81, "grad_norm": 1.2647613286972046, "learning_rate": 2e-05, "loss": 0.0525385, "step": 11405 }, { "epoch": 22.812, "grad_norm": 1.2023735046386719, "learning_rate": 2e-05, "loss": 0.04129605, "step": 11406 }, { "epoch": 22.814, "grad_norm": 1.1928139925003052, "learning_rate": 2e-05, "loss": 0.05221763, "step": 11407 }, { "epoch": 22.816, "grad_norm": 1.1574828624725342, "learning_rate": 2e-05, "loss": 0.04604352, "step": 11408 }, { "epoch": 22.818, "grad_norm": 0.9061412811279297, "learning_rate": 2e-05, "loss": 0.03747375, "step": 11409 }, { "epoch": 22.82, "grad_norm": 2.022199869155884, "learning_rate": 2e-05, "loss": 0.06008542, "step": 11410 }, { "epoch": 22.822, "grad_norm": 1.593889594078064, "learning_rate": 2e-05, "loss": 0.05851516, "step": 11411 }, { "epoch": 22.824, "grad_norm": 1.23905348777771, "learning_rate": 2e-05, "loss": 0.05341779, "step": 11412 }, { "epoch": 22.826, "grad_norm": 1.443379521369934, "learning_rate": 2e-05, "loss": 0.04833699, "step": 11413 }, { "epoch": 22.828, "grad_norm": 1.0179507732391357, "learning_rate": 2e-05, "loss": 0.0374125, "step": 11414 }, { "epoch": 22.83, "grad_norm": 1.5878279209136963, "learning_rate": 2e-05, "loss": 0.04750193, "step": 11415 }, { "epoch": 22.832, "grad_norm": 1.1982150077819824, "learning_rate": 2e-05, "loss": 0.0585673, "step": 11416 }, { "epoch": 22.834, "grad_norm": 1.4235703945159912, "learning_rate": 2e-05, "loss": 0.04424921, "step": 11417 }, { "epoch": 22.836, "grad_norm": 1.261198878288269, "learning_rate": 2e-05, "loss": 0.05466432, "step": 11418 }, { "epoch": 22.838, "grad_norm": 1.261440396308899, "learning_rate": 2e-05, "loss": 0.05268441, "step": 11419 }, { "epoch": 22.84, "grad_norm": 1.2065094709396362, "learning_rate": 2e-05, "loss": 0.06124427, "step": 11420 }, { "epoch": 22.842, "grad_norm": 1.6197278499603271, "learning_rate": 2e-05, "loss": 0.04379071, "step": 11421 }, { "epoch": 22.844, "grad_norm": 1.806473970413208, "learning_rate": 2e-05, "loss": 0.05088264, "step": 11422 }, { "epoch": 22.846, "grad_norm": 1.4095724821090698, "learning_rate": 2e-05, "loss": 0.04434279, "step": 11423 }, { "epoch": 22.848, "grad_norm": 1.14448082447052, "learning_rate": 2e-05, "loss": 0.0351125, "step": 11424 }, { "epoch": 22.85, "grad_norm": 1.0613905191421509, "learning_rate": 2e-05, "loss": 0.04821333, "step": 11425 }, { "epoch": 22.852, "grad_norm": 1.7093424797058105, "learning_rate": 2e-05, "loss": 0.05173958, "step": 11426 }, { "epoch": 22.854, "grad_norm": 0.8182287216186523, "learning_rate": 2e-05, "loss": 0.02085271, "step": 11427 }, { "epoch": 22.856, "grad_norm": 1.0979381799697876, "learning_rate": 2e-05, "loss": 0.0405486, "step": 11428 }, { "epoch": 22.858, "grad_norm": 2.0296878814697266, "learning_rate": 2e-05, "loss": 0.06506156, "step": 11429 }, { "epoch": 22.86, "grad_norm": 1.5988435745239258, "learning_rate": 2e-05, "loss": 0.05538159, "step": 11430 }, { "epoch": 22.862, "grad_norm": 1.8655071258544922, "learning_rate": 2e-05, "loss": 0.05889206, "step": 11431 }, { "epoch": 22.864, "grad_norm": 1.1035047769546509, "learning_rate": 2e-05, "loss": 0.03811453, "step": 11432 }, { "epoch": 22.866, "grad_norm": 1.5903456211090088, "learning_rate": 2e-05, "loss": 0.03839494, "step": 11433 }, { "epoch": 22.868, "grad_norm": 1.2116554975509644, "learning_rate": 2e-05, "loss": 0.04667623, "step": 11434 }, { "epoch": 22.87, "grad_norm": 1.1557137966156006, "learning_rate": 2e-05, "loss": 0.04948284, "step": 11435 }, { "epoch": 22.872, "grad_norm": 3.118579387664795, "learning_rate": 2e-05, "loss": 0.05913485, "step": 11436 }, { "epoch": 22.874, "grad_norm": 1.4192508459091187, "learning_rate": 2e-05, "loss": 0.04781287, "step": 11437 }, { "epoch": 22.876, "grad_norm": 1.2156481742858887, "learning_rate": 2e-05, "loss": 0.03643696, "step": 11438 }, { "epoch": 22.878, "grad_norm": 1.2126352787017822, "learning_rate": 2e-05, "loss": 0.05261337, "step": 11439 }, { "epoch": 22.88, "grad_norm": 1.0004485845565796, "learning_rate": 2e-05, "loss": 0.03148644, "step": 11440 }, { "epoch": 22.882, "grad_norm": 1.1723521947860718, "learning_rate": 2e-05, "loss": 0.05497139, "step": 11441 }, { "epoch": 22.884, "grad_norm": 1.052226185798645, "learning_rate": 2e-05, "loss": 0.04890184, "step": 11442 }, { "epoch": 22.886, "grad_norm": 2.601367473602295, "learning_rate": 2e-05, "loss": 0.04642366, "step": 11443 }, { "epoch": 22.888, "grad_norm": 1.6198233366012573, "learning_rate": 2e-05, "loss": 0.04375637, "step": 11444 }, { "epoch": 22.89, "grad_norm": 1.0251045227050781, "learning_rate": 2e-05, "loss": 0.04236152, "step": 11445 }, { "epoch": 22.892, "grad_norm": 1.0182973146438599, "learning_rate": 2e-05, "loss": 0.03910048, "step": 11446 }, { "epoch": 22.894, "grad_norm": 0.9043207764625549, "learning_rate": 2e-05, "loss": 0.03130884, "step": 11447 }, { "epoch": 22.896, "grad_norm": 1.5654839277267456, "learning_rate": 2e-05, "loss": 0.04570423, "step": 11448 }, { "epoch": 22.898, "grad_norm": 1.3308196067810059, "learning_rate": 2e-05, "loss": 0.0585776, "step": 11449 }, { "epoch": 22.9, "grad_norm": 1.065496563911438, "learning_rate": 2e-05, "loss": 0.0327067, "step": 11450 }, { "epoch": 22.902, "grad_norm": 0.9588587284088135, "learning_rate": 2e-05, "loss": 0.02495749, "step": 11451 }, { "epoch": 22.904, "grad_norm": 1.0691012144088745, "learning_rate": 2e-05, "loss": 0.03999297, "step": 11452 }, { "epoch": 22.906, "grad_norm": 1.4028337001800537, "learning_rate": 2e-05, "loss": 0.05478033, "step": 11453 }, { "epoch": 22.908, "grad_norm": 1.8534777164459229, "learning_rate": 2e-05, "loss": 0.04649662, "step": 11454 }, { "epoch": 22.91, "grad_norm": 2.728555917739868, "learning_rate": 2e-05, "loss": 0.05643704, "step": 11455 }, { "epoch": 22.912, "grad_norm": 2.3018362522125244, "learning_rate": 2e-05, "loss": 0.04570448, "step": 11456 }, { "epoch": 22.914, "grad_norm": 1.0240752696990967, "learning_rate": 2e-05, "loss": 0.04288086, "step": 11457 }, { "epoch": 22.916, "grad_norm": 1.1028988361358643, "learning_rate": 2e-05, "loss": 0.04273102, "step": 11458 }, { "epoch": 22.918, "grad_norm": 1.3973126411437988, "learning_rate": 2e-05, "loss": 0.03367525, "step": 11459 }, { "epoch": 22.92, "grad_norm": 1.355122685432434, "learning_rate": 2e-05, "loss": 0.04072536, "step": 11460 }, { "epoch": 22.922, "grad_norm": 1.3600130081176758, "learning_rate": 2e-05, "loss": 0.0566312, "step": 11461 }, { "epoch": 22.924, "grad_norm": 1.4610315561294556, "learning_rate": 2e-05, "loss": 0.06304776, "step": 11462 }, { "epoch": 22.926, "grad_norm": 1.6216849088668823, "learning_rate": 2e-05, "loss": 0.04048252, "step": 11463 }, { "epoch": 22.928, "grad_norm": 1.0937893390655518, "learning_rate": 2e-05, "loss": 0.03741532, "step": 11464 }, { "epoch": 22.93, "grad_norm": 1.043770432472229, "learning_rate": 2e-05, "loss": 0.04591145, "step": 11465 }, { "epoch": 22.932, "grad_norm": 1.2273602485656738, "learning_rate": 2e-05, "loss": 0.03821223, "step": 11466 }, { "epoch": 22.934, "grad_norm": 1.9527608156204224, "learning_rate": 2e-05, "loss": 0.0479451, "step": 11467 }, { "epoch": 22.936, "grad_norm": 2.1063270568847656, "learning_rate": 2e-05, "loss": 0.06443761, "step": 11468 }, { "epoch": 22.938, "grad_norm": 1.3264068365097046, "learning_rate": 2e-05, "loss": 0.05181552, "step": 11469 }, { "epoch": 22.94, "grad_norm": 2.037461996078491, "learning_rate": 2e-05, "loss": 0.05650178, "step": 11470 }, { "epoch": 22.942, "grad_norm": 1.7283660173416138, "learning_rate": 2e-05, "loss": 0.04151305, "step": 11471 }, { "epoch": 22.944, "grad_norm": 1.9940475225448608, "learning_rate": 2e-05, "loss": 0.04008929, "step": 11472 }, { "epoch": 22.946, "grad_norm": 1.205177903175354, "learning_rate": 2e-05, "loss": 0.05304724, "step": 11473 }, { "epoch": 22.948, "grad_norm": 1.9214202165603638, "learning_rate": 2e-05, "loss": 0.05114603, "step": 11474 }, { "epoch": 22.95, "grad_norm": 1.2213608026504517, "learning_rate": 2e-05, "loss": 0.03934781, "step": 11475 }, { "epoch": 22.951999999999998, "grad_norm": 2.133103132247925, "learning_rate": 2e-05, "loss": 0.04767356, "step": 11476 }, { "epoch": 22.954, "grad_norm": 1.201549768447876, "learning_rate": 2e-05, "loss": 0.03965754, "step": 11477 }, { "epoch": 22.956, "grad_norm": 1.0932912826538086, "learning_rate": 2e-05, "loss": 0.04426933, "step": 11478 }, { "epoch": 22.958, "grad_norm": 0.9014148712158203, "learning_rate": 2e-05, "loss": 0.03379968, "step": 11479 }, { "epoch": 22.96, "grad_norm": 1.128990888595581, "learning_rate": 2e-05, "loss": 0.0428699, "step": 11480 }, { "epoch": 22.962, "grad_norm": 1.4506347179412842, "learning_rate": 2e-05, "loss": 0.05526916, "step": 11481 }, { "epoch": 22.964, "grad_norm": 1.4444808959960938, "learning_rate": 2e-05, "loss": 0.04153492, "step": 11482 }, { "epoch": 22.966, "grad_norm": 1.2618721723556519, "learning_rate": 2e-05, "loss": 0.04521314, "step": 11483 }, { "epoch": 22.968, "grad_norm": 1.2819956541061401, "learning_rate": 2e-05, "loss": 0.04434113, "step": 11484 }, { "epoch": 22.97, "grad_norm": 1.0580623149871826, "learning_rate": 2e-05, "loss": 0.05579165, "step": 11485 }, { "epoch": 22.972, "grad_norm": 1.8078941106796265, "learning_rate": 2e-05, "loss": 0.04561062, "step": 11486 }, { "epoch": 22.974, "grad_norm": 1.7995368242263794, "learning_rate": 2e-05, "loss": 0.04184832, "step": 11487 }, { "epoch": 22.976, "grad_norm": 3.262955904006958, "learning_rate": 2e-05, "loss": 0.05064858, "step": 11488 }, { "epoch": 22.978, "grad_norm": 1.1863486766815186, "learning_rate": 2e-05, "loss": 0.06295459, "step": 11489 }, { "epoch": 22.98, "grad_norm": 1.45112144947052, "learning_rate": 2e-05, "loss": 0.04033203, "step": 11490 }, { "epoch": 22.982, "grad_norm": 1.8730523586273193, "learning_rate": 2e-05, "loss": 0.04470631, "step": 11491 }, { "epoch": 22.984, "grad_norm": 1.4328550100326538, "learning_rate": 2e-05, "loss": 0.06587666, "step": 11492 }, { "epoch": 22.986, "grad_norm": 2.2640762329101562, "learning_rate": 2e-05, "loss": 0.05154163, "step": 11493 }, { "epoch": 22.988, "grad_norm": 3.426724910736084, "learning_rate": 2e-05, "loss": 0.03605981, "step": 11494 }, { "epoch": 22.99, "grad_norm": 1.3949122428894043, "learning_rate": 2e-05, "loss": 0.03751953, "step": 11495 }, { "epoch": 22.992, "grad_norm": 1.3085368871688843, "learning_rate": 2e-05, "loss": 0.04209456, "step": 11496 }, { "epoch": 22.994, "grad_norm": 1.3357843160629272, "learning_rate": 2e-05, "loss": 0.03631935, "step": 11497 }, { "epoch": 22.996, "grad_norm": 1.2063790559768677, "learning_rate": 2e-05, "loss": 0.04160905, "step": 11498 }, { "epoch": 22.998, "grad_norm": 0.9796919226646423, "learning_rate": 2e-05, "loss": 0.03963297, "step": 11499 }, { "epoch": 23.0, "grad_norm": 1.127573847770691, "learning_rate": 2e-05, "loss": 0.03552626, "step": 11500 }, { "epoch": 23.0, "eval_performance": { "AngleClassification_1": 0.998, "AngleClassification_2": 0.998, "AngleClassification_3": 0.9800399201596807, "Equal_1": 0.994, "Equal_2": 0.9680638722554891, "Equal_3": 0.9800399201596807, "LineComparison_1": 1.0, "LineComparison_2": 0.998003992015968, "LineComparison_3": 0.9900199600798403, "Parallel_1": 0.9879759519038076, "Parallel_2": 0.9959919839679359, "Parallel_3": 0.99, "Perpendicular_1": 0.996, "Perpendicular_2": 0.988, "Perpendicular_3": 0.8597194388777555, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.996, "PointLiesOnCircle_3": 0.9868666666666667, "PointLiesOnLine_1": 0.9919839679358717, "PointLiesOnLine_2": 1.0, "PointLiesOnLine_3": 0.9760479041916168 }, "eval_runtime": 227.6198, "eval_samples_per_second": 46.13, "eval_steps_per_second": 0.923, "step": 11500 }, { "epoch": 23.002, "grad_norm": 1.0146960020065308, "learning_rate": 2e-05, "loss": 0.04464405, "step": 11501 }, { "epoch": 23.004, "grad_norm": 1.085135817527771, "learning_rate": 2e-05, "loss": 0.04671686, "step": 11502 }, { "epoch": 23.006, "grad_norm": 1.070618987083435, "learning_rate": 2e-05, "loss": 0.05140489, "step": 11503 }, { "epoch": 23.008, "grad_norm": 1.2592740058898926, "learning_rate": 2e-05, "loss": 0.05386601, "step": 11504 }, { "epoch": 23.01, "grad_norm": 1.0952107906341553, "learning_rate": 2e-05, "loss": 0.04357245, "step": 11505 }, { "epoch": 23.012, "grad_norm": 1.1056619882583618, "learning_rate": 2e-05, "loss": 0.03597664, "step": 11506 }, { "epoch": 23.014, "grad_norm": 1.104623794555664, "learning_rate": 2e-05, "loss": 0.03131764, "step": 11507 }, { "epoch": 23.016, "grad_norm": 0.8807752132415771, "learning_rate": 2e-05, "loss": 0.04032353, "step": 11508 }, { "epoch": 23.018, "grad_norm": 1.0665936470031738, "learning_rate": 2e-05, "loss": 0.0361053, "step": 11509 }, { "epoch": 23.02, "grad_norm": 1.4993975162506104, "learning_rate": 2e-05, "loss": 0.05781484, "step": 11510 }, { "epoch": 23.022, "grad_norm": 1.7669090032577515, "learning_rate": 2e-05, "loss": 0.06065506, "step": 11511 }, { "epoch": 23.024, "grad_norm": 1.4423679113388062, "learning_rate": 2e-05, "loss": 0.06354275, "step": 11512 }, { "epoch": 23.026, "grad_norm": 1.8576370477676392, "learning_rate": 2e-05, "loss": 0.07017103, "step": 11513 }, { "epoch": 23.028, "grad_norm": 1.1650328636169434, "learning_rate": 2e-05, "loss": 0.04397281, "step": 11514 }, { "epoch": 23.03, "grad_norm": 1.4697377681732178, "learning_rate": 2e-05, "loss": 0.04977475, "step": 11515 }, { "epoch": 23.032, "grad_norm": 1.537423014640808, "learning_rate": 2e-05, "loss": 0.04907055, "step": 11516 }, { "epoch": 23.034, "grad_norm": 1.1378226280212402, "learning_rate": 2e-05, "loss": 0.0431536, "step": 11517 }, { "epoch": 23.036, "grad_norm": 1.3547303676605225, "learning_rate": 2e-05, "loss": 0.04395192, "step": 11518 }, { "epoch": 23.038, "grad_norm": 0.8885576725006104, "learning_rate": 2e-05, "loss": 0.0410811, "step": 11519 }, { "epoch": 23.04, "grad_norm": 1.6321918964385986, "learning_rate": 2e-05, "loss": 0.06359672, "step": 11520 }, { "epoch": 23.042, "grad_norm": 1.268312931060791, "learning_rate": 2e-05, "loss": 0.05183922, "step": 11521 }, { "epoch": 23.044, "grad_norm": 1.0964752435684204, "learning_rate": 2e-05, "loss": 0.04159138, "step": 11522 }, { "epoch": 23.046, "grad_norm": 1.5512797832489014, "learning_rate": 2e-05, "loss": 0.03773006, "step": 11523 }, { "epoch": 23.048, "grad_norm": 1.8368301391601562, "learning_rate": 2e-05, "loss": 0.06314449, "step": 11524 }, { "epoch": 23.05, "grad_norm": 1.4855458736419678, "learning_rate": 2e-05, "loss": 0.05684523, "step": 11525 }, { "epoch": 23.052, "grad_norm": 1.1217402219772339, "learning_rate": 2e-05, "loss": 0.05039538, "step": 11526 }, { "epoch": 23.054, "grad_norm": 3.0990982055664062, "learning_rate": 2e-05, "loss": 0.05401253, "step": 11527 }, { "epoch": 23.056, "grad_norm": 1.8776130676269531, "learning_rate": 2e-05, "loss": 0.0594167, "step": 11528 }, { "epoch": 23.058, "grad_norm": 1.211838722229004, "learning_rate": 2e-05, "loss": 0.06041618, "step": 11529 }, { "epoch": 23.06, "grad_norm": 0.9875292778015137, "learning_rate": 2e-05, "loss": 0.03855842, "step": 11530 }, { "epoch": 23.062, "grad_norm": 1.0106637477874756, "learning_rate": 2e-05, "loss": 0.04279304, "step": 11531 }, { "epoch": 23.064, "grad_norm": 1.1279103755950928, "learning_rate": 2e-05, "loss": 0.03844076, "step": 11532 }, { "epoch": 23.066, "grad_norm": 1.5234408378601074, "learning_rate": 2e-05, "loss": 0.05372392, "step": 11533 }, { "epoch": 23.068, "grad_norm": 1.098601222038269, "learning_rate": 2e-05, "loss": 0.03984475, "step": 11534 }, { "epoch": 23.07, "grad_norm": 1.3221733570098877, "learning_rate": 2e-05, "loss": 0.05712114, "step": 11535 }, { "epoch": 23.072, "grad_norm": 1.094106912612915, "learning_rate": 2e-05, "loss": 0.04199158, "step": 11536 }, { "epoch": 23.074, "grad_norm": 1.5803296566009521, "learning_rate": 2e-05, "loss": 0.06201686, "step": 11537 }, { "epoch": 23.076, "grad_norm": 4.082747459411621, "learning_rate": 2e-05, "loss": 0.06215588, "step": 11538 }, { "epoch": 23.078, "grad_norm": 1.3342355489730835, "learning_rate": 2e-05, "loss": 0.0446651, "step": 11539 }, { "epoch": 23.08, "grad_norm": 2.138357162475586, "learning_rate": 2e-05, "loss": 0.065138, "step": 11540 }, { "epoch": 23.082, "grad_norm": 1.1780990362167358, "learning_rate": 2e-05, "loss": 0.04524734, "step": 11541 }, { "epoch": 23.084, "grad_norm": 0.977323055267334, "learning_rate": 2e-05, "loss": 0.03879407, "step": 11542 }, { "epoch": 23.086, "grad_norm": 1.875959873199463, "learning_rate": 2e-05, "loss": 0.04647957, "step": 11543 }, { "epoch": 23.088, "grad_norm": 1.402848243713379, "learning_rate": 2e-05, "loss": 0.05177037, "step": 11544 }, { "epoch": 23.09, "grad_norm": 1.8001723289489746, "learning_rate": 2e-05, "loss": 0.05626069, "step": 11545 }, { "epoch": 23.092, "grad_norm": 1.72246253490448, "learning_rate": 2e-05, "loss": 0.0512192, "step": 11546 }, { "epoch": 23.094, "grad_norm": 1.720243215560913, "learning_rate": 2e-05, "loss": 0.04664902, "step": 11547 }, { "epoch": 23.096, "grad_norm": 1.1276965141296387, "learning_rate": 2e-05, "loss": 0.03830987, "step": 11548 }, { "epoch": 23.098, "grad_norm": 1.4146757125854492, "learning_rate": 2e-05, "loss": 0.04342667, "step": 11549 }, { "epoch": 23.1, "grad_norm": 0.981442391872406, "learning_rate": 2e-05, "loss": 0.03301752, "step": 11550 }, { "epoch": 23.102, "grad_norm": 0.9637369513511658, "learning_rate": 2e-05, "loss": 0.03874366, "step": 11551 }, { "epoch": 23.104, "grad_norm": 1.0109279155731201, "learning_rate": 2e-05, "loss": 0.03627685, "step": 11552 }, { "epoch": 23.106, "grad_norm": 1.0362370014190674, "learning_rate": 2e-05, "loss": 0.04287652, "step": 11553 }, { "epoch": 23.108, "grad_norm": 5.353565216064453, "learning_rate": 2e-05, "loss": 0.06025293, "step": 11554 }, { "epoch": 23.11, "grad_norm": 0.8398458957672119, "learning_rate": 2e-05, "loss": 0.03079301, "step": 11555 }, { "epoch": 23.112, "grad_norm": 1.8144707679748535, "learning_rate": 2e-05, "loss": 0.04267026, "step": 11556 }, { "epoch": 23.114, "grad_norm": 1.2978721857070923, "learning_rate": 2e-05, "loss": 0.05681873, "step": 11557 }, { "epoch": 23.116, "grad_norm": 2.3321759700775146, "learning_rate": 2e-05, "loss": 0.04559003, "step": 11558 }, { "epoch": 23.118, "grad_norm": 1.3800153732299805, "learning_rate": 2e-05, "loss": 0.03817922, "step": 11559 }, { "epoch": 23.12, "grad_norm": 2.3808610439300537, "learning_rate": 2e-05, "loss": 0.04928463, "step": 11560 }, { "epoch": 23.122, "grad_norm": 1.07627272605896, "learning_rate": 2e-05, "loss": 0.05258366, "step": 11561 }, { "epoch": 23.124, "grad_norm": 1.3099822998046875, "learning_rate": 2e-05, "loss": 0.04576315, "step": 11562 }, { "epoch": 23.126, "grad_norm": 1.2224770784378052, "learning_rate": 2e-05, "loss": 0.03834332, "step": 11563 }, { "epoch": 23.128, "grad_norm": 1.0849539041519165, "learning_rate": 2e-05, "loss": 0.03901177, "step": 11564 }, { "epoch": 23.13, "grad_norm": 1.6349326372146606, "learning_rate": 2e-05, "loss": 0.04507283, "step": 11565 }, { "epoch": 23.132, "grad_norm": 1.1077353954315186, "learning_rate": 2e-05, "loss": 0.04588814, "step": 11566 }, { "epoch": 23.134, "grad_norm": 0.9734787940979004, "learning_rate": 2e-05, "loss": 0.03829316, "step": 11567 }, { "epoch": 23.136, "grad_norm": 1.3829809427261353, "learning_rate": 2e-05, "loss": 0.05702911, "step": 11568 }, { "epoch": 23.138, "grad_norm": 2.404566526412964, "learning_rate": 2e-05, "loss": 0.03387457, "step": 11569 }, { "epoch": 23.14, "grad_norm": 1.1466082334518433, "learning_rate": 2e-05, "loss": 0.05090828, "step": 11570 }, { "epoch": 23.142, "grad_norm": 2.5590660572052, "learning_rate": 2e-05, "loss": 0.05354975, "step": 11571 }, { "epoch": 23.144, "grad_norm": 2.0133118629455566, "learning_rate": 2e-05, "loss": 0.06877681, "step": 11572 }, { "epoch": 23.146, "grad_norm": 2.0013816356658936, "learning_rate": 2e-05, "loss": 0.04198175, "step": 11573 }, { "epoch": 23.148, "grad_norm": 1.2530235052108765, "learning_rate": 2e-05, "loss": 0.03617892, "step": 11574 }, { "epoch": 23.15, "grad_norm": 1.3855843544006348, "learning_rate": 2e-05, "loss": 0.05185246, "step": 11575 }, { "epoch": 23.152, "grad_norm": 0.9014667272567749, "learning_rate": 2e-05, "loss": 0.04844087, "step": 11576 }, { "epoch": 23.154, "grad_norm": 1.680556058883667, "learning_rate": 2e-05, "loss": 0.04120886, "step": 11577 }, { "epoch": 23.156, "grad_norm": 1.6221988201141357, "learning_rate": 2e-05, "loss": 0.05760796, "step": 11578 }, { "epoch": 23.158, "grad_norm": 2.6494626998901367, "learning_rate": 2e-05, "loss": 0.06236105, "step": 11579 }, { "epoch": 23.16, "grad_norm": 1.8423081636428833, "learning_rate": 2e-05, "loss": 0.04586459, "step": 11580 }, { "epoch": 23.162, "grad_norm": 1.4955379962921143, "learning_rate": 2e-05, "loss": 0.04270253, "step": 11581 }, { "epoch": 23.164, "grad_norm": 1.1177822351455688, "learning_rate": 2e-05, "loss": 0.05179861, "step": 11582 }, { "epoch": 23.166, "grad_norm": 0.9064000248908997, "learning_rate": 2e-05, "loss": 0.03743205, "step": 11583 }, { "epoch": 23.168, "grad_norm": 1.2021297216415405, "learning_rate": 2e-05, "loss": 0.05493079, "step": 11584 }, { "epoch": 23.17, "grad_norm": 1.4332400560379028, "learning_rate": 2e-05, "loss": 0.05197261, "step": 11585 }, { "epoch": 23.172, "grad_norm": 1.0850114822387695, "learning_rate": 2e-05, "loss": 0.03374135, "step": 11586 }, { "epoch": 23.174, "grad_norm": 1.0908195972442627, "learning_rate": 2e-05, "loss": 0.04641365, "step": 11587 }, { "epoch": 23.176, "grad_norm": 1.216330885887146, "learning_rate": 2e-05, "loss": 0.05652422, "step": 11588 }, { "epoch": 23.178, "grad_norm": 1.3994098901748657, "learning_rate": 2e-05, "loss": 0.0552398, "step": 11589 }, { "epoch": 23.18, "grad_norm": 1.5974634885787964, "learning_rate": 2e-05, "loss": 0.05754232, "step": 11590 }, { "epoch": 23.182, "grad_norm": 1.360548496246338, "learning_rate": 2e-05, "loss": 0.05203604, "step": 11591 }, { "epoch": 23.184, "grad_norm": 0.9656210541725159, "learning_rate": 2e-05, "loss": 0.04140483, "step": 11592 }, { "epoch": 23.186, "grad_norm": 0.958460807800293, "learning_rate": 2e-05, "loss": 0.04025957, "step": 11593 }, { "epoch": 23.188, "grad_norm": 1.0074541568756104, "learning_rate": 2e-05, "loss": 0.04300274, "step": 11594 }, { "epoch": 23.19, "grad_norm": 1.719303011894226, "learning_rate": 2e-05, "loss": 0.05605261, "step": 11595 }, { "epoch": 23.192, "grad_norm": 1.993430256843567, "learning_rate": 2e-05, "loss": 0.05034896, "step": 11596 }, { "epoch": 23.194, "grad_norm": 1.3678110837936401, "learning_rate": 2e-05, "loss": 0.05784823, "step": 11597 }, { "epoch": 23.196, "grad_norm": 1.2149230241775513, "learning_rate": 2e-05, "loss": 0.05422278, "step": 11598 }, { "epoch": 23.198, "grad_norm": 1.0377055406570435, "learning_rate": 2e-05, "loss": 0.03950357, "step": 11599 }, { "epoch": 23.2, "grad_norm": 1.1512510776519775, "learning_rate": 2e-05, "loss": 0.03755477, "step": 11600 }, { "epoch": 23.202, "grad_norm": 0.972597599029541, "learning_rate": 2e-05, "loss": 0.03577134, "step": 11601 }, { "epoch": 23.204, "grad_norm": 1.2005105018615723, "learning_rate": 2e-05, "loss": 0.0534524, "step": 11602 }, { "epoch": 23.206, "grad_norm": 1.8143349885940552, "learning_rate": 2e-05, "loss": 0.04815232, "step": 11603 }, { "epoch": 23.208, "grad_norm": 1.7275186777114868, "learning_rate": 2e-05, "loss": 0.06460041, "step": 11604 }, { "epoch": 23.21, "grad_norm": 1.0689847469329834, "learning_rate": 2e-05, "loss": 0.05154752, "step": 11605 }, { "epoch": 23.212, "grad_norm": 1.4193809032440186, "learning_rate": 2e-05, "loss": 0.04169119, "step": 11606 }, { "epoch": 23.214, "grad_norm": 1.3391445875167847, "learning_rate": 2e-05, "loss": 0.05282654, "step": 11607 }, { "epoch": 23.216, "grad_norm": 2.8363728523254395, "learning_rate": 2e-05, "loss": 0.07106539, "step": 11608 }, { "epoch": 23.218, "grad_norm": 1.0268906354904175, "learning_rate": 2e-05, "loss": 0.04184026, "step": 11609 }, { "epoch": 23.22, "grad_norm": 1.13827383518219, "learning_rate": 2e-05, "loss": 0.0574695, "step": 11610 }, { "epoch": 23.222, "grad_norm": 0.9532491564750671, "learning_rate": 2e-05, "loss": 0.04152424, "step": 11611 }, { "epoch": 23.224, "grad_norm": 1.4836177825927734, "learning_rate": 2e-05, "loss": 0.04662114, "step": 11612 }, { "epoch": 23.226, "grad_norm": 1.1817634105682373, "learning_rate": 2e-05, "loss": 0.04197218, "step": 11613 }, { "epoch": 23.228, "grad_norm": 1.1005860567092896, "learning_rate": 2e-05, "loss": 0.0448859, "step": 11614 }, { "epoch": 23.23, "grad_norm": 0.9331889152526855, "learning_rate": 2e-05, "loss": 0.04716832, "step": 11615 }, { "epoch": 23.232, "grad_norm": 1.717016339302063, "learning_rate": 2e-05, "loss": 0.06619509, "step": 11616 }, { "epoch": 23.234, "grad_norm": 1.5852023363113403, "learning_rate": 2e-05, "loss": 0.05006515, "step": 11617 }, { "epoch": 23.236, "grad_norm": 1.3254833221435547, "learning_rate": 2e-05, "loss": 0.04656006, "step": 11618 }, { "epoch": 23.238, "grad_norm": 1.1319782733917236, "learning_rate": 2e-05, "loss": 0.0480526, "step": 11619 }, { "epoch": 23.24, "grad_norm": 1.8729907274246216, "learning_rate": 2e-05, "loss": 0.07541776, "step": 11620 }, { "epoch": 23.242, "grad_norm": 0.9438799023628235, "learning_rate": 2e-05, "loss": 0.03605196, "step": 11621 }, { "epoch": 23.244, "grad_norm": 1.1917450428009033, "learning_rate": 2e-05, "loss": 0.04152335, "step": 11622 }, { "epoch": 23.246, "grad_norm": 0.9761221408843994, "learning_rate": 2e-05, "loss": 0.03051461, "step": 11623 }, { "epoch": 23.248, "grad_norm": 1.5227864980697632, "learning_rate": 2e-05, "loss": 0.05406623, "step": 11624 }, { "epoch": 23.25, "grad_norm": 0.9571890234947205, "learning_rate": 2e-05, "loss": 0.04194787, "step": 11625 }, { "epoch": 23.252, "grad_norm": 1.4765560626983643, "learning_rate": 2e-05, "loss": 0.0584995, "step": 11626 }, { "epoch": 23.254, "grad_norm": 1.07988703250885, "learning_rate": 2e-05, "loss": 0.04565503, "step": 11627 }, { "epoch": 23.256, "grad_norm": 1.7986540794372559, "learning_rate": 2e-05, "loss": 0.04161085, "step": 11628 }, { "epoch": 23.258, "grad_norm": 1.4125012159347534, "learning_rate": 2e-05, "loss": 0.04386605, "step": 11629 }, { "epoch": 23.26, "grad_norm": 1.1008765697479248, "learning_rate": 2e-05, "loss": 0.05585647, "step": 11630 }, { "epoch": 23.262, "grad_norm": 1.260725975036621, "learning_rate": 2e-05, "loss": 0.04911609, "step": 11631 }, { "epoch": 23.264, "grad_norm": 1.1550300121307373, "learning_rate": 2e-05, "loss": 0.05345702, "step": 11632 }, { "epoch": 23.266, "grad_norm": 1.1696397066116333, "learning_rate": 2e-05, "loss": 0.04665539, "step": 11633 }, { "epoch": 23.268, "grad_norm": 0.9723567962646484, "learning_rate": 2e-05, "loss": 0.04099352, "step": 11634 }, { "epoch": 23.27, "grad_norm": 1.3305162191390991, "learning_rate": 2e-05, "loss": 0.04013722, "step": 11635 }, { "epoch": 23.272, "grad_norm": 1.8601722717285156, "learning_rate": 2e-05, "loss": 0.07036926, "step": 11636 }, { "epoch": 23.274, "grad_norm": 1.105675220489502, "learning_rate": 2e-05, "loss": 0.0540611, "step": 11637 }, { "epoch": 23.276, "grad_norm": 1.6436797380447388, "learning_rate": 2e-05, "loss": 0.06247162, "step": 11638 }, { "epoch": 23.278, "grad_norm": 1.45890474319458, "learning_rate": 2e-05, "loss": 0.04240998, "step": 11639 }, { "epoch": 23.28, "grad_norm": 1.7451809644699097, "learning_rate": 2e-05, "loss": 0.06078637, "step": 11640 }, { "epoch": 23.282, "grad_norm": 1.2856923341751099, "learning_rate": 2e-05, "loss": 0.03985235, "step": 11641 }, { "epoch": 23.284, "grad_norm": 1.1352111101150513, "learning_rate": 2e-05, "loss": 0.03900644, "step": 11642 }, { "epoch": 23.286, "grad_norm": 0.8437964916229248, "learning_rate": 2e-05, "loss": 0.02842924, "step": 11643 }, { "epoch": 23.288, "grad_norm": 0.9723466038703918, "learning_rate": 2e-05, "loss": 0.03522197, "step": 11644 }, { "epoch": 23.29, "grad_norm": 1.050916314125061, "learning_rate": 2e-05, "loss": 0.03594717, "step": 11645 }, { "epoch": 23.292, "grad_norm": 1.0665785074234009, "learning_rate": 2e-05, "loss": 0.03906161, "step": 11646 }, { "epoch": 23.294, "grad_norm": 1.0101227760314941, "learning_rate": 2e-05, "loss": 0.04303355, "step": 11647 }, { "epoch": 23.296, "grad_norm": 1.6306806802749634, "learning_rate": 2e-05, "loss": 0.05207025, "step": 11648 }, { "epoch": 23.298, "grad_norm": 1.2075953483581543, "learning_rate": 2e-05, "loss": 0.05993555, "step": 11649 }, { "epoch": 23.3, "grad_norm": 1.0680861473083496, "learning_rate": 2e-05, "loss": 0.03620788, "step": 11650 }, { "epoch": 23.302, "grad_norm": 2.1970956325531006, "learning_rate": 2e-05, "loss": 0.05075941, "step": 11651 }, { "epoch": 23.304, "grad_norm": 1.3954427242279053, "learning_rate": 2e-05, "loss": 0.05664573, "step": 11652 }, { "epoch": 23.306, "grad_norm": 1.4532697200775146, "learning_rate": 2e-05, "loss": 0.04408948, "step": 11653 }, { "epoch": 23.308, "grad_norm": 1.0909913778305054, "learning_rate": 2e-05, "loss": 0.04104931, "step": 11654 }, { "epoch": 23.31, "grad_norm": 1.0103586912155151, "learning_rate": 2e-05, "loss": 0.03559302, "step": 11655 }, { "epoch": 23.312, "grad_norm": 1.0986278057098389, "learning_rate": 2e-05, "loss": 0.04635569, "step": 11656 }, { "epoch": 23.314, "grad_norm": 1.1429859399795532, "learning_rate": 2e-05, "loss": 0.04956363, "step": 11657 }, { "epoch": 23.316, "grad_norm": 2.244128704071045, "learning_rate": 2e-05, "loss": 0.04716303, "step": 11658 }, { "epoch": 23.318, "grad_norm": 1.6920887231826782, "learning_rate": 2e-05, "loss": 0.05023804, "step": 11659 }, { "epoch": 23.32, "grad_norm": 0.8607492446899414, "learning_rate": 2e-05, "loss": 0.03242617, "step": 11660 }, { "epoch": 23.322, "grad_norm": 1.2864484786987305, "learning_rate": 2e-05, "loss": 0.05867041, "step": 11661 }, { "epoch": 23.324, "grad_norm": 2.5509493350982666, "learning_rate": 2e-05, "loss": 0.04753581, "step": 11662 }, { "epoch": 23.326, "grad_norm": 1.8006631135940552, "learning_rate": 2e-05, "loss": 0.04960055, "step": 11663 }, { "epoch": 23.328, "grad_norm": 1.153206467628479, "learning_rate": 2e-05, "loss": 0.04587705, "step": 11664 }, { "epoch": 23.33, "grad_norm": 1.0099129676818848, "learning_rate": 2e-05, "loss": 0.03694653, "step": 11665 }, { "epoch": 23.332, "grad_norm": 0.8580511212348938, "learning_rate": 2e-05, "loss": 0.02735648, "step": 11666 }, { "epoch": 23.334, "grad_norm": 1.7664073705673218, "learning_rate": 2e-05, "loss": 0.04164409, "step": 11667 }, { "epoch": 23.336, "grad_norm": 1.0428500175476074, "learning_rate": 2e-05, "loss": 0.0339654, "step": 11668 }, { "epoch": 23.338, "grad_norm": 1.1137148141860962, "learning_rate": 2e-05, "loss": 0.04441859, "step": 11669 }, { "epoch": 23.34, "grad_norm": 1.4066476821899414, "learning_rate": 2e-05, "loss": 0.03940981, "step": 11670 }, { "epoch": 23.342, "grad_norm": 0.7992669343948364, "learning_rate": 2e-05, "loss": 0.02978232, "step": 11671 }, { "epoch": 23.344, "grad_norm": 1.151203989982605, "learning_rate": 2e-05, "loss": 0.05043959, "step": 11672 }, { "epoch": 23.346, "grad_norm": 1.4840251207351685, "learning_rate": 2e-05, "loss": 0.04110231, "step": 11673 }, { "epoch": 23.348, "grad_norm": 0.8699586987495422, "learning_rate": 2e-05, "loss": 0.03934082, "step": 11674 }, { "epoch": 23.35, "grad_norm": 1.0714006423950195, "learning_rate": 2e-05, "loss": 0.05557939, "step": 11675 }, { "epoch": 23.352, "grad_norm": 1.8060479164123535, "learning_rate": 2e-05, "loss": 0.04335602, "step": 11676 }, { "epoch": 23.354, "grad_norm": 1.0608428716659546, "learning_rate": 2e-05, "loss": 0.0539423, "step": 11677 }, { "epoch": 23.356, "grad_norm": 1.9058010578155518, "learning_rate": 2e-05, "loss": 0.05520298, "step": 11678 }, { "epoch": 23.358, "grad_norm": 1.1205326318740845, "learning_rate": 2e-05, "loss": 0.05260883, "step": 11679 }, { "epoch": 23.36, "grad_norm": 1.5235289335250854, "learning_rate": 2e-05, "loss": 0.05235814, "step": 11680 }, { "epoch": 23.362, "grad_norm": 1.7222963571548462, "learning_rate": 2e-05, "loss": 0.05660929, "step": 11681 }, { "epoch": 23.364, "grad_norm": 1.0869920253753662, "learning_rate": 2e-05, "loss": 0.0365209, "step": 11682 }, { "epoch": 23.366, "grad_norm": 1.1982978582382202, "learning_rate": 2e-05, "loss": 0.03702242, "step": 11683 }, { "epoch": 23.368, "grad_norm": 1.871098279953003, "learning_rate": 2e-05, "loss": 0.04647014, "step": 11684 }, { "epoch": 23.37, "grad_norm": 1.0521349906921387, "learning_rate": 2e-05, "loss": 0.04743152, "step": 11685 }, { "epoch": 23.372, "grad_norm": 1.1643682718276978, "learning_rate": 2e-05, "loss": 0.06197054, "step": 11686 }, { "epoch": 23.374, "grad_norm": 1.3755227327346802, "learning_rate": 2e-05, "loss": 0.03752012, "step": 11687 }, { "epoch": 23.376, "grad_norm": 0.9514060020446777, "learning_rate": 2e-05, "loss": 0.03984525, "step": 11688 }, { "epoch": 23.378, "grad_norm": 1.241223931312561, "learning_rate": 2e-05, "loss": 0.05416253, "step": 11689 }, { "epoch": 23.38, "grad_norm": 1.7155836820602417, "learning_rate": 2e-05, "loss": 0.05160542, "step": 11690 }, { "epoch": 23.382, "grad_norm": 1.0345395803451538, "learning_rate": 2e-05, "loss": 0.04331732, "step": 11691 }, { "epoch": 23.384, "grad_norm": 1.135293960571289, "learning_rate": 2e-05, "loss": 0.0501378, "step": 11692 }, { "epoch": 23.386, "grad_norm": 2.3069839477539062, "learning_rate": 2e-05, "loss": 0.05308401, "step": 11693 }, { "epoch": 23.388, "grad_norm": 1.1050124168395996, "learning_rate": 2e-05, "loss": 0.03854794, "step": 11694 }, { "epoch": 23.39, "grad_norm": 1.3092459440231323, "learning_rate": 2e-05, "loss": 0.04546222, "step": 11695 }, { "epoch": 23.392, "grad_norm": 1.677419900894165, "learning_rate": 2e-05, "loss": 0.05057566, "step": 11696 }, { "epoch": 23.394, "grad_norm": 1.4629788398742676, "learning_rate": 2e-05, "loss": 0.04984573, "step": 11697 }, { "epoch": 23.396, "grad_norm": 1.3752899169921875, "learning_rate": 2e-05, "loss": 0.05107006, "step": 11698 }, { "epoch": 23.398, "grad_norm": 1.6810131072998047, "learning_rate": 2e-05, "loss": 0.04762748, "step": 11699 }, { "epoch": 23.4, "grad_norm": 1.1630942821502686, "learning_rate": 2e-05, "loss": 0.0471971, "step": 11700 }, { "epoch": 23.402, "grad_norm": 1.2814292907714844, "learning_rate": 2e-05, "loss": 0.04152463, "step": 11701 }, { "epoch": 23.404, "grad_norm": 0.8314868211746216, "learning_rate": 2e-05, "loss": 0.03304271, "step": 11702 }, { "epoch": 23.406, "grad_norm": 1.9218738079071045, "learning_rate": 2e-05, "loss": 0.04656883, "step": 11703 }, { "epoch": 23.408, "grad_norm": 1.8409647941589355, "learning_rate": 2e-05, "loss": 0.05018547, "step": 11704 }, { "epoch": 23.41, "grad_norm": 1.3367799520492554, "learning_rate": 2e-05, "loss": 0.04093, "step": 11705 }, { "epoch": 23.412, "grad_norm": 1.148029088973999, "learning_rate": 2e-05, "loss": 0.03622163, "step": 11706 }, { "epoch": 23.414, "grad_norm": 2.034200668334961, "learning_rate": 2e-05, "loss": 0.05376649, "step": 11707 }, { "epoch": 23.416, "grad_norm": 1.0153529644012451, "learning_rate": 2e-05, "loss": 0.04229759, "step": 11708 }, { "epoch": 23.418, "grad_norm": 2.8429248332977295, "learning_rate": 2e-05, "loss": 0.06880462, "step": 11709 }, { "epoch": 23.42, "grad_norm": 1.5293234586715698, "learning_rate": 2e-05, "loss": 0.04994109, "step": 11710 }, { "epoch": 23.422, "grad_norm": 0.9945678114891052, "learning_rate": 2e-05, "loss": 0.04129409, "step": 11711 }, { "epoch": 23.424, "grad_norm": 1.2656761407852173, "learning_rate": 2e-05, "loss": 0.04188651, "step": 11712 }, { "epoch": 23.426, "grad_norm": 1.0823739767074585, "learning_rate": 2e-05, "loss": 0.04094054, "step": 11713 }, { "epoch": 23.428, "grad_norm": 1.1941206455230713, "learning_rate": 2e-05, "loss": 0.05164913, "step": 11714 }, { "epoch": 23.43, "grad_norm": 1.3087862730026245, "learning_rate": 2e-05, "loss": 0.04426258, "step": 11715 }, { "epoch": 23.432, "grad_norm": 1.0260250568389893, "learning_rate": 2e-05, "loss": 0.03790071, "step": 11716 }, { "epoch": 23.434, "grad_norm": 2.0383365154266357, "learning_rate": 2e-05, "loss": 0.06778058, "step": 11717 }, { "epoch": 23.436, "grad_norm": 1.0122857093811035, "learning_rate": 2e-05, "loss": 0.03338874, "step": 11718 }, { "epoch": 23.438, "grad_norm": 1.064690351486206, "learning_rate": 2e-05, "loss": 0.03518835, "step": 11719 }, { "epoch": 23.44, "grad_norm": 1.067372441291809, "learning_rate": 2e-05, "loss": 0.04104721, "step": 11720 }, { "epoch": 23.442, "grad_norm": 2.22314453125, "learning_rate": 2e-05, "loss": 0.05303524, "step": 11721 }, { "epoch": 23.444, "grad_norm": 0.8205355405807495, "learning_rate": 2e-05, "loss": 0.03085201, "step": 11722 }, { "epoch": 23.446, "grad_norm": 0.8851141333580017, "learning_rate": 2e-05, "loss": 0.04088315, "step": 11723 }, { "epoch": 23.448, "grad_norm": 0.8439245820045471, "learning_rate": 2e-05, "loss": 0.03267584, "step": 11724 }, { "epoch": 23.45, "grad_norm": 1.1292567253112793, "learning_rate": 2e-05, "loss": 0.03631708, "step": 11725 }, { "epoch": 23.452, "grad_norm": 2.2150816917419434, "learning_rate": 2e-05, "loss": 0.05653385, "step": 11726 }, { "epoch": 23.454, "grad_norm": 1.1721841096878052, "learning_rate": 2e-05, "loss": 0.04569966, "step": 11727 }, { "epoch": 23.456, "grad_norm": 1.8009384870529175, "learning_rate": 2e-05, "loss": 0.04997317, "step": 11728 }, { "epoch": 23.458, "grad_norm": 2.156970977783203, "learning_rate": 2e-05, "loss": 0.03871626, "step": 11729 }, { "epoch": 23.46, "grad_norm": 1.5007227659225464, "learning_rate": 2e-05, "loss": 0.05501127, "step": 11730 }, { "epoch": 23.462, "grad_norm": 1.2243270874023438, "learning_rate": 2e-05, "loss": 0.03410867, "step": 11731 }, { "epoch": 23.464, "grad_norm": 1.2385157346725464, "learning_rate": 2e-05, "loss": 0.04714824, "step": 11732 }, { "epoch": 23.466, "grad_norm": 1.1942919492721558, "learning_rate": 2e-05, "loss": 0.05242515, "step": 11733 }, { "epoch": 23.468, "grad_norm": 1.3962806463241577, "learning_rate": 2e-05, "loss": 0.03354634, "step": 11734 }, { "epoch": 23.47, "grad_norm": 3.531313896179199, "learning_rate": 2e-05, "loss": 0.04878384, "step": 11735 }, { "epoch": 23.472, "grad_norm": 1.5076217651367188, "learning_rate": 2e-05, "loss": 0.04809828, "step": 11736 }, { "epoch": 23.474, "grad_norm": 1.497107744216919, "learning_rate": 2e-05, "loss": 0.06067564, "step": 11737 }, { "epoch": 23.476, "grad_norm": 2.0633797645568848, "learning_rate": 2e-05, "loss": 0.05666287, "step": 11738 }, { "epoch": 23.478, "grad_norm": 1.43509840965271, "learning_rate": 2e-05, "loss": 0.06193584, "step": 11739 }, { "epoch": 23.48, "grad_norm": 1.2016088962554932, "learning_rate": 2e-05, "loss": 0.05340843, "step": 11740 }, { "epoch": 23.482, "grad_norm": 3.1868913173675537, "learning_rate": 2e-05, "loss": 0.06024293, "step": 11741 }, { "epoch": 23.484, "grad_norm": 1.1516119241714478, "learning_rate": 2e-05, "loss": 0.0561817, "step": 11742 }, { "epoch": 23.486, "grad_norm": 1.2162227630615234, "learning_rate": 2e-05, "loss": 0.0495167, "step": 11743 }, { "epoch": 23.488, "grad_norm": 1.0048890113830566, "learning_rate": 2e-05, "loss": 0.04121789, "step": 11744 }, { "epoch": 23.49, "grad_norm": 1.1558607816696167, "learning_rate": 2e-05, "loss": 0.04298407, "step": 11745 }, { "epoch": 23.492, "grad_norm": 1.695831537246704, "learning_rate": 2e-05, "loss": 0.04677217, "step": 11746 }, { "epoch": 23.494, "grad_norm": 1.328750729560852, "learning_rate": 2e-05, "loss": 0.04823466, "step": 11747 }, { "epoch": 23.496, "grad_norm": 1.229340672492981, "learning_rate": 2e-05, "loss": 0.05476249, "step": 11748 }, { "epoch": 23.498, "grad_norm": 1.466972827911377, "learning_rate": 2e-05, "loss": 0.05146263, "step": 11749 }, { "epoch": 23.5, "grad_norm": 1.397045373916626, "learning_rate": 2e-05, "loss": 0.03999738, "step": 11750 }, { "epoch": 23.502, "grad_norm": 1.045440673828125, "learning_rate": 2e-05, "loss": 0.04604465, "step": 11751 }, { "epoch": 23.504, "grad_norm": 1.059058427810669, "learning_rate": 2e-05, "loss": 0.0395628, "step": 11752 }, { "epoch": 23.506, "grad_norm": 1.169434905052185, "learning_rate": 2e-05, "loss": 0.05249533, "step": 11753 }, { "epoch": 23.508, "grad_norm": 2.695093870162964, "learning_rate": 2e-05, "loss": 0.06053265, "step": 11754 }, { "epoch": 23.51, "grad_norm": 1.384024739265442, "learning_rate": 2e-05, "loss": 0.06655477, "step": 11755 }, { "epoch": 23.512, "grad_norm": 1.1353545188903809, "learning_rate": 2e-05, "loss": 0.03866877, "step": 11756 }, { "epoch": 23.514, "grad_norm": 1.3949236869812012, "learning_rate": 2e-05, "loss": 0.05025262, "step": 11757 }, { "epoch": 23.516, "grad_norm": 1.613383173942566, "learning_rate": 2e-05, "loss": 0.04375897, "step": 11758 }, { "epoch": 23.518, "grad_norm": 0.9528374671936035, "learning_rate": 2e-05, "loss": 0.04427399, "step": 11759 }, { "epoch": 23.52, "grad_norm": 1.153380274772644, "learning_rate": 2e-05, "loss": 0.05213205, "step": 11760 }, { "epoch": 23.522, "grad_norm": 1.8406866788864136, "learning_rate": 2e-05, "loss": 0.05012078, "step": 11761 }, { "epoch": 23.524, "grad_norm": 0.9942479133605957, "learning_rate": 2e-05, "loss": 0.03646605, "step": 11762 }, { "epoch": 23.526, "grad_norm": 1.291311264038086, "learning_rate": 2e-05, "loss": 0.02971187, "step": 11763 }, { "epoch": 23.528, "grad_norm": 1.2812575101852417, "learning_rate": 2e-05, "loss": 0.04475303, "step": 11764 }, { "epoch": 23.53, "grad_norm": 1.380100131034851, "learning_rate": 2e-05, "loss": 0.05594578, "step": 11765 }, { "epoch": 23.532, "grad_norm": 1.386405348777771, "learning_rate": 2e-05, "loss": 0.04840854, "step": 11766 }, { "epoch": 23.534, "grad_norm": 0.8896753787994385, "learning_rate": 2e-05, "loss": 0.03411419, "step": 11767 }, { "epoch": 23.536, "grad_norm": 1.3684544563293457, "learning_rate": 2e-05, "loss": 0.06168128, "step": 11768 }, { "epoch": 23.538, "grad_norm": 1.0067439079284668, "learning_rate": 2e-05, "loss": 0.04723697, "step": 11769 }, { "epoch": 23.54, "grad_norm": 1.6060326099395752, "learning_rate": 2e-05, "loss": 0.05475333, "step": 11770 }, { "epoch": 23.542, "grad_norm": 1.3384634256362915, "learning_rate": 2e-05, "loss": 0.04966965, "step": 11771 }, { "epoch": 23.544, "grad_norm": 0.91178297996521, "learning_rate": 2e-05, "loss": 0.029213, "step": 11772 }, { "epoch": 23.546, "grad_norm": 1.450881838798523, "learning_rate": 2e-05, "loss": 0.05454703, "step": 11773 }, { "epoch": 23.548000000000002, "grad_norm": 2.4632232189178467, "learning_rate": 2e-05, "loss": 0.05430477, "step": 11774 }, { "epoch": 23.55, "grad_norm": 1.1012457609176636, "learning_rate": 2e-05, "loss": 0.04569916, "step": 11775 }, { "epoch": 23.552, "grad_norm": 1.1762793064117432, "learning_rate": 2e-05, "loss": 0.04588043, "step": 11776 }, { "epoch": 23.554, "grad_norm": 0.906623363494873, "learning_rate": 2e-05, "loss": 0.03874638, "step": 11777 }, { "epoch": 23.556, "grad_norm": 2.751420259475708, "learning_rate": 2e-05, "loss": 0.05000962, "step": 11778 }, { "epoch": 23.558, "grad_norm": 1.063916802406311, "learning_rate": 2e-05, "loss": 0.03942411, "step": 11779 }, { "epoch": 23.56, "grad_norm": 1.1969093084335327, "learning_rate": 2e-05, "loss": 0.05103308, "step": 11780 }, { "epoch": 23.562, "grad_norm": 1.7806241512298584, "learning_rate": 2e-05, "loss": 0.05435123, "step": 11781 }, { "epoch": 23.564, "grad_norm": 1.3605821132659912, "learning_rate": 2e-05, "loss": 0.03649136, "step": 11782 }, { "epoch": 23.566, "grad_norm": 1.7655729055404663, "learning_rate": 2e-05, "loss": 0.04097831, "step": 11783 }, { "epoch": 23.568, "grad_norm": 1.388277530670166, "learning_rate": 2e-05, "loss": 0.0489667, "step": 11784 }, { "epoch": 23.57, "grad_norm": 1.4355459213256836, "learning_rate": 2e-05, "loss": 0.05197411, "step": 11785 }, { "epoch": 23.572, "grad_norm": 1.0454986095428467, "learning_rate": 2e-05, "loss": 0.04989415, "step": 11786 }, { "epoch": 23.574, "grad_norm": 2.4582231044769287, "learning_rate": 2e-05, "loss": 0.03980158, "step": 11787 }, { "epoch": 23.576, "grad_norm": 1.1055392026901245, "learning_rate": 2e-05, "loss": 0.0397646, "step": 11788 }, { "epoch": 23.578, "grad_norm": 1.476279377937317, "learning_rate": 2e-05, "loss": 0.04830015, "step": 11789 }, { "epoch": 23.58, "grad_norm": 1.1403915882110596, "learning_rate": 2e-05, "loss": 0.03799029, "step": 11790 }, { "epoch": 23.582, "grad_norm": 1.5290175676345825, "learning_rate": 2e-05, "loss": 0.04618038, "step": 11791 }, { "epoch": 23.584, "grad_norm": 1.0033074617385864, "learning_rate": 2e-05, "loss": 0.0429245, "step": 11792 }, { "epoch": 23.586, "grad_norm": 1.131286382675171, "learning_rate": 2e-05, "loss": 0.04294648, "step": 11793 }, { "epoch": 23.588, "grad_norm": 0.9808322787284851, "learning_rate": 2e-05, "loss": 0.04869968, "step": 11794 }, { "epoch": 23.59, "grad_norm": 1.3313595056533813, "learning_rate": 2e-05, "loss": 0.04685473, "step": 11795 }, { "epoch": 23.592, "grad_norm": 1.3252618312835693, "learning_rate": 2e-05, "loss": 0.03575489, "step": 11796 }, { "epoch": 23.594, "grad_norm": 2.706871509552002, "learning_rate": 2e-05, "loss": 0.0328575, "step": 11797 }, { "epoch": 23.596, "grad_norm": 1.5286343097686768, "learning_rate": 2e-05, "loss": 0.06336575, "step": 11798 }, { "epoch": 23.598, "grad_norm": 3.0045270919799805, "learning_rate": 2e-05, "loss": 0.05927711, "step": 11799 }, { "epoch": 23.6, "grad_norm": 0.9562656283378601, "learning_rate": 2e-05, "loss": 0.03408673, "step": 11800 }, { "epoch": 23.602, "grad_norm": 1.2885017395019531, "learning_rate": 2e-05, "loss": 0.05612068, "step": 11801 }, { "epoch": 23.604, "grad_norm": 1.1519379615783691, "learning_rate": 2e-05, "loss": 0.04493074, "step": 11802 }, { "epoch": 23.606, "grad_norm": 1.654046654701233, "learning_rate": 2e-05, "loss": 0.04403114, "step": 11803 }, { "epoch": 23.608, "grad_norm": 1.0165921449661255, "learning_rate": 2e-05, "loss": 0.04292047, "step": 11804 }, { "epoch": 23.61, "grad_norm": 1.532755732536316, "learning_rate": 2e-05, "loss": 0.04846454, "step": 11805 }, { "epoch": 23.612, "grad_norm": 1.1519556045532227, "learning_rate": 2e-05, "loss": 0.04868887, "step": 11806 }, { "epoch": 23.614, "grad_norm": 1.8565826416015625, "learning_rate": 2e-05, "loss": 0.05638482, "step": 11807 }, { "epoch": 23.616, "grad_norm": 1.0234509706497192, "learning_rate": 2e-05, "loss": 0.03301515, "step": 11808 }, { "epoch": 23.618, "grad_norm": 0.8989819884300232, "learning_rate": 2e-05, "loss": 0.0356473, "step": 11809 }, { "epoch": 23.62, "grad_norm": 1.0497586727142334, "learning_rate": 2e-05, "loss": 0.04663119, "step": 11810 }, { "epoch": 23.622, "grad_norm": 1.7920395135879517, "learning_rate": 2e-05, "loss": 0.05507524, "step": 11811 }, { "epoch": 23.624, "grad_norm": 1.1872884035110474, "learning_rate": 2e-05, "loss": 0.03647828, "step": 11812 }, { "epoch": 23.626, "grad_norm": 1.3653554916381836, "learning_rate": 2e-05, "loss": 0.03552261, "step": 11813 }, { "epoch": 23.628, "grad_norm": 1.036534309387207, "learning_rate": 2e-05, "loss": 0.04042344, "step": 11814 }, { "epoch": 23.63, "grad_norm": 1.2500540018081665, "learning_rate": 2e-05, "loss": 0.04299323, "step": 11815 }, { "epoch": 23.632, "grad_norm": 1.2500555515289307, "learning_rate": 2e-05, "loss": 0.04927852, "step": 11816 }, { "epoch": 23.634, "grad_norm": 1.5942015647888184, "learning_rate": 2e-05, "loss": 0.04399975, "step": 11817 }, { "epoch": 23.636, "grad_norm": 1.6040215492248535, "learning_rate": 2e-05, "loss": 0.05214205, "step": 11818 }, { "epoch": 23.638, "grad_norm": 1.375636339187622, "learning_rate": 2e-05, "loss": 0.04437149, "step": 11819 }, { "epoch": 23.64, "grad_norm": 3.817338228225708, "learning_rate": 2e-05, "loss": 0.05615721, "step": 11820 }, { "epoch": 23.642, "grad_norm": 1.073360562324524, "learning_rate": 2e-05, "loss": 0.0375028, "step": 11821 }, { "epoch": 23.644, "grad_norm": 1.9736756086349487, "learning_rate": 2e-05, "loss": 0.05358336, "step": 11822 }, { "epoch": 23.646, "grad_norm": 1.1594146490097046, "learning_rate": 2e-05, "loss": 0.05045512, "step": 11823 }, { "epoch": 23.648, "grad_norm": 1.4118045568466187, "learning_rate": 2e-05, "loss": 0.04839828, "step": 11824 }, { "epoch": 23.65, "grad_norm": 1.1874898672103882, "learning_rate": 2e-05, "loss": 0.04599501, "step": 11825 }, { "epoch": 23.652, "grad_norm": 1.9041502475738525, "learning_rate": 2e-05, "loss": 0.04374641, "step": 11826 }, { "epoch": 23.654, "grad_norm": 2.0155675411224365, "learning_rate": 2e-05, "loss": 0.06777727, "step": 11827 }, { "epoch": 23.656, "grad_norm": 1.5998084545135498, "learning_rate": 2e-05, "loss": 0.04298867, "step": 11828 }, { "epoch": 23.658, "grad_norm": 1.1356955766677856, "learning_rate": 2e-05, "loss": 0.042227, "step": 11829 }, { "epoch": 23.66, "grad_norm": 1.096351981163025, "learning_rate": 2e-05, "loss": 0.04028612, "step": 11830 }, { "epoch": 23.662, "grad_norm": 1.2924623489379883, "learning_rate": 2e-05, "loss": 0.04348958, "step": 11831 }, { "epoch": 23.664, "grad_norm": 0.9682716727256775, "learning_rate": 2e-05, "loss": 0.02754121, "step": 11832 }, { "epoch": 23.666, "grad_norm": 1.1650999784469604, "learning_rate": 2e-05, "loss": 0.06063723, "step": 11833 }, { "epoch": 23.668, "grad_norm": 2.5099270343780518, "learning_rate": 2e-05, "loss": 0.05424255, "step": 11834 }, { "epoch": 23.67, "grad_norm": 1.1080573797225952, "learning_rate": 2e-05, "loss": 0.04906466, "step": 11835 }, { "epoch": 23.672, "grad_norm": 1.1627823114395142, "learning_rate": 2e-05, "loss": 0.05064415, "step": 11836 }, { "epoch": 23.674, "grad_norm": 0.9511369466781616, "learning_rate": 2e-05, "loss": 0.03248414, "step": 11837 }, { "epoch": 23.676, "grad_norm": 0.9693873524665833, "learning_rate": 2e-05, "loss": 0.03603654, "step": 11838 }, { "epoch": 23.678, "grad_norm": 1.150111198425293, "learning_rate": 2e-05, "loss": 0.05226473, "step": 11839 }, { "epoch": 23.68, "grad_norm": 2.6451776027679443, "learning_rate": 2e-05, "loss": 0.04576471, "step": 11840 }, { "epoch": 23.682, "grad_norm": 1.0278831720352173, "learning_rate": 2e-05, "loss": 0.04985963, "step": 11841 }, { "epoch": 23.684, "grad_norm": 1.8761433362960815, "learning_rate": 2e-05, "loss": 0.04240131, "step": 11842 }, { "epoch": 23.686, "grad_norm": 1.3320919275283813, "learning_rate": 2e-05, "loss": 0.0409328, "step": 11843 }, { "epoch": 23.688, "grad_norm": 1.6709333658218384, "learning_rate": 2e-05, "loss": 0.05572405, "step": 11844 }, { "epoch": 23.69, "grad_norm": 1.7668256759643555, "learning_rate": 2e-05, "loss": 0.0600642, "step": 11845 }, { "epoch": 23.692, "grad_norm": 1.149810552597046, "learning_rate": 2e-05, "loss": 0.056114, "step": 11846 }, { "epoch": 23.694, "grad_norm": 2.03243088722229, "learning_rate": 2e-05, "loss": 0.0506327, "step": 11847 }, { "epoch": 23.696, "grad_norm": 0.8601809740066528, "learning_rate": 2e-05, "loss": 0.03495654, "step": 11848 }, { "epoch": 23.698, "grad_norm": 1.5391650199890137, "learning_rate": 2e-05, "loss": 0.05761977, "step": 11849 }, { "epoch": 23.7, "grad_norm": 1.2558999061584473, "learning_rate": 2e-05, "loss": 0.05472016, "step": 11850 }, { "epoch": 23.701999999999998, "grad_norm": 1.312123417854309, "learning_rate": 2e-05, "loss": 0.04737009, "step": 11851 }, { "epoch": 23.704, "grad_norm": 1.7144548892974854, "learning_rate": 2e-05, "loss": 0.04608443, "step": 11852 }, { "epoch": 23.706, "grad_norm": 2.0079281330108643, "learning_rate": 2e-05, "loss": 0.05733472, "step": 11853 }, { "epoch": 23.708, "grad_norm": 1.4179093837738037, "learning_rate": 2e-05, "loss": 0.04972552, "step": 11854 }, { "epoch": 23.71, "grad_norm": 0.9516226649284363, "learning_rate": 2e-05, "loss": 0.03760123, "step": 11855 }, { "epoch": 23.712, "grad_norm": 1.7413215637207031, "learning_rate": 2e-05, "loss": 0.03998146, "step": 11856 }, { "epoch": 23.714, "grad_norm": 1.10871422290802, "learning_rate": 2e-05, "loss": 0.05418255, "step": 11857 }, { "epoch": 23.716, "grad_norm": 1.0018901824951172, "learning_rate": 2e-05, "loss": 0.04085057, "step": 11858 }, { "epoch": 23.718, "grad_norm": 1.0174988508224487, "learning_rate": 2e-05, "loss": 0.04316361, "step": 11859 }, { "epoch": 23.72, "grad_norm": 1.4634778499603271, "learning_rate": 2e-05, "loss": 0.03112759, "step": 11860 }, { "epoch": 23.722, "grad_norm": 0.9857693910598755, "learning_rate": 2e-05, "loss": 0.02957954, "step": 11861 }, { "epoch": 23.724, "grad_norm": 1.380418300628662, "learning_rate": 2e-05, "loss": 0.05130707, "step": 11862 }, { "epoch": 23.726, "grad_norm": 1.2355725765228271, "learning_rate": 2e-05, "loss": 0.04811347, "step": 11863 }, { "epoch": 23.728, "grad_norm": 1.304316759109497, "learning_rate": 2e-05, "loss": 0.04496055, "step": 11864 }, { "epoch": 23.73, "grad_norm": 4.475430488586426, "learning_rate": 2e-05, "loss": 0.05949744, "step": 11865 }, { "epoch": 23.732, "grad_norm": 1.322509765625, "learning_rate": 2e-05, "loss": 0.05688853, "step": 11866 }, { "epoch": 23.734, "grad_norm": 1.3185429573059082, "learning_rate": 2e-05, "loss": 0.06079708, "step": 11867 }, { "epoch": 23.736, "grad_norm": 1.5176721811294556, "learning_rate": 2e-05, "loss": 0.03962064, "step": 11868 }, { "epoch": 23.738, "grad_norm": 1.6204832792282104, "learning_rate": 2e-05, "loss": 0.04753038, "step": 11869 }, { "epoch": 23.74, "grad_norm": 1.3147227764129639, "learning_rate": 2e-05, "loss": 0.04551903, "step": 11870 }, { "epoch": 23.742, "grad_norm": 1.7578351497650146, "learning_rate": 2e-05, "loss": 0.05363294, "step": 11871 }, { "epoch": 23.744, "grad_norm": 0.9598556160926819, "learning_rate": 2e-05, "loss": 0.03397725, "step": 11872 }, { "epoch": 23.746, "grad_norm": 1.1836851835250854, "learning_rate": 2e-05, "loss": 0.04913048, "step": 11873 }, { "epoch": 23.748, "grad_norm": 1.6102029085159302, "learning_rate": 2e-05, "loss": 0.05629463, "step": 11874 }, { "epoch": 23.75, "grad_norm": 1.4498350620269775, "learning_rate": 2e-05, "loss": 0.05195166, "step": 11875 }, { "epoch": 23.752, "grad_norm": 1.0940892696380615, "learning_rate": 2e-05, "loss": 0.03517435, "step": 11876 }, { "epoch": 23.754, "grad_norm": 3.3928565979003906, "learning_rate": 2e-05, "loss": 0.05469337, "step": 11877 }, { "epoch": 23.756, "grad_norm": 0.9056026935577393, "learning_rate": 2e-05, "loss": 0.04215866, "step": 11878 }, { "epoch": 23.758, "grad_norm": 0.9843959212303162, "learning_rate": 2e-05, "loss": 0.04789463, "step": 11879 }, { "epoch": 23.76, "grad_norm": 1.1080602407455444, "learning_rate": 2e-05, "loss": 0.05793828, "step": 11880 }, { "epoch": 23.762, "grad_norm": 4.302292823791504, "learning_rate": 2e-05, "loss": 0.05191319, "step": 11881 }, { "epoch": 23.764, "grad_norm": 1.3174184560775757, "learning_rate": 2e-05, "loss": 0.07129567, "step": 11882 }, { "epoch": 23.766, "grad_norm": 1.0062744617462158, "learning_rate": 2e-05, "loss": 0.04534592, "step": 11883 }, { "epoch": 23.768, "grad_norm": 1.857393741607666, "learning_rate": 2e-05, "loss": 0.04138853, "step": 11884 }, { "epoch": 23.77, "grad_norm": 2.4198994636535645, "learning_rate": 2e-05, "loss": 0.04509052, "step": 11885 }, { "epoch": 23.772, "grad_norm": 1.1149333715438843, "learning_rate": 2e-05, "loss": 0.04891042, "step": 11886 }, { "epoch": 23.774, "grad_norm": 1.0539963245391846, "learning_rate": 2e-05, "loss": 0.04189912, "step": 11887 }, { "epoch": 23.776, "grad_norm": 1.492944598197937, "learning_rate": 2e-05, "loss": 0.04360221, "step": 11888 }, { "epoch": 23.778, "grad_norm": 1.2657629251480103, "learning_rate": 2e-05, "loss": 0.05082548, "step": 11889 }, { "epoch": 23.78, "grad_norm": 1.0405988693237305, "learning_rate": 2e-05, "loss": 0.05276818, "step": 11890 }, { "epoch": 23.782, "grad_norm": 1.5013107061386108, "learning_rate": 2e-05, "loss": 0.04806553, "step": 11891 }, { "epoch": 23.784, "grad_norm": 1.1761319637298584, "learning_rate": 2e-05, "loss": 0.03785075, "step": 11892 }, { "epoch": 23.786, "grad_norm": 1.2787251472473145, "learning_rate": 2e-05, "loss": 0.04691942, "step": 11893 }, { "epoch": 23.788, "grad_norm": 1.1957424879074097, "learning_rate": 2e-05, "loss": 0.04961814, "step": 11894 }, { "epoch": 23.79, "grad_norm": 1.2349203824996948, "learning_rate": 2e-05, "loss": 0.05131783, "step": 11895 }, { "epoch": 23.792, "grad_norm": 1.2640782594680786, "learning_rate": 2e-05, "loss": 0.0484584, "step": 11896 }, { "epoch": 23.794, "grad_norm": 1.1216305494308472, "learning_rate": 2e-05, "loss": 0.05318472, "step": 11897 }, { "epoch": 23.796, "grad_norm": 1.0679423809051514, "learning_rate": 2e-05, "loss": 0.04826304, "step": 11898 }, { "epoch": 23.798000000000002, "grad_norm": 1.3413798809051514, "learning_rate": 2e-05, "loss": 0.0390526, "step": 11899 }, { "epoch": 23.8, "grad_norm": 1.5896494388580322, "learning_rate": 2e-05, "loss": 0.06722755, "step": 11900 }, { "epoch": 23.802, "grad_norm": 1.069589376449585, "learning_rate": 2e-05, "loss": 0.04511658, "step": 11901 }, { "epoch": 23.804, "grad_norm": 1.047317385673523, "learning_rate": 2e-05, "loss": 0.04551696, "step": 11902 }, { "epoch": 23.806, "grad_norm": 1.6583493947982788, "learning_rate": 2e-05, "loss": 0.05105167, "step": 11903 }, { "epoch": 23.808, "grad_norm": 0.7742308378219604, "learning_rate": 2e-05, "loss": 0.02657994, "step": 11904 }, { "epoch": 23.81, "grad_norm": 0.9878618121147156, "learning_rate": 2e-05, "loss": 0.04328071, "step": 11905 }, { "epoch": 23.812, "grad_norm": 1.1335018873214722, "learning_rate": 2e-05, "loss": 0.02694547, "step": 11906 }, { "epoch": 23.814, "grad_norm": 1.267176866531372, "learning_rate": 2e-05, "loss": 0.04324171, "step": 11907 }, { "epoch": 23.816, "grad_norm": 1.0298340320587158, "learning_rate": 2e-05, "loss": 0.03797846, "step": 11908 }, { "epoch": 23.818, "grad_norm": 1.1247707605361938, "learning_rate": 2e-05, "loss": 0.04231847, "step": 11909 }, { "epoch": 23.82, "grad_norm": 1.139298439025879, "learning_rate": 2e-05, "loss": 0.0485975, "step": 11910 }, { "epoch": 23.822, "grad_norm": 0.9817166328430176, "learning_rate": 2e-05, "loss": 0.04429562, "step": 11911 }, { "epoch": 23.824, "grad_norm": 1.2149447202682495, "learning_rate": 2e-05, "loss": 0.04330052, "step": 11912 }, { "epoch": 23.826, "grad_norm": 1.6649208068847656, "learning_rate": 2e-05, "loss": 0.04536702, "step": 11913 }, { "epoch": 23.828, "grad_norm": 1.2904493808746338, "learning_rate": 2e-05, "loss": 0.04782952, "step": 11914 }, { "epoch": 23.83, "grad_norm": 0.8702771067619324, "learning_rate": 2e-05, "loss": 0.03365371, "step": 11915 }, { "epoch": 23.832, "grad_norm": 1.2357491254806519, "learning_rate": 2e-05, "loss": 0.05077953, "step": 11916 }, { "epoch": 23.834, "grad_norm": 1.2721084356307983, "learning_rate": 2e-05, "loss": 0.06241481, "step": 11917 }, { "epoch": 23.836, "grad_norm": 1.3370394706726074, "learning_rate": 2e-05, "loss": 0.04384567, "step": 11918 }, { "epoch": 23.838, "grad_norm": 1.7228367328643799, "learning_rate": 2e-05, "loss": 0.05038276, "step": 11919 }, { "epoch": 23.84, "grad_norm": 1.3409298658370972, "learning_rate": 2e-05, "loss": 0.05658997, "step": 11920 }, { "epoch": 23.842, "grad_norm": 0.9199585318565369, "learning_rate": 2e-05, "loss": 0.03119808, "step": 11921 }, { "epoch": 23.844, "grad_norm": 1.469512939453125, "learning_rate": 2e-05, "loss": 0.03764893, "step": 11922 }, { "epoch": 23.846, "grad_norm": 1.3349125385284424, "learning_rate": 2e-05, "loss": 0.0509558, "step": 11923 }, { "epoch": 23.848, "grad_norm": 1.7401875257492065, "learning_rate": 2e-05, "loss": 0.04575799, "step": 11924 }, { "epoch": 23.85, "grad_norm": 2.0595452785491943, "learning_rate": 2e-05, "loss": 0.05447005, "step": 11925 }, { "epoch": 23.852, "grad_norm": 1.3824564218521118, "learning_rate": 2e-05, "loss": 0.0622148, "step": 11926 }, { "epoch": 23.854, "grad_norm": 1.3096190690994263, "learning_rate": 2e-05, "loss": 0.05290907, "step": 11927 }, { "epoch": 23.856, "grad_norm": 1.2597686052322388, "learning_rate": 2e-05, "loss": 0.03451017, "step": 11928 }, { "epoch": 23.858, "grad_norm": 1.2595397233963013, "learning_rate": 2e-05, "loss": 0.04568423, "step": 11929 }, { "epoch": 23.86, "grad_norm": 0.9130263924598694, "learning_rate": 2e-05, "loss": 0.03933413, "step": 11930 }, { "epoch": 23.862, "grad_norm": 0.9221342206001282, "learning_rate": 2e-05, "loss": 0.02932182, "step": 11931 }, { "epoch": 23.864, "grad_norm": 1.0367698669433594, "learning_rate": 2e-05, "loss": 0.04635197, "step": 11932 }, { "epoch": 23.866, "grad_norm": 1.4612818956375122, "learning_rate": 2e-05, "loss": 0.04787438, "step": 11933 }, { "epoch": 23.868, "grad_norm": 1.5755794048309326, "learning_rate": 2e-05, "loss": 0.04672352, "step": 11934 }, { "epoch": 23.87, "grad_norm": 1.3383821249008179, "learning_rate": 2e-05, "loss": 0.05725973, "step": 11935 }, { "epoch": 23.872, "grad_norm": 1.0424549579620361, "learning_rate": 2e-05, "loss": 0.04141665, "step": 11936 }, { "epoch": 23.874, "grad_norm": 2.813077211380005, "learning_rate": 2e-05, "loss": 0.03540419, "step": 11937 }, { "epoch": 23.876, "grad_norm": 1.1614508628845215, "learning_rate": 2e-05, "loss": 0.05224334, "step": 11938 }, { "epoch": 23.878, "grad_norm": 0.9924662709236145, "learning_rate": 2e-05, "loss": 0.03554678, "step": 11939 }, { "epoch": 23.88, "grad_norm": 1.9446020126342773, "learning_rate": 2e-05, "loss": 0.05558904, "step": 11940 }, { "epoch": 23.882, "grad_norm": 0.8060851097106934, "learning_rate": 2e-05, "loss": 0.03003936, "step": 11941 }, { "epoch": 23.884, "grad_norm": 0.9981212615966797, "learning_rate": 2e-05, "loss": 0.04622617, "step": 11942 }, { "epoch": 23.886, "grad_norm": 0.9862149357795715, "learning_rate": 2e-05, "loss": 0.03979708, "step": 11943 }, { "epoch": 23.888, "grad_norm": 1.810752034187317, "learning_rate": 2e-05, "loss": 0.06706151, "step": 11944 }, { "epoch": 23.89, "grad_norm": 1.2348815202713013, "learning_rate": 2e-05, "loss": 0.0331397, "step": 11945 }, { "epoch": 23.892, "grad_norm": 1.382412075996399, "learning_rate": 2e-05, "loss": 0.05707578, "step": 11946 }, { "epoch": 23.894, "grad_norm": 1.0103840827941895, "learning_rate": 2e-05, "loss": 0.04262855, "step": 11947 }, { "epoch": 23.896, "grad_norm": 0.9143460988998413, "learning_rate": 2e-05, "loss": 0.03730785, "step": 11948 }, { "epoch": 23.898, "grad_norm": 1.260980248451233, "learning_rate": 2e-05, "loss": 0.04130136, "step": 11949 }, { "epoch": 23.9, "grad_norm": 1.064088225364685, "learning_rate": 2e-05, "loss": 0.0411904, "step": 11950 }, { "epoch": 23.902, "grad_norm": 1.0175310373306274, "learning_rate": 2e-05, "loss": 0.04659095, "step": 11951 }, { "epoch": 23.904, "grad_norm": 1.177068829536438, "learning_rate": 2e-05, "loss": 0.03499759, "step": 11952 }, { "epoch": 23.906, "grad_norm": 1.0480049848556519, "learning_rate": 2e-05, "loss": 0.04051728, "step": 11953 }, { "epoch": 23.908, "grad_norm": 1.0933537483215332, "learning_rate": 2e-05, "loss": 0.03915006, "step": 11954 }, { "epoch": 23.91, "grad_norm": 1.6226089000701904, "learning_rate": 2e-05, "loss": 0.03677177, "step": 11955 }, { "epoch": 23.912, "grad_norm": 1.2117199897766113, "learning_rate": 2e-05, "loss": 0.03211826, "step": 11956 }, { "epoch": 23.914, "grad_norm": 0.9914155006408691, "learning_rate": 2e-05, "loss": 0.04490528, "step": 11957 }, { "epoch": 23.916, "grad_norm": 1.0526952743530273, "learning_rate": 2e-05, "loss": 0.04135715, "step": 11958 }, { "epoch": 23.918, "grad_norm": 2.5510969161987305, "learning_rate": 2e-05, "loss": 0.0545825, "step": 11959 }, { "epoch": 23.92, "grad_norm": 1.051850438117981, "learning_rate": 2e-05, "loss": 0.04907841, "step": 11960 }, { "epoch": 23.922, "grad_norm": 1.063859224319458, "learning_rate": 2e-05, "loss": 0.04774335, "step": 11961 }, { "epoch": 23.924, "grad_norm": 1.4770852327346802, "learning_rate": 2e-05, "loss": 0.04725373, "step": 11962 }, { "epoch": 23.926, "grad_norm": 1.5697652101516724, "learning_rate": 2e-05, "loss": 0.03698351, "step": 11963 }, { "epoch": 23.928, "grad_norm": 1.4465527534484863, "learning_rate": 2e-05, "loss": 0.04310424, "step": 11964 }, { "epoch": 23.93, "grad_norm": 2.845811367034912, "learning_rate": 2e-05, "loss": 0.05226666, "step": 11965 }, { "epoch": 23.932, "grad_norm": 1.4219388961791992, "learning_rate": 2e-05, "loss": 0.0405181, "step": 11966 }, { "epoch": 23.934, "grad_norm": 1.1188178062438965, "learning_rate": 2e-05, "loss": 0.0376815, "step": 11967 }, { "epoch": 23.936, "grad_norm": 1.8365991115570068, "learning_rate": 2e-05, "loss": 0.0682129, "step": 11968 }, { "epoch": 23.938, "grad_norm": 1.6770678758621216, "learning_rate": 2e-05, "loss": 0.04739454, "step": 11969 }, { "epoch": 23.94, "grad_norm": 4.062193393707275, "learning_rate": 2e-05, "loss": 0.04746066, "step": 11970 }, { "epoch": 23.942, "grad_norm": 2.133936882019043, "learning_rate": 2e-05, "loss": 0.06084609, "step": 11971 }, { "epoch": 23.944, "grad_norm": 0.7797431349754333, "learning_rate": 2e-05, "loss": 0.02618004, "step": 11972 }, { "epoch": 23.946, "grad_norm": 1.351352334022522, "learning_rate": 2e-05, "loss": 0.05850387, "step": 11973 }, { "epoch": 23.948, "grad_norm": 1.3763724565505981, "learning_rate": 2e-05, "loss": 0.04893008, "step": 11974 }, { "epoch": 23.95, "grad_norm": 1.1071304082870483, "learning_rate": 2e-05, "loss": 0.04671605, "step": 11975 }, { "epoch": 23.951999999999998, "grad_norm": 1.0481212139129639, "learning_rate": 2e-05, "loss": 0.04112995, "step": 11976 }, { "epoch": 23.954, "grad_norm": 1.2804176807403564, "learning_rate": 2e-05, "loss": 0.04361865, "step": 11977 }, { "epoch": 23.956, "grad_norm": 2.2980878353118896, "learning_rate": 2e-05, "loss": 0.06331573, "step": 11978 }, { "epoch": 23.958, "grad_norm": 1.1036052703857422, "learning_rate": 2e-05, "loss": 0.04371598, "step": 11979 }, { "epoch": 23.96, "grad_norm": 1.9425326585769653, "learning_rate": 2e-05, "loss": 0.05798445, "step": 11980 }, { "epoch": 23.962, "grad_norm": 1.1282036304473877, "learning_rate": 2e-05, "loss": 0.04912503, "step": 11981 }, { "epoch": 23.964, "grad_norm": 1.0347181558609009, "learning_rate": 2e-05, "loss": 0.04752541, "step": 11982 }, { "epoch": 23.966, "grad_norm": 1.747428297996521, "learning_rate": 2e-05, "loss": 0.04747181, "step": 11983 }, { "epoch": 23.968, "grad_norm": 1.9715676307678223, "learning_rate": 2e-05, "loss": 0.04411424, "step": 11984 }, { "epoch": 23.97, "grad_norm": 1.4136850833892822, "learning_rate": 2e-05, "loss": 0.04269623, "step": 11985 }, { "epoch": 23.972, "grad_norm": 0.9330921173095703, "learning_rate": 2e-05, "loss": 0.0407118, "step": 11986 }, { "epoch": 23.974, "grad_norm": 1.0181400775909424, "learning_rate": 2e-05, "loss": 0.04231521, "step": 11987 }, { "epoch": 23.976, "grad_norm": 1.2224994897842407, "learning_rate": 2e-05, "loss": 0.03689064, "step": 11988 }, { "epoch": 23.978, "grad_norm": 0.9783885478973389, "learning_rate": 2e-05, "loss": 0.03860429, "step": 11989 }, { "epoch": 23.98, "grad_norm": 1.6571763753890991, "learning_rate": 2e-05, "loss": 0.05188219, "step": 11990 }, { "epoch": 23.982, "grad_norm": 1.1738743782043457, "learning_rate": 2e-05, "loss": 0.03369866, "step": 11991 }, { "epoch": 23.984, "grad_norm": 1.2980939149856567, "learning_rate": 2e-05, "loss": 0.04903428, "step": 11992 }, { "epoch": 23.986, "grad_norm": 1.3776237964630127, "learning_rate": 2e-05, "loss": 0.04843323, "step": 11993 }, { "epoch": 23.988, "grad_norm": 1.0764142274856567, "learning_rate": 2e-05, "loss": 0.03755047, "step": 11994 }, { "epoch": 23.99, "grad_norm": 1.2749308347702026, "learning_rate": 2e-05, "loss": 0.04293784, "step": 11995 }, { "epoch": 23.992, "grad_norm": 0.8950538039207458, "learning_rate": 2e-05, "loss": 0.03797248, "step": 11996 }, { "epoch": 23.994, "grad_norm": 1.486971378326416, "learning_rate": 2e-05, "loss": 0.05342013, "step": 11997 }, { "epoch": 23.996, "grad_norm": 1.137190341949463, "learning_rate": 2e-05, "loss": 0.04190537, "step": 11998 }, { "epoch": 23.998, "grad_norm": 1.5152031183242798, "learning_rate": 2e-05, "loss": 0.0522173, "step": 11999 }, { "epoch": 24.0, "grad_norm": 1.0603041648864746, "learning_rate": 2e-05, "loss": 0.03656108, "step": 12000 }, { "epoch": 24.0, "eval_performance": { "AngleClassification_1": 1.0, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9900199600798403, "Equal_1": 0.994, "Equal_2": 0.9700598802395209, "Equal_3": 0.9740518962075848, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.9900199600798403, "Parallel_1": 0.9819639278557114, "Parallel_2": 0.9979959919839679, "Parallel_3": 0.98, "Perpendicular_1": 1.0, "Perpendicular_2": 0.996, "Perpendicular_3": 0.875751503006012, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.992, "PointLiesOnCircle_3": 0.9892000000000001, "PointLiesOnLine_1": 0.9899799599198397, "PointLiesOnLine_2": 0.9979959919839679, "PointLiesOnLine_3": 0.9760479041916168 }, "eval_runtime": 227.1403, "eval_samples_per_second": 46.227, "eval_steps_per_second": 0.925, "step": 12000 }, { "epoch": 24.002, "grad_norm": 1.418009877204895, "learning_rate": 2e-05, "loss": 0.04424626, "step": 12001 }, { "epoch": 24.004, "grad_norm": 2.735375165939331, "learning_rate": 2e-05, "loss": 0.05092252, "step": 12002 }, { "epoch": 24.006, "grad_norm": 1.374438762664795, "learning_rate": 2e-05, "loss": 0.07002582, "step": 12003 }, { "epoch": 24.008, "grad_norm": 1.6659278869628906, "learning_rate": 2e-05, "loss": 0.05949622, "step": 12004 }, { "epoch": 24.01, "grad_norm": 1.0070431232452393, "learning_rate": 2e-05, "loss": 0.03810965, "step": 12005 }, { "epoch": 24.012, "grad_norm": 1.6485041379928589, "learning_rate": 2e-05, "loss": 0.03306497, "step": 12006 }, { "epoch": 24.014, "grad_norm": 1.1681909561157227, "learning_rate": 2e-05, "loss": 0.04544924, "step": 12007 }, { "epoch": 24.016, "grad_norm": 1.1167724132537842, "learning_rate": 2e-05, "loss": 0.04656851, "step": 12008 }, { "epoch": 24.018, "grad_norm": 1.1538599729537964, "learning_rate": 2e-05, "loss": 0.0492052, "step": 12009 }, { "epoch": 24.02, "grad_norm": 2.4941914081573486, "learning_rate": 2e-05, "loss": 0.06774968, "step": 12010 }, { "epoch": 24.022, "grad_norm": 2.1789300441741943, "learning_rate": 2e-05, "loss": 0.07845649, "step": 12011 }, { "epoch": 24.024, "grad_norm": 1.5828388929367065, "learning_rate": 2e-05, "loss": 0.06355821, "step": 12012 }, { "epoch": 24.026, "grad_norm": 1.3506202697753906, "learning_rate": 2e-05, "loss": 0.05469031, "step": 12013 }, { "epoch": 24.028, "grad_norm": 2.1041972637176514, "learning_rate": 2e-05, "loss": 0.06644633, "step": 12014 }, { "epoch": 24.03, "grad_norm": 1.9267951250076294, "learning_rate": 2e-05, "loss": 0.04986584, "step": 12015 }, { "epoch": 24.032, "grad_norm": 0.9378467202186584, "learning_rate": 2e-05, "loss": 0.04120469, "step": 12016 }, { "epoch": 24.034, "grad_norm": 1.1145087480545044, "learning_rate": 2e-05, "loss": 0.04812005, "step": 12017 }, { "epoch": 24.036, "grad_norm": 1.151256799697876, "learning_rate": 2e-05, "loss": 0.04657951, "step": 12018 }, { "epoch": 24.038, "grad_norm": 1.5107696056365967, "learning_rate": 2e-05, "loss": 0.06528657, "step": 12019 }, { "epoch": 24.04, "grad_norm": 1.878957986831665, "learning_rate": 2e-05, "loss": 0.07331221, "step": 12020 }, { "epoch": 24.042, "grad_norm": 1.3734325170516968, "learning_rate": 2e-05, "loss": 0.04563019, "step": 12021 }, { "epoch": 24.044, "grad_norm": 1.1970868110656738, "learning_rate": 2e-05, "loss": 0.06270801, "step": 12022 }, { "epoch": 24.046, "grad_norm": 1.0476535558700562, "learning_rate": 2e-05, "loss": 0.04935879, "step": 12023 }, { "epoch": 24.048, "grad_norm": 0.9679855704307556, "learning_rate": 2e-05, "loss": 0.04734359, "step": 12024 }, { "epoch": 24.05, "grad_norm": 1.3976882696151733, "learning_rate": 2e-05, "loss": 0.05529315, "step": 12025 }, { "epoch": 24.052, "grad_norm": 1.683671474456787, "learning_rate": 2e-05, "loss": 0.06260026, "step": 12026 }, { "epoch": 24.054, "grad_norm": 1.0987430810928345, "learning_rate": 2e-05, "loss": 0.04450016, "step": 12027 }, { "epoch": 24.056, "grad_norm": 1.2585513591766357, "learning_rate": 2e-05, "loss": 0.06503977, "step": 12028 }, { "epoch": 24.058, "grad_norm": 0.9826270937919617, "learning_rate": 2e-05, "loss": 0.04451761, "step": 12029 }, { "epoch": 24.06, "grad_norm": 1.3388910293579102, "learning_rate": 2e-05, "loss": 0.05457458, "step": 12030 }, { "epoch": 24.062, "grad_norm": 1.3059197664260864, "learning_rate": 2e-05, "loss": 0.04839645, "step": 12031 }, { "epoch": 24.064, "grad_norm": 2.5404107570648193, "learning_rate": 2e-05, "loss": 0.0595944, "step": 12032 }, { "epoch": 24.066, "grad_norm": 1.2393258810043335, "learning_rate": 2e-05, "loss": 0.05328018, "step": 12033 }, { "epoch": 24.068, "grad_norm": 3.7880096435546875, "learning_rate": 2e-05, "loss": 0.05491441, "step": 12034 }, { "epoch": 24.07, "grad_norm": 0.9895697236061096, "learning_rate": 2e-05, "loss": 0.04805033, "step": 12035 }, { "epoch": 24.072, "grad_norm": 1.159761905670166, "learning_rate": 2e-05, "loss": 0.07412002, "step": 12036 }, { "epoch": 24.074, "grad_norm": 1.4580849409103394, "learning_rate": 2e-05, "loss": 0.06479968, "step": 12037 }, { "epoch": 24.076, "grad_norm": 1.4561867713928223, "learning_rate": 2e-05, "loss": 0.06198963, "step": 12038 }, { "epoch": 24.078, "grad_norm": 1.4811429977416992, "learning_rate": 2e-05, "loss": 0.04521628, "step": 12039 }, { "epoch": 24.08, "grad_norm": 1.1370453834533691, "learning_rate": 2e-05, "loss": 0.05279215, "step": 12040 }, { "epoch": 24.082, "grad_norm": 2.09609317779541, "learning_rate": 2e-05, "loss": 0.04807444, "step": 12041 }, { "epoch": 24.084, "grad_norm": 1.4663184881210327, "learning_rate": 2e-05, "loss": 0.0751625, "step": 12042 }, { "epoch": 24.086, "grad_norm": 1.378034234046936, "learning_rate": 2e-05, "loss": 0.05994118, "step": 12043 }, { "epoch": 24.088, "grad_norm": 0.9875769019126892, "learning_rate": 2e-05, "loss": 0.0466931, "step": 12044 }, { "epoch": 24.09, "grad_norm": 1.2662602663040161, "learning_rate": 2e-05, "loss": 0.04789248, "step": 12045 }, { "epoch": 24.092, "grad_norm": 1.066401481628418, "learning_rate": 2e-05, "loss": 0.05700656, "step": 12046 }, { "epoch": 24.094, "grad_norm": 1.403649926185608, "learning_rate": 2e-05, "loss": 0.04785908, "step": 12047 }, { "epoch": 24.096, "grad_norm": 1.1198090314865112, "learning_rate": 2e-05, "loss": 0.05103398, "step": 12048 }, { "epoch": 24.098, "grad_norm": 1.5846776962280273, "learning_rate": 2e-05, "loss": 0.05311922, "step": 12049 }, { "epoch": 24.1, "grad_norm": 1.140979290008545, "learning_rate": 2e-05, "loss": 0.03832048, "step": 12050 }, { "epoch": 24.102, "grad_norm": 1.025484323501587, "learning_rate": 2e-05, "loss": 0.04976434, "step": 12051 }, { "epoch": 24.104, "grad_norm": 1.0325844287872314, "learning_rate": 2e-05, "loss": 0.04236849, "step": 12052 }, { "epoch": 24.106, "grad_norm": 1.9084160327911377, "learning_rate": 2e-05, "loss": 0.05677504, "step": 12053 }, { "epoch": 24.108, "grad_norm": 0.9093396663665771, "learning_rate": 2e-05, "loss": 0.03594617, "step": 12054 }, { "epoch": 24.11, "grad_norm": 0.9545243978500366, "learning_rate": 2e-05, "loss": 0.03141427, "step": 12055 }, { "epoch": 24.112, "grad_norm": 1.3837798833847046, "learning_rate": 2e-05, "loss": 0.0720195, "step": 12056 }, { "epoch": 24.114, "grad_norm": 1.2092723846435547, "learning_rate": 2e-05, "loss": 0.06160586, "step": 12057 }, { "epoch": 24.116, "grad_norm": 1.3201096057891846, "learning_rate": 2e-05, "loss": 0.05828308, "step": 12058 }, { "epoch": 24.118, "grad_norm": 2.313624382019043, "learning_rate": 2e-05, "loss": 0.0592609, "step": 12059 }, { "epoch": 24.12, "grad_norm": 1.4943830966949463, "learning_rate": 2e-05, "loss": 0.06300571, "step": 12060 }, { "epoch": 24.122, "grad_norm": 1.4670895338058472, "learning_rate": 2e-05, "loss": 0.05162621, "step": 12061 }, { "epoch": 24.124, "grad_norm": 2.7152469158172607, "learning_rate": 2e-05, "loss": 0.06348031, "step": 12062 }, { "epoch": 24.126, "grad_norm": 2.317094087600708, "learning_rate": 2e-05, "loss": 0.05741784, "step": 12063 }, { "epoch": 24.128, "grad_norm": 1.0244359970092773, "learning_rate": 2e-05, "loss": 0.04701255, "step": 12064 }, { "epoch": 24.13, "grad_norm": 1.3237030506134033, "learning_rate": 2e-05, "loss": 0.05226139, "step": 12065 }, { "epoch": 24.132, "grad_norm": 1.2171692848205566, "learning_rate": 2e-05, "loss": 0.04815545, "step": 12066 }, { "epoch": 24.134, "grad_norm": 1.1863187551498413, "learning_rate": 2e-05, "loss": 0.05712821, "step": 12067 }, { "epoch": 24.136, "grad_norm": 1.1170647144317627, "learning_rate": 2e-05, "loss": 0.0410592, "step": 12068 }, { "epoch": 24.138, "grad_norm": 1.1774200201034546, "learning_rate": 2e-05, "loss": 0.05794724, "step": 12069 }, { "epoch": 24.14, "grad_norm": 1.204297661781311, "learning_rate": 2e-05, "loss": 0.06121973, "step": 12070 }, { "epoch": 24.142, "grad_norm": 1.376940131187439, "learning_rate": 2e-05, "loss": 0.05056715, "step": 12071 }, { "epoch": 24.144, "grad_norm": 0.9747425317764282, "learning_rate": 2e-05, "loss": 0.03461857, "step": 12072 }, { "epoch": 24.146, "grad_norm": 1.163657784461975, "learning_rate": 2e-05, "loss": 0.03686737, "step": 12073 }, { "epoch": 24.148, "grad_norm": 1.0939643383026123, "learning_rate": 2e-05, "loss": 0.03931914, "step": 12074 }, { "epoch": 24.15, "grad_norm": 1.071172833442688, "learning_rate": 2e-05, "loss": 0.03818498, "step": 12075 }, { "epoch": 24.152, "grad_norm": 1.2956905364990234, "learning_rate": 2e-05, "loss": 0.04989437, "step": 12076 }, { "epoch": 24.154, "grad_norm": 1.5849953889846802, "learning_rate": 2e-05, "loss": 0.04744985, "step": 12077 }, { "epoch": 24.156, "grad_norm": 1.9284089803695679, "learning_rate": 2e-05, "loss": 0.06131367, "step": 12078 }, { "epoch": 24.158, "grad_norm": 1.091200351715088, "learning_rate": 2e-05, "loss": 0.04417513, "step": 12079 }, { "epoch": 24.16, "grad_norm": 3.763362407684326, "learning_rate": 2e-05, "loss": 0.06018006, "step": 12080 }, { "epoch": 24.162, "grad_norm": 1.2813291549682617, "learning_rate": 2e-05, "loss": 0.04047482, "step": 12081 }, { "epoch": 24.164, "grad_norm": 1.4561235904693604, "learning_rate": 2e-05, "loss": 0.05000281, "step": 12082 }, { "epoch": 24.166, "grad_norm": 2.2342355251312256, "learning_rate": 2e-05, "loss": 0.05439704, "step": 12083 }, { "epoch": 24.168, "grad_norm": 1.005120873451233, "learning_rate": 2e-05, "loss": 0.04398833, "step": 12084 }, { "epoch": 24.17, "grad_norm": 1.3008320331573486, "learning_rate": 2e-05, "loss": 0.04179512, "step": 12085 }, { "epoch": 24.172, "grad_norm": 0.900972843170166, "learning_rate": 2e-05, "loss": 0.03352999, "step": 12086 }, { "epoch": 24.174, "grad_norm": 1.152077555656433, "learning_rate": 2e-05, "loss": 0.0404734, "step": 12087 }, { "epoch": 24.176, "grad_norm": 0.9443712830543518, "learning_rate": 2e-05, "loss": 0.03086133, "step": 12088 }, { "epoch": 24.178, "grad_norm": 0.9387164115905762, "learning_rate": 2e-05, "loss": 0.04161924, "step": 12089 }, { "epoch": 24.18, "grad_norm": 1.2218292951583862, "learning_rate": 2e-05, "loss": 0.04807502, "step": 12090 }, { "epoch": 24.182, "grad_norm": 1.830248236656189, "learning_rate": 2e-05, "loss": 0.05023896, "step": 12091 }, { "epoch": 24.184, "grad_norm": 1.4031398296356201, "learning_rate": 2e-05, "loss": 0.05108761, "step": 12092 }, { "epoch": 24.186, "grad_norm": 1.2805626392364502, "learning_rate": 2e-05, "loss": 0.05273551, "step": 12093 }, { "epoch": 24.188, "grad_norm": 1.0230255126953125, "learning_rate": 2e-05, "loss": 0.03723209, "step": 12094 }, { "epoch": 24.19, "grad_norm": 1.391746163368225, "learning_rate": 2e-05, "loss": 0.03793499, "step": 12095 }, { "epoch": 24.192, "grad_norm": 1.0989409685134888, "learning_rate": 2e-05, "loss": 0.0528475, "step": 12096 }, { "epoch": 24.194, "grad_norm": 1.5381815433502197, "learning_rate": 2e-05, "loss": 0.0483861, "step": 12097 }, { "epoch": 24.196, "grad_norm": 1.3061126470565796, "learning_rate": 2e-05, "loss": 0.04735707, "step": 12098 }, { "epoch": 24.198, "grad_norm": 1.0894309282302856, "learning_rate": 2e-05, "loss": 0.05208131, "step": 12099 }, { "epoch": 24.2, "grad_norm": 1.3014709949493408, "learning_rate": 2e-05, "loss": 0.04770598, "step": 12100 }, { "epoch": 24.202, "grad_norm": 1.0045878887176514, "learning_rate": 2e-05, "loss": 0.04024687, "step": 12101 }, { "epoch": 24.204, "grad_norm": 1.2252531051635742, "learning_rate": 2e-05, "loss": 0.04864157, "step": 12102 }, { "epoch": 24.206, "grad_norm": 1.1308965682983398, "learning_rate": 2e-05, "loss": 0.03852432, "step": 12103 }, { "epoch": 24.208, "grad_norm": 1.4248325824737549, "learning_rate": 2e-05, "loss": 0.05086708, "step": 12104 }, { "epoch": 24.21, "grad_norm": 2.705193519592285, "learning_rate": 2e-05, "loss": 0.0596527, "step": 12105 }, { "epoch": 24.212, "grad_norm": 1.5626585483551025, "learning_rate": 2e-05, "loss": 0.06278218, "step": 12106 }, { "epoch": 24.214, "grad_norm": 1.2125219106674194, "learning_rate": 2e-05, "loss": 0.05346653, "step": 12107 }, { "epoch": 24.216, "grad_norm": 1.2232730388641357, "learning_rate": 2e-05, "loss": 0.03839267, "step": 12108 }, { "epoch": 24.218, "grad_norm": 1.9111217260360718, "learning_rate": 2e-05, "loss": 0.05157979, "step": 12109 }, { "epoch": 24.22, "grad_norm": 1.2244060039520264, "learning_rate": 2e-05, "loss": 0.05252042, "step": 12110 }, { "epoch": 24.222, "grad_norm": 2.1850805282592773, "learning_rate": 2e-05, "loss": 0.03560099, "step": 12111 }, { "epoch": 24.224, "grad_norm": 1.9973645210266113, "learning_rate": 2e-05, "loss": 0.05020345, "step": 12112 }, { "epoch": 24.226, "grad_norm": 1.1204603910446167, "learning_rate": 2e-05, "loss": 0.04161956, "step": 12113 }, { "epoch": 24.228, "grad_norm": 4.099585056304932, "learning_rate": 2e-05, "loss": 0.04665416, "step": 12114 }, { "epoch": 24.23, "grad_norm": 0.9738839864730835, "learning_rate": 2e-05, "loss": 0.02821814, "step": 12115 }, { "epoch": 24.232, "grad_norm": 1.9362106323242188, "learning_rate": 2e-05, "loss": 0.04581959, "step": 12116 }, { "epoch": 24.234, "grad_norm": 1.1132409572601318, "learning_rate": 2e-05, "loss": 0.03754843, "step": 12117 }, { "epoch": 24.236, "grad_norm": 1.4642243385314941, "learning_rate": 2e-05, "loss": 0.05159188, "step": 12118 }, { "epoch": 24.238, "grad_norm": 1.1961348056793213, "learning_rate": 2e-05, "loss": 0.05810984, "step": 12119 }, { "epoch": 24.24, "grad_norm": 1.1930533647537231, "learning_rate": 2e-05, "loss": 0.04192116, "step": 12120 }, { "epoch": 24.242, "grad_norm": 1.009221076965332, "learning_rate": 2e-05, "loss": 0.0402488, "step": 12121 }, { "epoch": 24.244, "grad_norm": 1.2493488788604736, "learning_rate": 2e-05, "loss": 0.04511616, "step": 12122 }, { "epoch": 24.246, "grad_norm": 1.7660053968429565, "learning_rate": 2e-05, "loss": 0.06709216, "step": 12123 }, { "epoch": 24.248, "grad_norm": 0.8922458291053772, "learning_rate": 2e-05, "loss": 0.03310864, "step": 12124 }, { "epoch": 24.25, "grad_norm": 1.9102340936660767, "learning_rate": 2e-05, "loss": 0.04356401, "step": 12125 }, { "epoch": 24.252, "grad_norm": 1.4493619203567505, "learning_rate": 2e-05, "loss": 0.05649861, "step": 12126 }, { "epoch": 24.254, "grad_norm": 1.3809674978256226, "learning_rate": 2e-05, "loss": 0.06866549, "step": 12127 }, { "epoch": 24.256, "grad_norm": 1.2042522430419922, "learning_rate": 2e-05, "loss": 0.0554885, "step": 12128 }, { "epoch": 24.258, "grad_norm": 1.8319530487060547, "learning_rate": 2e-05, "loss": 0.05050048, "step": 12129 }, { "epoch": 24.26, "grad_norm": 2.0881710052490234, "learning_rate": 2e-05, "loss": 0.06359955, "step": 12130 }, { "epoch": 24.262, "grad_norm": 1.1970436573028564, "learning_rate": 2e-05, "loss": 0.04791938, "step": 12131 }, { "epoch": 24.264, "grad_norm": 1.1830439567565918, "learning_rate": 2e-05, "loss": 0.06226282, "step": 12132 }, { "epoch": 24.266, "grad_norm": 1.6287788152694702, "learning_rate": 2e-05, "loss": 0.06041766, "step": 12133 }, { "epoch": 24.268, "grad_norm": 1.9986791610717773, "learning_rate": 2e-05, "loss": 0.04296382, "step": 12134 }, { "epoch": 24.27, "grad_norm": 1.163145661354065, "learning_rate": 2e-05, "loss": 0.05023701, "step": 12135 }, { "epoch": 24.272, "grad_norm": 1.0789096355438232, "learning_rate": 2e-05, "loss": 0.04329016, "step": 12136 }, { "epoch": 24.274, "grad_norm": 1.6199994087219238, "learning_rate": 2e-05, "loss": 0.04534511, "step": 12137 }, { "epoch": 24.276, "grad_norm": 1.2608959674835205, "learning_rate": 2e-05, "loss": 0.05191808, "step": 12138 }, { "epoch": 24.278, "grad_norm": 0.914844810962677, "learning_rate": 2e-05, "loss": 0.03914467, "step": 12139 }, { "epoch": 24.28, "grad_norm": 1.293208360671997, "learning_rate": 2e-05, "loss": 0.05589554, "step": 12140 }, { "epoch": 24.282, "grad_norm": 1.0767313241958618, "learning_rate": 2e-05, "loss": 0.04703413, "step": 12141 }, { "epoch": 24.284, "grad_norm": 2.198896646499634, "learning_rate": 2e-05, "loss": 0.05974286, "step": 12142 }, { "epoch": 24.286, "grad_norm": 1.4246108531951904, "learning_rate": 2e-05, "loss": 0.04786093, "step": 12143 }, { "epoch": 24.288, "grad_norm": 1.0523242950439453, "learning_rate": 2e-05, "loss": 0.04882574, "step": 12144 }, { "epoch": 24.29, "grad_norm": 1.1754931211471558, "learning_rate": 2e-05, "loss": 0.06298013, "step": 12145 }, { "epoch": 24.292, "grad_norm": 0.8617599010467529, "learning_rate": 2e-05, "loss": 0.03025228, "step": 12146 }, { "epoch": 24.294, "grad_norm": 4.488042831420898, "learning_rate": 2e-05, "loss": 0.0531662, "step": 12147 }, { "epoch": 24.296, "grad_norm": 1.1208933591842651, "learning_rate": 2e-05, "loss": 0.04774851, "step": 12148 }, { "epoch": 24.298, "grad_norm": 0.9299934506416321, "learning_rate": 2e-05, "loss": 0.04053638, "step": 12149 }, { "epoch": 24.3, "grad_norm": 1.4511997699737549, "learning_rate": 2e-05, "loss": 0.07936293, "step": 12150 }, { "epoch": 24.302, "grad_norm": 1.0642434358596802, "learning_rate": 2e-05, "loss": 0.04380853, "step": 12151 }, { "epoch": 24.304, "grad_norm": 1.2939056158065796, "learning_rate": 2e-05, "loss": 0.04103017, "step": 12152 }, { "epoch": 24.306, "grad_norm": 1.013724446296692, "learning_rate": 2e-05, "loss": 0.0356735, "step": 12153 }, { "epoch": 24.308, "grad_norm": 1.3339943885803223, "learning_rate": 2e-05, "loss": 0.0519697, "step": 12154 }, { "epoch": 24.31, "grad_norm": 0.941684365272522, "learning_rate": 2e-05, "loss": 0.03232648, "step": 12155 }, { "epoch": 24.312, "grad_norm": 1.1286405324935913, "learning_rate": 2e-05, "loss": 0.0433647, "step": 12156 }, { "epoch": 24.314, "grad_norm": 1.3552032709121704, "learning_rate": 2e-05, "loss": 0.05081817, "step": 12157 }, { "epoch": 24.316, "grad_norm": 1.0344088077545166, "learning_rate": 2e-05, "loss": 0.04744478, "step": 12158 }, { "epoch": 24.318, "grad_norm": 2.1381747722625732, "learning_rate": 2e-05, "loss": 0.04312861, "step": 12159 }, { "epoch": 24.32, "grad_norm": 2.602787494659424, "learning_rate": 2e-05, "loss": 0.07970935, "step": 12160 }, { "epoch": 24.322, "grad_norm": 1.0366038084030151, "learning_rate": 2e-05, "loss": 0.04986034, "step": 12161 }, { "epoch": 24.324, "grad_norm": 1.4917079210281372, "learning_rate": 2e-05, "loss": 0.04634131, "step": 12162 }, { "epoch": 24.326, "grad_norm": 1.2529760599136353, "learning_rate": 2e-05, "loss": 0.05745469, "step": 12163 }, { "epoch": 24.328, "grad_norm": 1.0923923254013062, "learning_rate": 2e-05, "loss": 0.04401264, "step": 12164 }, { "epoch": 24.33, "grad_norm": 2.013078212738037, "learning_rate": 2e-05, "loss": 0.05565473, "step": 12165 }, { "epoch": 24.332, "grad_norm": 1.2217144966125488, "learning_rate": 2e-05, "loss": 0.05610384, "step": 12166 }, { "epoch": 24.334, "grad_norm": 1.556965708732605, "learning_rate": 2e-05, "loss": 0.06565887, "step": 12167 }, { "epoch": 24.336, "grad_norm": 1.085266351699829, "learning_rate": 2e-05, "loss": 0.0511006, "step": 12168 }, { "epoch": 24.338, "grad_norm": 1.3602137565612793, "learning_rate": 2e-05, "loss": 0.05315398, "step": 12169 }, { "epoch": 24.34, "grad_norm": 1.9204418659210205, "learning_rate": 2e-05, "loss": 0.04753064, "step": 12170 }, { "epoch": 24.342, "grad_norm": 1.4229412078857422, "learning_rate": 2e-05, "loss": 0.05819522, "step": 12171 }, { "epoch": 24.344, "grad_norm": 1.1885809898376465, "learning_rate": 2e-05, "loss": 0.05267366, "step": 12172 }, { "epoch": 24.346, "grad_norm": 0.9655250310897827, "learning_rate": 2e-05, "loss": 0.04286445, "step": 12173 }, { "epoch": 24.348, "grad_norm": 1.0591481924057007, "learning_rate": 2e-05, "loss": 0.04515869, "step": 12174 }, { "epoch": 24.35, "grad_norm": 2.049103260040283, "learning_rate": 2e-05, "loss": 0.05815766, "step": 12175 }, { "epoch": 24.352, "grad_norm": 1.1137381792068481, "learning_rate": 2e-05, "loss": 0.05041601, "step": 12176 }, { "epoch": 24.354, "grad_norm": 1.657254934310913, "learning_rate": 2e-05, "loss": 0.05933203, "step": 12177 }, { "epoch": 24.356, "grad_norm": 0.8261805772781372, "learning_rate": 2e-05, "loss": 0.02725035, "step": 12178 }, { "epoch": 24.358, "grad_norm": 1.9413121938705444, "learning_rate": 2e-05, "loss": 0.03947655, "step": 12179 }, { "epoch": 24.36, "grad_norm": 1.2028558254241943, "learning_rate": 2e-05, "loss": 0.05533362, "step": 12180 }, { "epoch": 24.362, "grad_norm": 1.6621137857437134, "learning_rate": 2e-05, "loss": 0.05744664, "step": 12181 }, { "epoch": 24.364, "grad_norm": 1.97377610206604, "learning_rate": 2e-05, "loss": 0.07104184, "step": 12182 }, { "epoch": 24.366, "grad_norm": 2.1606006622314453, "learning_rate": 2e-05, "loss": 0.05294089, "step": 12183 }, { "epoch": 24.368, "grad_norm": 1.105574369430542, "learning_rate": 2e-05, "loss": 0.03760613, "step": 12184 }, { "epoch": 24.37, "grad_norm": 1.1629221439361572, "learning_rate": 2e-05, "loss": 0.04949073, "step": 12185 }, { "epoch": 24.372, "grad_norm": 1.0816835165023804, "learning_rate": 2e-05, "loss": 0.04821149, "step": 12186 }, { "epoch": 24.374, "grad_norm": 0.9681726694107056, "learning_rate": 2e-05, "loss": 0.04430058, "step": 12187 }, { "epoch": 24.376, "grad_norm": 1.433209776878357, "learning_rate": 2e-05, "loss": 0.05545279, "step": 12188 }, { "epoch": 24.378, "grad_norm": 1.637924313545227, "learning_rate": 2e-05, "loss": 0.07548504, "step": 12189 }, { "epoch": 24.38, "grad_norm": 1.0408587455749512, "learning_rate": 2e-05, "loss": 0.04173033, "step": 12190 }, { "epoch": 24.382, "grad_norm": 1.094917893409729, "learning_rate": 2e-05, "loss": 0.04946116, "step": 12191 }, { "epoch": 24.384, "grad_norm": 1.8451581001281738, "learning_rate": 2e-05, "loss": 0.04983567, "step": 12192 }, { "epoch": 24.386, "grad_norm": 1.2243115901947021, "learning_rate": 2e-05, "loss": 0.06636763, "step": 12193 }, { "epoch": 24.388, "grad_norm": 1.1864264011383057, "learning_rate": 2e-05, "loss": 0.05584348, "step": 12194 }, { "epoch": 24.39, "grad_norm": 1.2255829572677612, "learning_rate": 2e-05, "loss": 0.0708019, "step": 12195 }, { "epoch": 24.392, "grad_norm": 1.3492624759674072, "learning_rate": 2e-05, "loss": 0.04289038, "step": 12196 }, { "epoch": 24.394, "grad_norm": 1.1682665348052979, "learning_rate": 2e-05, "loss": 0.06048699, "step": 12197 }, { "epoch": 24.396, "grad_norm": 1.0311429500579834, "learning_rate": 2e-05, "loss": 0.04498905, "step": 12198 }, { "epoch": 24.398, "grad_norm": 1.0838291645050049, "learning_rate": 2e-05, "loss": 0.04585701, "step": 12199 }, { "epoch": 24.4, "grad_norm": 0.8638426661491394, "learning_rate": 2e-05, "loss": 0.03725105, "step": 12200 }, { "epoch": 24.402, "grad_norm": 1.7420493364334106, "learning_rate": 2e-05, "loss": 0.04112425, "step": 12201 }, { "epoch": 24.404, "grad_norm": 1.1646506786346436, "learning_rate": 2e-05, "loss": 0.04923238, "step": 12202 }, { "epoch": 24.406, "grad_norm": 1.7800471782684326, "learning_rate": 2e-05, "loss": 0.06838359, "step": 12203 }, { "epoch": 24.408, "grad_norm": 1.0820610523223877, "learning_rate": 2e-05, "loss": 0.0472196, "step": 12204 }, { "epoch": 24.41, "grad_norm": 5.401629447937012, "learning_rate": 2e-05, "loss": 0.05026706, "step": 12205 }, { "epoch": 24.412, "grad_norm": 1.5610214471817017, "learning_rate": 2e-05, "loss": 0.04884662, "step": 12206 }, { "epoch": 24.414, "grad_norm": 1.678418517112732, "learning_rate": 2e-05, "loss": 0.04944184, "step": 12207 }, { "epoch": 24.416, "grad_norm": 1.3728737831115723, "learning_rate": 2e-05, "loss": 0.04036933, "step": 12208 }, { "epoch": 24.418, "grad_norm": 1.1438133716583252, "learning_rate": 2e-05, "loss": 0.04945025, "step": 12209 }, { "epoch": 24.42, "grad_norm": 0.9688357710838318, "learning_rate": 2e-05, "loss": 0.04103271, "step": 12210 }, { "epoch": 24.422, "grad_norm": 1.1126015186309814, "learning_rate": 2e-05, "loss": 0.04952687, "step": 12211 }, { "epoch": 24.424, "grad_norm": 1.4256362915039062, "learning_rate": 2e-05, "loss": 0.05656151, "step": 12212 }, { "epoch": 24.426, "grad_norm": 1.4567201137542725, "learning_rate": 2e-05, "loss": 0.05023623, "step": 12213 }, { "epoch": 24.428, "grad_norm": 1.9243433475494385, "learning_rate": 2e-05, "loss": 0.07356782, "step": 12214 }, { "epoch": 24.43, "grad_norm": 1.462647795677185, "learning_rate": 2e-05, "loss": 0.04474293, "step": 12215 }, { "epoch": 24.432, "grad_norm": 1.5156383514404297, "learning_rate": 2e-05, "loss": 0.04119217, "step": 12216 }, { "epoch": 24.434, "grad_norm": 1.3535808324813843, "learning_rate": 2e-05, "loss": 0.04600451, "step": 12217 }, { "epoch": 24.436, "grad_norm": 1.126853585243225, "learning_rate": 2e-05, "loss": 0.04988167, "step": 12218 }, { "epoch": 24.438, "grad_norm": 1.1450812816619873, "learning_rate": 2e-05, "loss": 0.04975093, "step": 12219 }, { "epoch": 24.44, "grad_norm": 1.071311593055725, "learning_rate": 2e-05, "loss": 0.04609923, "step": 12220 }, { "epoch": 24.442, "grad_norm": 1.2534888982772827, "learning_rate": 2e-05, "loss": 0.05959511, "step": 12221 }, { "epoch": 24.444, "grad_norm": 1.0460691452026367, "learning_rate": 2e-05, "loss": 0.04360256, "step": 12222 }, { "epoch": 24.446, "grad_norm": 2.1317524909973145, "learning_rate": 2e-05, "loss": 0.09364918, "step": 12223 }, { "epoch": 24.448, "grad_norm": 1.3024861812591553, "learning_rate": 2e-05, "loss": 0.05690356, "step": 12224 }, { "epoch": 24.45, "grad_norm": 1.151909589767456, "learning_rate": 2e-05, "loss": 0.04873949, "step": 12225 }, { "epoch": 24.452, "grad_norm": 0.8761228322982788, "learning_rate": 2e-05, "loss": 0.03394051, "step": 12226 }, { "epoch": 24.454, "grad_norm": 1.811702847480774, "learning_rate": 2e-05, "loss": 0.06403732, "step": 12227 }, { "epoch": 24.456, "grad_norm": 1.3536931276321411, "learning_rate": 2e-05, "loss": 0.0501513, "step": 12228 }, { "epoch": 24.458, "grad_norm": 1.2223119735717773, "learning_rate": 2e-05, "loss": 0.04542353, "step": 12229 }, { "epoch": 24.46, "grad_norm": 1.397172212600708, "learning_rate": 2e-05, "loss": 0.05849022, "step": 12230 }, { "epoch": 24.462, "grad_norm": 1.7770304679870605, "learning_rate": 2e-05, "loss": 0.06217421, "step": 12231 }, { "epoch": 24.464, "grad_norm": 1.2999236583709717, "learning_rate": 2e-05, "loss": 0.05940741, "step": 12232 }, { "epoch": 24.466, "grad_norm": 2.1342666149139404, "learning_rate": 2e-05, "loss": 0.06249571, "step": 12233 }, { "epoch": 24.468, "grad_norm": 1.2048630714416504, "learning_rate": 2e-05, "loss": 0.06207722, "step": 12234 }, { "epoch": 24.47, "grad_norm": 1.5658307075500488, "learning_rate": 2e-05, "loss": 0.0512342, "step": 12235 }, { "epoch": 24.472, "grad_norm": 1.2574323415756226, "learning_rate": 2e-05, "loss": 0.05711411, "step": 12236 }, { "epoch": 24.474, "grad_norm": 1.1493301391601562, "learning_rate": 2e-05, "loss": 0.04763763, "step": 12237 }, { "epoch": 24.476, "grad_norm": 1.3291813135147095, "learning_rate": 2e-05, "loss": 0.05095512, "step": 12238 }, { "epoch": 24.478, "grad_norm": 0.9322985410690308, "learning_rate": 2e-05, "loss": 0.03574435, "step": 12239 }, { "epoch": 24.48, "grad_norm": 1.4179059267044067, "learning_rate": 2e-05, "loss": 0.05599285, "step": 12240 }, { "epoch": 24.482, "grad_norm": 1.6319115161895752, "learning_rate": 2e-05, "loss": 0.05100554, "step": 12241 }, { "epoch": 24.484, "grad_norm": 1.005834698677063, "learning_rate": 2e-05, "loss": 0.0409047, "step": 12242 }, { "epoch": 24.486, "grad_norm": 1.0510324239730835, "learning_rate": 2e-05, "loss": 0.04888344, "step": 12243 }, { "epoch": 24.488, "grad_norm": 1.438249945640564, "learning_rate": 2e-05, "loss": 0.038957, "step": 12244 }, { "epoch": 24.49, "grad_norm": 1.8468129634857178, "learning_rate": 2e-05, "loss": 0.0606167, "step": 12245 }, { "epoch": 24.492, "grad_norm": 1.6932883262634277, "learning_rate": 2e-05, "loss": 0.05723818, "step": 12246 }, { "epoch": 24.494, "grad_norm": 1.70160710811615, "learning_rate": 2e-05, "loss": 0.05374432, "step": 12247 }, { "epoch": 24.496, "grad_norm": 1.2021385431289673, "learning_rate": 2e-05, "loss": 0.06148978, "step": 12248 }, { "epoch": 24.498, "grad_norm": 1.2681556940078735, "learning_rate": 2e-05, "loss": 0.06340459, "step": 12249 }, { "epoch": 24.5, "grad_norm": 1.2100648880004883, "learning_rate": 2e-05, "loss": 0.05049655, "step": 12250 }, { "epoch": 24.502, "grad_norm": 1.3253339529037476, "learning_rate": 2e-05, "loss": 0.04663317, "step": 12251 }, { "epoch": 24.504, "grad_norm": 1.1344722509384155, "learning_rate": 2e-05, "loss": 0.05735528, "step": 12252 }, { "epoch": 24.506, "grad_norm": 1.9279087781906128, "learning_rate": 2e-05, "loss": 0.06566287, "step": 12253 }, { "epoch": 24.508, "grad_norm": 1.401141881942749, "learning_rate": 2e-05, "loss": 0.06435329, "step": 12254 }, { "epoch": 24.51, "grad_norm": 1.21737539768219, "learning_rate": 2e-05, "loss": 0.04078498, "step": 12255 }, { "epoch": 24.512, "grad_norm": 1.451131820678711, "learning_rate": 2e-05, "loss": 0.03123941, "step": 12256 }, { "epoch": 24.514, "grad_norm": 1.2081773281097412, "learning_rate": 2e-05, "loss": 0.06357303, "step": 12257 }, { "epoch": 24.516, "grad_norm": 1.21334707736969, "learning_rate": 2e-05, "loss": 0.06550644, "step": 12258 }, { "epoch": 24.518, "grad_norm": 1.9683334827423096, "learning_rate": 2e-05, "loss": 0.04505812, "step": 12259 }, { "epoch": 24.52, "grad_norm": 1.3783949613571167, "learning_rate": 2e-05, "loss": 0.06288993, "step": 12260 }, { "epoch": 24.522, "grad_norm": 2.832237720489502, "learning_rate": 2e-05, "loss": 0.06941493, "step": 12261 }, { "epoch": 24.524, "grad_norm": 1.2820866107940674, "learning_rate": 2e-05, "loss": 0.03696114, "step": 12262 }, { "epoch": 24.526, "grad_norm": 1.0169528722763062, "learning_rate": 2e-05, "loss": 0.04451901, "step": 12263 }, { "epoch": 24.528, "grad_norm": 1.097316861152649, "learning_rate": 2e-05, "loss": 0.04056693, "step": 12264 }, { "epoch": 24.53, "grad_norm": 1.0516437292099, "learning_rate": 2e-05, "loss": 0.04753781, "step": 12265 }, { "epoch": 24.532, "grad_norm": 1.1702829599380493, "learning_rate": 2e-05, "loss": 0.05237371, "step": 12266 }, { "epoch": 24.534, "grad_norm": 1.1737743616104126, "learning_rate": 2e-05, "loss": 0.04562186, "step": 12267 }, { "epoch": 24.536, "grad_norm": 0.985040009021759, "learning_rate": 2e-05, "loss": 0.04355561, "step": 12268 }, { "epoch": 24.538, "grad_norm": 1.1432610750198364, "learning_rate": 2e-05, "loss": 0.06352413, "step": 12269 }, { "epoch": 24.54, "grad_norm": 1.6031666994094849, "learning_rate": 2e-05, "loss": 0.03673107, "step": 12270 }, { "epoch": 24.542, "grad_norm": 1.3181161880493164, "learning_rate": 2e-05, "loss": 0.03340524, "step": 12271 }, { "epoch": 24.544, "grad_norm": 1.185526728630066, "learning_rate": 2e-05, "loss": 0.04520765, "step": 12272 }, { "epoch": 24.546, "grad_norm": 1.267002820968628, "learning_rate": 2e-05, "loss": 0.04475286, "step": 12273 }, { "epoch": 24.548000000000002, "grad_norm": 0.9980287551879883, "learning_rate": 2e-05, "loss": 0.04288146, "step": 12274 }, { "epoch": 24.55, "grad_norm": 1.5016635656356812, "learning_rate": 2e-05, "loss": 0.0426567, "step": 12275 }, { "epoch": 24.552, "grad_norm": 1.386328935623169, "learning_rate": 2e-05, "loss": 0.04402653, "step": 12276 }, { "epoch": 24.554, "grad_norm": 1.2957773208618164, "learning_rate": 2e-05, "loss": 0.05877434, "step": 12277 }, { "epoch": 24.556, "grad_norm": 1.3677865266799927, "learning_rate": 2e-05, "loss": 0.05044467, "step": 12278 }, { "epoch": 24.558, "grad_norm": 1.276932716369629, "learning_rate": 2e-05, "loss": 0.04479785, "step": 12279 }, { "epoch": 24.56, "grad_norm": 0.9797831773757935, "learning_rate": 2e-05, "loss": 0.0430446, "step": 12280 }, { "epoch": 24.562, "grad_norm": 2.150264263153076, "learning_rate": 2e-05, "loss": 0.07180483, "step": 12281 }, { "epoch": 24.564, "grad_norm": 1.2323116064071655, "learning_rate": 2e-05, "loss": 0.04297125, "step": 12282 }, { "epoch": 24.566, "grad_norm": 1.7884502410888672, "learning_rate": 2e-05, "loss": 0.04419117, "step": 12283 }, { "epoch": 24.568, "grad_norm": 1.1908988952636719, "learning_rate": 2e-05, "loss": 0.04578154, "step": 12284 }, { "epoch": 24.57, "grad_norm": 1.654547929763794, "learning_rate": 2e-05, "loss": 0.03765393, "step": 12285 }, { "epoch": 24.572, "grad_norm": 1.270766019821167, "learning_rate": 2e-05, "loss": 0.03549252, "step": 12286 }, { "epoch": 24.574, "grad_norm": 1.8873577117919922, "learning_rate": 2e-05, "loss": 0.04547989, "step": 12287 }, { "epoch": 24.576, "grad_norm": 1.1582826375961304, "learning_rate": 2e-05, "loss": 0.04703826, "step": 12288 }, { "epoch": 24.578, "grad_norm": 0.9339529275894165, "learning_rate": 2e-05, "loss": 0.04434437, "step": 12289 }, { "epoch": 24.58, "grad_norm": 1.8878004550933838, "learning_rate": 2e-05, "loss": 0.06117485, "step": 12290 }, { "epoch": 24.582, "grad_norm": 1.5583950281143188, "learning_rate": 2e-05, "loss": 0.06009279, "step": 12291 }, { "epoch": 24.584, "grad_norm": 1.3457813262939453, "learning_rate": 2e-05, "loss": 0.06969124, "step": 12292 }, { "epoch": 24.586, "grad_norm": 1.1643043756484985, "learning_rate": 2e-05, "loss": 0.04943801, "step": 12293 }, { "epoch": 24.588, "grad_norm": 2.3413541316986084, "learning_rate": 2e-05, "loss": 0.03564565, "step": 12294 }, { "epoch": 24.59, "grad_norm": 1.4401907920837402, "learning_rate": 2e-05, "loss": 0.06643665, "step": 12295 }, { "epoch": 24.592, "grad_norm": 3.1254398822784424, "learning_rate": 2e-05, "loss": 0.04019414, "step": 12296 }, { "epoch": 24.594, "grad_norm": 1.023505449295044, "learning_rate": 2e-05, "loss": 0.04217444, "step": 12297 }, { "epoch": 24.596, "grad_norm": 1.1468936204910278, "learning_rate": 2e-05, "loss": 0.04574583, "step": 12298 }, { "epoch": 24.598, "grad_norm": 0.9270424246788025, "learning_rate": 2e-05, "loss": 0.03833769, "step": 12299 }, { "epoch": 24.6, "grad_norm": 1.286302924156189, "learning_rate": 2e-05, "loss": 0.06100383, "step": 12300 }, { "epoch": 24.602, "grad_norm": 1.4448425769805908, "learning_rate": 2e-05, "loss": 0.04179417, "step": 12301 }, { "epoch": 24.604, "grad_norm": 1.322953462600708, "learning_rate": 2e-05, "loss": 0.04749265, "step": 12302 }, { "epoch": 24.606, "grad_norm": 1.2852628231048584, "learning_rate": 2e-05, "loss": 0.0561444, "step": 12303 }, { "epoch": 24.608, "grad_norm": 1.7854129076004028, "learning_rate": 2e-05, "loss": 0.05308358, "step": 12304 }, { "epoch": 24.61, "grad_norm": 1.5740928649902344, "learning_rate": 2e-05, "loss": 0.05608488, "step": 12305 }, { "epoch": 24.612, "grad_norm": 1.8480031490325928, "learning_rate": 2e-05, "loss": 0.05560174, "step": 12306 }, { "epoch": 24.614, "grad_norm": 0.9975174069404602, "learning_rate": 2e-05, "loss": 0.04151163, "step": 12307 }, { "epoch": 24.616, "grad_norm": 1.436156153678894, "learning_rate": 2e-05, "loss": 0.05058396, "step": 12308 }, { "epoch": 24.618, "grad_norm": 1.4521929025650024, "learning_rate": 2e-05, "loss": 0.05367004, "step": 12309 }, { "epoch": 24.62, "grad_norm": 1.5099841356277466, "learning_rate": 2e-05, "loss": 0.05290164, "step": 12310 }, { "epoch": 24.622, "grad_norm": 1.0632920265197754, "learning_rate": 2e-05, "loss": 0.0528096, "step": 12311 }, { "epoch": 24.624, "grad_norm": 1.061439871788025, "learning_rate": 2e-05, "loss": 0.0419883, "step": 12312 }, { "epoch": 24.626, "grad_norm": 0.8904504179954529, "learning_rate": 2e-05, "loss": 0.03334689, "step": 12313 }, { "epoch": 24.628, "grad_norm": 1.1342662572860718, "learning_rate": 2e-05, "loss": 0.04276549, "step": 12314 }, { "epoch": 24.63, "grad_norm": 1.4148058891296387, "learning_rate": 2e-05, "loss": 0.04554226, "step": 12315 }, { "epoch": 24.632, "grad_norm": 2.209036350250244, "learning_rate": 2e-05, "loss": 0.03893118, "step": 12316 }, { "epoch": 24.634, "grad_norm": 1.072554588317871, "learning_rate": 2e-05, "loss": 0.04576019, "step": 12317 }, { "epoch": 24.636, "grad_norm": 1.4901772737503052, "learning_rate": 2e-05, "loss": 0.07669685, "step": 12318 }, { "epoch": 24.638, "grad_norm": 1.7460018396377563, "learning_rate": 2e-05, "loss": 0.03690163, "step": 12319 }, { "epoch": 24.64, "grad_norm": 1.181031584739685, "learning_rate": 2e-05, "loss": 0.05541149, "step": 12320 }, { "epoch": 24.642, "grad_norm": 1.28119695186615, "learning_rate": 2e-05, "loss": 0.05800177, "step": 12321 }, { "epoch": 24.644, "grad_norm": 2.0592458248138428, "learning_rate": 2e-05, "loss": 0.05538993, "step": 12322 }, { "epoch": 24.646, "grad_norm": 1.4591492414474487, "learning_rate": 2e-05, "loss": 0.05229129, "step": 12323 }, { "epoch": 24.648, "grad_norm": 1.3595259189605713, "learning_rate": 2e-05, "loss": 0.0482172, "step": 12324 }, { "epoch": 24.65, "grad_norm": 1.3629369735717773, "learning_rate": 2e-05, "loss": 0.05205503, "step": 12325 }, { "epoch": 24.652, "grad_norm": 1.3431694507598877, "learning_rate": 2e-05, "loss": 0.04965796, "step": 12326 }, { "epoch": 24.654, "grad_norm": 1.377056360244751, "learning_rate": 2e-05, "loss": 0.06131665, "step": 12327 }, { "epoch": 24.656, "grad_norm": 1.6115247011184692, "learning_rate": 2e-05, "loss": 0.07024746, "step": 12328 }, { "epoch": 24.658, "grad_norm": 1.1689660549163818, "learning_rate": 2e-05, "loss": 0.04358719, "step": 12329 }, { "epoch": 24.66, "grad_norm": 1.3219000101089478, "learning_rate": 2e-05, "loss": 0.06240706, "step": 12330 }, { "epoch": 24.662, "grad_norm": 1.5137040615081787, "learning_rate": 2e-05, "loss": 0.05935108, "step": 12331 }, { "epoch": 24.664, "grad_norm": 0.9346756935119629, "learning_rate": 2e-05, "loss": 0.04184858, "step": 12332 }, { "epoch": 24.666, "grad_norm": 1.5065889358520508, "learning_rate": 2e-05, "loss": 0.0574278, "step": 12333 }, { "epoch": 24.668, "grad_norm": 1.7345739603042603, "learning_rate": 2e-05, "loss": 0.06336014, "step": 12334 }, { "epoch": 24.67, "grad_norm": 1.5292943716049194, "learning_rate": 2e-05, "loss": 0.06692265, "step": 12335 }, { "epoch": 24.672, "grad_norm": 2.7317817211151123, "learning_rate": 2e-05, "loss": 0.05731232, "step": 12336 }, { "epoch": 24.674, "grad_norm": 1.572746753692627, "learning_rate": 2e-05, "loss": 0.06280471, "step": 12337 }, { "epoch": 24.676, "grad_norm": 1.0383738279342651, "learning_rate": 2e-05, "loss": 0.04587968, "step": 12338 }, { "epoch": 24.678, "grad_norm": 1.173978328704834, "learning_rate": 2e-05, "loss": 0.04547011, "step": 12339 }, { "epoch": 24.68, "grad_norm": 1.1547058820724487, "learning_rate": 2e-05, "loss": 0.05490729, "step": 12340 }, { "epoch": 24.682, "grad_norm": 1.0964388847351074, "learning_rate": 2e-05, "loss": 0.0413642, "step": 12341 }, { "epoch": 24.684, "grad_norm": 2.1355483531951904, "learning_rate": 2e-05, "loss": 0.05162902, "step": 12342 }, { "epoch": 24.686, "grad_norm": 1.7211798429489136, "learning_rate": 2e-05, "loss": 0.04734351, "step": 12343 }, { "epoch": 24.688, "grad_norm": 1.591632604598999, "learning_rate": 2e-05, "loss": 0.06387743, "step": 12344 }, { "epoch": 24.69, "grad_norm": 1.3127955198287964, "learning_rate": 2e-05, "loss": 0.05781527, "step": 12345 }, { "epoch": 24.692, "grad_norm": 1.4055417776107788, "learning_rate": 2e-05, "loss": 0.04625803, "step": 12346 }, { "epoch": 24.694, "grad_norm": 1.410332441329956, "learning_rate": 2e-05, "loss": 0.05247523, "step": 12347 }, { "epoch": 24.696, "grad_norm": 1.945993185043335, "learning_rate": 2e-05, "loss": 0.05659994, "step": 12348 }, { "epoch": 24.698, "grad_norm": 1.0669488906860352, "learning_rate": 2e-05, "loss": 0.0464219, "step": 12349 }, { "epoch": 24.7, "grad_norm": 2.0070691108703613, "learning_rate": 2e-05, "loss": 0.05359336, "step": 12350 }, { "epoch": 24.701999999999998, "grad_norm": 1.0812411308288574, "learning_rate": 2e-05, "loss": 0.03972121, "step": 12351 }, { "epoch": 24.704, "grad_norm": 1.1944670677185059, "learning_rate": 2e-05, "loss": 0.0463926, "step": 12352 }, { "epoch": 24.706, "grad_norm": 1.6038752794265747, "learning_rate": 2e-05, "loss": 0.05930416, "step": 12353 }, { "epoch": 24.708, "grad_norm": 2.150514602661133, "learning_rate": 2e-05, "loss": 0.06464927, "step": 12354 }, { "epoch": 24.71, "grad_norm": 1.3679577112197876, "learning_rate": 2e-05, "loss": 0.05414622, "step": 12355 }, { "epoch": 24.712, "grad_norm": 1.188789963722229, "learning_rate": 2e-05, "loss": 0.04423264, "step": 12356 }, { "epoch": 24.714, "grad_norm": 0.9374627470970154, "learning_rate": 2e-05, "loss": 0.03880904, "step": 12357 }, { "epoch": 24.716, "grad_norm": 1.4590775966644287, "learning_rate": 2e-05, "loss": 0.07228787, "step": 12358 }, { "epoch": 24.718, "grad_norm": 1.3924126625061035, "learning_rate": 2e-05, "loss": 0.05765446, "step": 12359 }, { "epoch": 24.72, "grad_norm": 1.1420016288757324, "learning_rate": 2e-05, "loss": 0.04609679, "step": 12360 }, { "epoch": 24.722, "grad_norm": 0.9809737205505371, "learning_rate": 2e-05, "loss": 0.04786153, "step": 12361 }, { "epoch": 24.724, "grad_norm": 1.2153939008712769, "learning_rate": 2e-05, "loss": 0.04521716, "step": 12362 }, { "epoch": 24.726, "grad_norm": 1.3689560890197754, "learning_rate": 2e-05, "loss": 0.05780536, "step": 12363 }, { "epoch": 24.728, "grad_norm": 1.690804362297058, "learning_rate": 2e-05, "loss": 0.05836462, "step": 12364 }, { "epoch": 24.73, "grad_norm": 2.1542282104492188, "learning_rate": 2e-05, "loss": 0.0547177, "step": 12365 }, { "epoch": 24.732, "grad_norm": 1.7756855487823486, "learning_rate": 2e-05, "loss": 0.06027528, "step": 12366 }, { "epoch": 24.734, "grad_norm": 1.140049934387207, "learning_rate": 2e-05, "loss": 0.05798963, "step": 12367 }, { "epoch": 24.736, "grad_norm": 1.0280715227127075, "learning_rate": 2e-05, "loss": 0.03869246, "step": 12368 }, { "epoch": 24.738, "grad_norm": 1.5488649606704712, "learning_rate": 2e-05, "loss": 0.07393647, "step": 12369 }, { "epoch": 24.74, "grad_norm": 0.9122893214225769, "learning_rate": 2e-05, "loss": 0.0288643, "step": 12370 }, { "epoch": 24.742, "grad_norm": 1.1025679111480713, "learning_rate": 2e-05, "loss": 0.05300673, "step": 12371 }, { "epoch": 24.744, "grad_norm": 1.2754881381988525, "learning_rate": 2e-05, "loss": 0.07259779, "step": 12372 }, { "epoch": 24.746, "grad_norm": 1.098259687423706, "learning_rate": 2e-05, "loss": 0.05312631, "step": 12373 }, { "epoch": 24.748, "grad_norm": 1.2409359216690063, "learning_rate": 2e-05, "loss": 0.03236099, "step": 12374 }, { "epoch": 24.75, "grad_norm": 3.2416577339172363, "learning_rate": 2e-05, "loss": 0.06670462, "step": 12375 }, { "epoch": 24.752, "grad_norm": 1.6567413806915283, "learning_rate": 2e-05, "loss": 0.04551793, "step": 12376 }, { "epoch": 24.754, "grad_norm": 0.9131588339805603, "learning_rate": 2e-05, "loss": 0.03293866, "step": 12377 }, { "epoch": 24.756, "grad_norm": 1.5766234397888184, "learning_rate": 2e-05, "loss": 0.05568074, "step": 12378 }, { "epoch": 24.758, "grad_norm": 1.035622239112854, "learning_rate": 2e-05, "loss": 0.05623966, "step": 12379 }, { "epoch": 24.76, "grad_norm": 1.342229962348938, "learning_rate": 2e-05, "loss": 0.07168756, "step": 12380 }, { "epoch": 24.762, "grad_norm": 0.9332346320152283, "learning_rate": 2e-05, "loss": 0.04246333, "step": 12381 }, { "epoch": 24.764, "grad_norm": 1.040676236152649, "learning_rate": 2e-05, "loss": 0.04132526, "step": 12382 }, { "epoch": 24.766, "grad_norm": 1.4659868478775024, "learning_rate": 2e-05, "loss": 0.05624117, "step": 12383 }, { "epoch": 24.768, "grad_norm": 2.316220283508301, "learning_rate": 2e-05, "loss": 0.04193213, "step": 12384 }, { "epoch": 24.77, "grad_norm": 1.2177510261535645, "learning_rate": 2e-05, "loss": 0.06743999, "step": 12385 }, { "epoch": 24.772, "grad_norm": 0.9551792740821838, "learning_rate": 2e-05, "loss": 0.03589004, "step": 12386 }, { "epoch": 24.774, "grad_norm": 1.0946069955825806, "learning_rate": 2e-05, "loss": 0.05895481, "step": 12387 }, { "epoch": 24.776, "grad_norm": 2.0356576442718506, "learning_rate": 2e-05, "loss": 0.07084911, "step": 12388 }, { "epoch": 24.778, "grad_norm": 2.1792867183685303, "learning_rate": 2e-05, "loss": 0.07902066, "step": 12389 }, { "epoch": 24.78, "grad_norm": 2.4311211109161377, "learning_rate": 2e-05, "loss": 0.04789238, "step": 12390 }, { "epoch": 24.782, "grad_norm": 0.9530346989631653, "learning_rate": 2e-05, "loss": 0.04101372, "step": 12391 }, { "epoch": 24.784, "grad_norm": 1.1075371503829956, "learning_rate": 2e-05, "loss": 0.04335552, "step": 12392 }, { "epoch": 24.786, "grad_norm": 1.2828515768051147, "learning_rate": 2e-05, "loss": 0.05172892, "step": 12393 }, { "epoch": 24.788, "grad_norm": 2.204916000366211, "learning_rate": 2e-05, "loss": 0.05955718, "step": 12394 }, { "epoch": 24.79, "grad_norm": 1.079278826713562, "learning_rate": 2e-05, "loss": 0.04416421, "step": 12395 }, { "epoch": 24.792, "grad_norm": 1.285398244857788, "learning_rate": 2e-05, "loss": 0.05010903, "step": 12396 }, { "epoch": 24.794, "grad_norm": 1.1867382526397705, "learning_rate": 2e-05, "loss": 0.05562855, "step": 12397 }, { "epoch": 24.796, "grad_norm": 2.70597243309021, "learning_rate": 2e-05, "loss": 0.06310703, "step": 12398 }, { "epoch": 24.798000000000002, "grad_norm": 1.1036860942840576, "learning_rate": 2e-05, "loss": 0.0495602, "step": 12399 }, { "epoch": 24.8, "grad_norm": 1.6513850688934326, "learning_rate": 2e-05, "loss": 0.05960392, "step": 12400 }, { "epoch": 24.802, "grad_norm": 1.1386228799819946, "learning_rate": 2e-05, "loss": 0.05625272, "step": 12401 }, { "epoch": 24.804, "grad_norm": 0.989041268825531, "learning_rate": 2e-05, "loss": 0.04103786, "step": 12402 }, { "epoch": 24.806, "grad_norm": 1.0465083122253418, "learning_rate": 2e-05, "loss": 0.0371464, "step": 12403 }, { "epoch": 24.808, "grad_norm": 5.335691928863525, "learning_rate": 2e-05, "loss": 0.06911503, "step": 12404 }, { "epoch": 24.81, "grad_norm": 0.9925184845924377, "learning_rate": 2e-05, "loss": 0.0345923, "step": 12405 }, { "epoch": 24.812, "grad_norm": 1.2212094068527222, "learning_rate": 2e-05, "loss": 0.04326582, "step": 12406 }, { "epoch": 24.814, "grad_norm": 1.647882103919983, "learning_rate": 2e-05, "loss": 0.03450688, "step": 12407 }, { "epoch": 24.816, "grad_norm": 1.4562323093414307, "learning_rate": 2e-05, "loss": 0.05225542, "step": 12408 }, { "epoch": 24.818, "grad_norm": 1.2025195360183716, "learning_rate": 2e-05, "loss": 0.03831311, "step": 12409 }, { "epoch": 24.82, "grad_norm": 1.2085515260696411, "learning_rate": 2e-05, "loss": 0.04726699, "step": 12410 }, { "epoch": 24.822, "grad_norm": 1.6415096521377563, "learning_rate": 2e-05, "loss": 0.05576058, "step": 12411 }, { "epoch": 24.824, "grad_norm": 1.547104835510254, "learning_rate": 2e-05, "loss": 0.06630287, "step": 12412 }, { "epoch": 24.826, "grad_norm": 1.276793360710144, "learning_rate": 2e-05, "loss": 0.05537467, "step": 12413 }, { "epoch": 24.828, "grad_norm": 1.3390082120895386, "learning_rate": 2e-05, "loss": 0.05545241, "step": 12414 }, { "epoch": 24.83, "grad_norm": 1.3239364624023438, "learning_rate": 2e-05, "loss": 0.04651525, "step": 12415 }, { "epoch": 24.832, "grad_norm": 1.753379225730896, "learning_rate": 2e-05, "loss": 0.05151422, "step": 12416 }, { "epoch": 24.834, "grad_norm": 1.0875189304351807, "learning_rate": 2e-05, "loss": 0.05908264, "step": 12417 }, { "epoch": 24.836, "grad_norm": 1.1352912187576294, "learning_rate": 2e-05, "loss": 0.04761673, "step": 12418 }, { "epoch": 24.838, "grad_norm": 1.066762924194336, "learning_rate": 2e-05, "loss": 0.04468984, "step": 12419 }, { "epoch": 24.84, "grad_norm": 1.2002276182174683, "learning_rate": 2e-05, "loss": 0.05177302, "step": 12420 }, { "epoch": 24.842, "grad_norm": 1.315173625946045, "learning_rate": 2e-05, "loss": 0.04798455, "step": 12421 }, { "epoch": 24.844, "grad_norm": 0.9851579666137695, "learning_rate": 2e-05, "loss": 0.0347637, "step": 12422 }, { "epoch": 24.846, "grad_norm": 0.9110061526298523, "learning_rate": 2e-05, "loss": 0.04828682, "step": 12423 }, { "epoch": 24.848, "grad_norm": 1.3550394773483276, "learning_rate": 2e-05, "loss": 0.05385119, "step": 12424 }, { "epoch": 24.85, "grad_norm": 4.069726943969727, "learning_rate": 2e-05, "loss": 0.05153431, "step": 12425 }, { "epoch": 24.852, "grad_norm": 0.9940489530563354, "learning_rate": 2e-05, "loss": 0.04242203, "step": 12426 }, { "epoch": 24.854, "grad_norm": 1.12723708152771, "learning_rate": 2e-05, "loss": 0.04468431, "step": 12427 }, { "epoch": 24.856, "grad_norm": 1.6120734214782715, "learning_rate": 2e-05, "loss": 0.04380012, "step": 12428 }, { "epoch": 24.858, "grad_norm": 1.3691272735595703, "learning_rate": 2e-05, "loss": 0.05955937, "step": 12429 }, { "epoch": 24.86, "grad_norm": 1.2361844778060913, "learning_rate": 2e-05, "loss": 0.05150126, "step": 12430 }, { "epoch": 24.862, "grad_norm": 2.706963539123535, "learning_rate": 2e-05, "loss": 0.04967477, "step": 12431 }, { "epoch": 24.864, "grad_norm": 0.9736936688423157, "learning_rate": 2e-05, "loss": 0.03772056, "step": 12432 }, { "epoch": 24.866, "grad_norm": 1.8979235887527466, "learning_rate": 2e-05, "loss": 0.07150412, "step": 12433 }, { "epoch": 24.868, "grad_norm": 1.4550870656967163, "learning_rate": 2e-05, "loss": 0.05269644, "step": 12434 }, { "epoch": 24.87, "grad_norm": 0.9215329885482788, "learning_rate": 2e-05, "loss": 0.03658146, "step": 12435 }, { "epoch": 24.872, "grad_norm": 1.3590787649154663, "learning_rate": 2e-05, "loss": 0.0567585, "step": 12436 }, { "epoch": 24.874, "grad_norm": 1.385019063949585, "learning_rate": 2e-05, "loss": 0.06552453, "step": 12437 }, { "epoch": 24.876, "grad_norm": 1.402735710144043, "learning_rate": 2e-05, "loss": 0.05374145, "step": 12438 }, { "epoch": 24.878, "grad_norm": 1.499819278717041, "learning_rate": 2e-05, "loss": 0.07054937, "step": 12439 }, { "epoch": 24.88, "grad_norm": 1.3510311841964722, "learning_rate": 2e-05, "loss": 0.04068061, "step": 12440 }, { "epoch": 24.882, "grad_norm": 0.967106282711029, "learning_rate": 2e-05, "loss": 0.03488389, "step": 12441 }, { "epoch": 24.884, "grad_norm": 2.2481534481048584, "learning_rate": 2e-05, "loss": 0.05694055, "step": 12442 }, { "epoch": 24.886, "grad_norm": 1.1655399799346924, "learning_rate": 2e-05, "loss": 0.07010768, "step": 12443 }, { "epoch": 24.888, "grad_norm": 1.2694900035858154, "learning_rate": 2e-05, "loss": 0.04710036, "step": 12444 }, { "epoch": 24.89, "grad_norm": 1.4313056468963623, "learning_rate": 2e-05, "loss": 0.05532254, "step": 12445 }, { "epoch": 24.892, "grad_norm": 1.7661042213439941, "learning_rate": 2e-05, "loss": 0.0498428, "step": 12446 }, { "epoch": 24.894, "grad_norm": 1.3332576751708984, "learning_rate": 2e-05, "loss": 0.06412599, "step": 12447 }, { "epoch": 24.896, "grad_norm": 1.2032532691955566, "learning_rate": 2e-05, "loss": 0.04637364, "step": 12448 }, { "epoch": 24.898, "grad_norm": 1.1266741752624512, "learning_rate": 2e-05, "loss": 0.04378768, "step": 12449 }, { "epoch": 24.9, "grad_norm": 2.174483299255371, "learning_rate": 2e-05, "loss": 0.05236176, "step": 12450 }, { "epoch": 24.902, "grad_norm": 1.1338982582092285, "learning_rate": 2e-05, "loss": 0.05741382, "step": 12451 }, { "epoch": 24.904, "grad_norm": 1.429327130317688, "learning_rate": 2e-05, "loss": 0.06462146, "step": 12452 }, { "epoch": 24.906, "grad_norm": 1.3515311479568481, "learning_rate": 2e-05, "loss": 0.06428777, "step": 12453 }, { "epoch": 24.908, "grad_norm": 1.2746869325637817, "learning_rate": 2e-05, "loss": 0.03591898, "step": 12454 }, { "epoch": 24.91, "grad_norm": 0.8697836399078369, "learning_rate": 2e-05, "loss": 0.04534568, "step": 12455 }, { "epoch": 24.912, "grad_norm": 2.8355445861816406, "learning_rate": 2e-05, "loss": 0.08570682, "step": 12456 }, { "epoch": 24.914, "grad_norm": 1.0572867393493652, "learning_rate": 2e-05, "loss": 0.0419245, "step": 12457 }, { "epoch": 24.916, "grad_norm": 1.1417351961135864, "learning_rate": 2e-05, "loss": 0.04117936, "step": 12458 }, { "epoch": 24.918, "grad_norm": 1.2422683238983154, "learning_rate": 2e-05, "loss": 0.06599592, "step": 12459 }, { "epoch": 24.92, "grad_norm": 1.0361512899398804, "learning_rate": 2e-05, "loss": 0.0421612, "step": 12460 }, { "epoch": 24.922, "grad_norm": 1.1876249313354492, "learning_rate": 2e-05, "loss": 0.03877359, "step": 12461 }, { "epoch": 24.924, "grad_norm": 1.1779043674468994, "learning_rate": 2e-05, "loss": 0.05586039, "step": 12462 }, { "epoch": 24.926, "grad_norm": 1.3956304788589478, "learning_rate": 2e-05, "loss": 0.06718011, "step": 12463 }, { "epoch": 24.928, "grad_norm": 1.1677271127700806, "learning_rate": 2e-05, "loss": 0.04779094, "step": 12464 }, { "epoch": 24.93, "grad_norm": 1.3010845184326172, "learning_rate": 2e-05, "loss": 0.05749074, "step": 12465 }, { "epoch": 24.932, "grad_norm": 1.7303367853164673, "learning_rate": 2e-05, "loss": 0.06215479, "step": 12466 }, { "epoch": 24.934, "grad_norm": 1.5634523630142212, "learning_rate": 2e-05, "loss": 0.04592799, "step": 12467 }, { "epoch": 24.936, "grad_norm": 1.2501870393753052, "learning_rate": 2e-05, "loss": 0.04781087, "step": 12468 }, { "epoch": 24.938, "grad_norm": 1.1738709211349487, "learning_rate": 2e-05, "loss": 0.06016637, "step": 12469 }, { "epoch": 24.94, "grad_norm": 1.6120697259902954, "learning_rate": 2e-05, "loss": 0.04477395, "step": 12470 }, { "epoch": 24.942, "grad_norm": 1.7098530530929565, "learning_rate": 2e-05, "loss": 0.04566197, "step": 12471 }, { "epoch": 24.944, "grad_norm": 1.2163127660751343, "learning_rate": 2e-05, "loss": 0.05447794, "step": 12472 }, { "epoch": 24.946, "grad_norm": 0.9320288300514221, "learning_rate": 2e-05, "loss": 0.03776973, "step": 12473 }, { "epoch": 24.948, "grad_norm": 1.041003704071045, "learning_rate": 2e-05, "loss": 0.04140902, "step": 12474 }, { "epoch": 24.95, "grad_norm": 1.0450096130371094, "learning_rate": 2e-05, "loss": 0.0510888, "step": 12475 }, { "epoch": 24.951999999999998, "grad_norm": 1.4142359495162964, "learning_rate": 2e-05, "loss": 0.05782855, "step": 12476 }, { "epoch": 24.954, "grad_norm": 1.2208404541015625, "learning_rate": 2e-05, "loss": 0.05821688, "step": 12477 }, { "epoch": 24.956, "grad_norm": 1.2004338502883911, "learning_rate": 2e-05, "loss": 0.05105543, "step": 12478 }, { "epoch": 24.958, "grad_norm": 1.1134040355682373, "learning_rate": 2e-05, "loss": 0.05632985, "step": 12479 }, { "epoch": 24.96, "grad_norm": 1.3752193450927734, "learning_rate": 2e-05, "loss": 0.05364887, "step": 12480 }, { "epoch": 24.962, "grad_norm": 0.9691943526268005, "learning_rate": 2e-05, "loss": 0.04568587, "step": 12481 }, { "epoch": 24.964, "grad_norm": 1.4496279954910278, "learning_rate": 2e-05, "loss": 0.03921795, "step": 12482 }, { "epoch": 24.966, "grad_norm": 1.336695909500122, "learning_rate": 2e-05, "loss": 0.05695701, "step": 12483 }, { "epoch": 24.968, "grad_norm": 3.207062244415283, "learning_rate": 2e-05, "loss": 0.06334863, "step": 12484 }, { "epoch": 24.97, "grad_norm": 1.1654314994812012, "learning_rate": 2e-05, "loss": 0.04864758, "step": 12485 }, { "epoch": 24.972, "grad_norm": 1.437970757484436, "learning_rate": 2e-05, "loss": 0.07023741, "step": 12486 }, { "epoch": 24.974, "grad_norm": 1.3537486791610718, "learning_rate": 2e-05, "loss": 0.05866653, "step": 12487 }, { "epoch": 24.976, "grad_norm": 1.0523929595947266, "learning_rate": 2e-05, "loss": 0.04211807, "step": 12488 }, { "epoch": 24.978, "grad_norm": 1.0875717401504517, "learning_rate": 2e-05, "loss": 0.04475028, "step": 12489 }, { "epoch": 24.98, "grad_norm": 1.0803544521331787, "learning_rate": 2e-05, "loss": 0.0435186, "step": 12490 }, { "epoch": 24.982, "grad_norm": 1.141843557357788, "learning_rate": 2e-05, "loss": 0.05398731, "step": 12491 }, { "epoch": 24.984, "grad_norm": 0.9223504662513733, "learning_rate": 2e-05, "loss": 0.03944103, "step": 12492 }, { "epoch": 24.986, "grad_norm": 1.3704251050949097, "learning_rate": 2e-05, "loss": 0.03878814, "step": 12493 }, { "epoch": 24.988, "grad_norm": 2.051528215408325, "learning_rate": 2e-05, "loss": 0.04926416, "step": 12494 }, { "epoch": 24.99, "grad_norm": 1.2882025241851807, "learning_rate": 2e-05, "loss": 0.05915561, "step": 12495 }, { "epoch": 24.992, "grad_norm": 1.3897839784622192, "learning_rate": 2e-05, "loss": 0.07913811, "step": 12496 }, { "epoch": 24.994, "grad_norm": 1.2579975128173828, "learning_rate": 2e-05, "loss": 0.07199284, "step": 12497 }, { "epoch": 24.996, "grad_norm": 1.136902928352356, "learning_rate": 2e-05, "loss": 0.04523735, "step": 12498 }, { "epoch": 24.998, "grad_norm": 1.065345287322998, "learning_rate": 2e-05, "loss": 0.03127296, "step": 12499 }, { "epoch": 25.0, "grad_norm": 1.0609544515609741, "learning_rate": 2e-05, "loss": 0.05613097, "step": 12500 }, { "epoch": 25.0, "eval_performance": { "AngleClassification_1": 0.998, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9840319361277445, "Equal_1": 0.994, "Equal_2": 0.9760479041916168, "Equal_3": 0.9780439121756487, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.9880239520958084, "Parallel_1": 0.9879759519038076, "Parallel_2": 0.9959919839679359, "Parallel_3": 0.988, "Perpendicular_1": 0.998, "Perpendicular_2": 1.0, "Perpendicular_3": 0.87374749498998, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.998, "PointLiesOnCircle_3": 0.9912000000000001, "PointLiesOnLine_1": 0.9959919839679359, "PointLiesOnLine_2": 0.9939879759519038, "PointLiesOnLine_3": 0.9860279441117764 }, "eval_runtime": 226.2399, "eval_samples_per_second": 46.411, "eval_steps_per_second": 0.928, "step": 12500 }, { "epoch": 25.002, "grad_norm": 1.425715684890747, "learning_rate": 2e-05, "loss": 0.06134047, "step": 12501 }, { "epoch": 25.004, "grad_norm": 1.2900484800338745, "learning_rate": 2e-05, "loss": 0.05005276, "step": 12502 }, { "epoch": 25.006, "grad_norm": 0.9104924201965332, "learning_rate": 2e-05, "loss": 0.03685468, "step": 12503 }, { "epoch": 25.008, "grad_norm": 1.1765451431274414, "learning_rate": 2e-05, "loss": 0.05778497, "step": 12504 }, { "epoch": 25.01, "grad_norm": 1.2537585496902466, "learning_rate": 2e-05, "loss": 0.06120408, "step": 12505 }, { "epoch": 25.012, "grad_norm": 0.9956067204475403, "learning_rate": 2e-05, "loss": 0.0410351, "step": 12506 }, { "epoch": 25.014, "grad_norm": 1.318725347518921, "learning_rate": 2e-05, "loss": 0.06352052, "step": 12507 }, { "epoch": 25.016, "grad_norm": 1.0027438402175903, "learning_rate": 2e-05, "loss": 0.04642474, "step": 12508 }, { "epoch": 25.018, "grad_norm": 1.3175755739212036, "learning_rate": 2e-05, "loss": 0.03771981, "step": 12509 }, { "epoch": 25.02, "grad_norm": 1.5995951890945435, "learning_rate": 2e-05, "loss": 0.05020139, "step": 12510 }, { "epoch": 25.022, "grad_norm": 1.1397231817245483, "learning_rate": 2e-05, "loss": 0.04389757, "step": 12511 }, { "epoch": 25.024, "grad_norm": 1.8505711555480957, "learning_rate": 2e-05, "loss": 0.05464513, "step": 12512 }, { "epoch": 25.026, "grad_norm": 1.0824058055877686, "learning_rate": 2e-05, "loss": 0.04698636, "step": 12513 }, { "epoch": 25.028, "grad_norm": 1.1208677291870117, "learning_rate": 2e-05, "loss": 0.06447575, "step": 12514 }, { "epoch": 25.03, "grad_norm": 0.947784960269928, "learning_rate": 2e-05, "loss": 0.03796426, "step": 12515 }, { "epoch": 25.032, "grad_norm": 1.0535982847213745, "learning_rate": 2e-05, "loss": 0.04684829, "step": 12516 }, { "epoch": 25.034, "grad_norm": 0.9957151412963867, "learning_rate": 2e-05, "loss": 0.04004215, "step": 12517 }, { "epoch": 25.036, "grad_norm": 1.3921725749969482, "learning_rate": 2e-05, "loss": 0.05759081, "step": 12518 }, { "epoch": 25.038, "grad_norm": 1.3522593975067139, "learning_rate": 2e-05, "loss": 0.05172538, "step": 12519 }, { "epoch": 25.04, "grad_norm": 1.475589632987976, "learning_rate": 2e-05, "loss": 0.05882673, "step": 12520 }, { "epoch": 25.042, "grad_norm": 1.1035587787628174, "learning_rate": 2e-05, "loss": 0.05213946, "step": 12521 }, { "epoch": 25.044, "grad_norm": 1.1994236707687378, "learning_rate": 2e-05, "loss": 0.04174181, "step": 12522 }, { "epoch": 25.046, "grad_norm": 1.2844736576080322, "learning_rate": 2e-05, "loss": 0.05479585, "step": 12523 }, { "epoch": 25.048, "grad_norm": 1.0577770471572876, "learning_rate": 2e-05, "loss": 0.04158094, "step": 12524 }, { "epoch": 25.05, "grad_norm": 3.2212443351745605, "learning_rate": 2e-05, "loss": 0.05103163, "step": 12525 }, { "epoch": 25.052, "grad_norm": 1.1451666355133057, "learning_rate": 2e-05, "loss": 0.0525579, "step": 12526 }, { "epoch": 25.054, "grad_norm": 1.662766695022583, "learning_rate": 2e-05, "loss": 0.04766974, "step": 12527 }, { "epoch": 25.056, "grad_norm": 2.3434860706329346, "learning_rate": 2e-05, "loss": 0.06002715, "step": 12528 }, { "epoch": 25.058, "grad_norm": 1.9326956272125244, "learning_rate": 2e-05, "loss": 0.05762844, "step": 12529 }, { "epoch": 25.06, "grad_norm": 1.1619752645492554, "learning_rate": 2e-05, "loss": 0.04226045, "step": 12530 }, { "epoch": 25.062, "grad_norm": 1.1006190776824951, "learning_rate": 2e-05, "loss": 0.05237936, "step": 12531 }, { "epoch": 25.064, "grad_norm": 1.780943751335144, "learning_rate": 2e-05, "loss": 0.06937633, "step": 12532 }, { "epoch": 25.066, "grad_norm": 1.3734432458877563, "learning_rate": 2e-05, "loss": 0.05111995, "step": 12533 }, { "epoch": 25.068, "grad_norm": 0.9654284119606018, "learning_rate": 2e-05, "loss": 0.04022484, "step": 12534 }, { "epoch": 25.07, "grad_norm": 1.1616833209991455, "learning_rate": 2e-05, "loss": 0.03959507, "step": 12535 }, { "epoch": 25.072, "grad_norm": 1.3682682514190674, "learning_rate": 2e-05, "loss": 0.04857575, "step": 12536 }, { "epoch": 25.074, "grad_norm": 1.0579208135604858, "learning_rate": 2e-05, "loss": 0.04151409, "step": 12537 }, { "epoch": 25.076, "grad_norm": 2.0756044387817383, "learning_rate": 2e-05, "loss": 0.06575799, "step": 12538 }, { "epoch": 25.078, "grad_norm": 1.7740479707717896, "learning_rate": 2e-05, "loss": 0.07429114, "step": 12539 }, { "epoch": 25.08, "grad_norm": 1.6758410930633545, "learning_rate": 2e-05, "loss": 0.03940992, "step": 12540 }, { "epoch": 25.082, "grad_norm": 1.026105284690857, "learning_rate": 2e-05, "loss": 0.03893386, "step": 12541 }, { "epoch": 25.084, "grad_norm": 1.3593101501464844, "learning_rate": 2e-05, "loss": 0.05561619, "step": 12542 }, { "epoch": 25.086, "grad_norm": 2.656904458999634, "learning_rate": 2e-05, "loss": 0.07361579, "step": 12543 }, { "epoch": 25.088, "grad_norm": 1.0980315208435059, "learning_rate": 2e-05, "loss": 0.03986768, "step": 12544 }, { "epoch": 25.09, "grad_norm": 1.1834790706634521, "learning_rate": 2e-05, "loss": 0.0560291, "step": 12545 }, { "epoch": 25.092, "grad_norm": 1.1375064849853516, "learning_rate": 2e-05, "loss": 0.04850543, "step": 12546 }, { "epoch": 25.094, "grad_norm": 1.3515863418579102, "learning_rate": 2e-05, "loss": 0.04888011, "step": 12547 }, { "epoch": 25.096, "grad_norm": 0.9566068649291992, "learning_rate": 2e-05, "loss": 0.03617322, "step": 12548 }, { "epoch": 25.098, "grad_norm": 1.1905452013015747, "learning_rate": 2e-05, "loss": 0.05990719, "step": 12549 }, { "epoch": 25.1, "grad_norm": 1.3843622207641602, "learning_rate": 2e-05, "loss": 0.07800481, "step": 12550 }, { "epoch": 25.102, "grad_norm": 1.4753957986831665, "learning_rate": 2e-05, "loss": 0.04985467, "step": 12551 }, { "epoch": 25.104, "grad_norm": 1.096551537513733, "learning_rate": 2e-05, "loss": 0.04852128, "step": 12552 }, { "epoch": 25.106, "grad_norm": 1.706013560295105, "learning_rate": 2e-05, "loss": 0.06387548, "step": 12553 }, { "epoch": 25.108, "grad_norm": 1.7288285493850708, "learning_rate": 2e-05, "loss": 0.07150245, "step": 12554 }, { "epoch": 25.11, "grad_norm": 1.1662979125976562, "learning_rate": 2e-05, "loss": 0.04167096, "step": 12555 }, { "epoch": 25.112, "grad_norm": 1.8005623817443848, "learning_rate": 2e-05, "loss": 0.05115997, "step": 12556 }, { "epoch": 25.114, "grad_norm": 1.8382339477539062, "learning_rate": 2e-05, "loss": 0.03514276, "step": 12557 }, { "epoch": 25.116, "grad_norm": 1.1764848232269287, "learning_rate": 2e-05, "loss": 0.06012421, "step": 12558 }, { "epoch": 25.118, "grad_norm": 1.7232075929641724, "learning_rate": 2e-05, "loss": 0.06067395, "step": 12559 }, { "epoch": 25.12, "grad_norm": 1.3636465072631836, "learning_rate": 2e-05, "loss": 0.05937435, "step": 12560 }, { "epoch": 25.122, "grad_norm": 1.7067391872406006, "learning_rate": 2e-05, "loss": 0.05294079, "step": 12561 }, { "epoch": 25.124, "grad_norm": 1.546687364578247, "learning_rate": 2e-05, "loss": 0.05781291, "step": 12562 }, { "epoch": 25.126, "grad_norm": 1.2537044286727905, "learning_rate": 2e-05, "loss": 0.05506508, "step": 12563 }, { "epoch": 25.128, "grad_norm": 1.586074948310852, "learning_rate": 2e-05, "loss": 0.05425185, "step": 12564 }, { "epoch": 25.13, "grad_norm": 1.0422202348709106, "learning_rate": 2e-05, "loss": 0.03463748, "step": 12565 }, { "epoch": 25.132, "grad_norm": 0.9970985651016235, "learning_rate": 2e-05, "loss": 0.03827235, "step": 12566 }, { "epoch": 25.134, "grad_norm": 1.2309911251068115, "learning_rate": 2e-05, "loss": 0.0526417, "step": 12567 }, { "epoch": 25.136, "grad_norm": 1.073454737663269, "learning_rate": 2e-05, "loss": 0.04796216, "step": 12568 }, { "epoch": 25.138, "grad_norm": 1.2036497592926025, "learning_rate": 2e-05, "loss": 0.05618038, "step": 12569 }, { "epoch": 25.14, "grad_norm": 2.3004140853881836, "learning_rate": 2e-05, "loss": 0.05867749, "step": 12570 }, { "epoch": 25.142, "grad_norm": 1.6045141220092773, "learning_rate": 2e-05, "loss": 0.05064381, "step": 12571 }, { "epoch": 25.144, "grad_norm": 0.8704143166542053, "learning_rate": 2e-05, "loss": 0.03309072, "step": 12572 }, { "epoch": 25.146, "grad_norm": 1.015468716621399, "learning_rate": 2e-05, "loss": 0.04305565, "step": 12573 }, { "epoch": 25.148, "grad_norm": 1.7779539823532104, "learning_rate": 2e-05, "loss": 0.04993201, "step": 12574 }, { "epoch": 25.15, "grad_norm": 1.9122910499572754, "learning_rate": 2e-05, "loss": 0.05920298, "step": 12575 }, { "epoch": 25.152, "grad_norm": 1.8597750663757324, "learning_rate": 2e-05, "loss": 0.04809675, "step": 12576 }, { "epoch": 25.154, "grad_norm": 1.3296782970428467, "learning_rate": 2e-05, "loss": 0.05094036, "step": 12577 }, { "epoch": 25.156, "grad_norm": 1.946590542793274, "learning_rate": 2e-05, "loss": 0.07196194, "step": 12578 }, { "epoch": 25.158, "grad_norm": 1.1013623476028442, "learning_rate": 2e-05, "loss": 0.03726883, "step": 12579 }, { "epoch": 25.16, "grad_norm": 1.1674823760986328, "learning_rate": 2e-05, "loss": 0.04712621, "step": 12580 }, { "epoch": 25.162, "grad_norm": 1.19246506690979, "learning_rate": 2e-05, "loss": 0.04659901, "step": 12581 }, { "epoch": 25.164, "grad_norm": 1.0725971460342407, "learning_rate": 2e-05, "loss": 0.04137574, "step": 12582 }, { "epoch": 25.166, "grad_norm": 1.2272346019744873, "learning_rate": 2e-05, "loss": 0.049952, "step": 12583 }, { "epoch": 25.168, "grad_norm": 1.2743947505950928, "learning_rate": 2e-05, "loss": 0.04440939, "step": 12584 }, { "epoch": 25.17, "grad_norm": 1.2129122018814087, "learning_rate": 2e-05, "loss": 0.04122353, "step": 12585 }, { "epoch": 25.172, "grad_norm": 1.0951646566390991, "learning_rate": 2e-05, "loss": 0.0513562, "step": 12586 }, { "epoch": 25.174, "grad_norm": 1.3861701488494873, "learning_rate": 2e-05, "loss": 0.04384424, "step": 12587 }, { "epoch": 25.176, "grad_norm": 1.3205156326293945, "learning_rate": 2e-05, "loss": 0.05407259, "step": 12588 }, { "epoch": 25.178, "grad_norm": 0.9771292209625244, "learning_rate": 2e-05, "loss": 0.03984643, "step": 12589 }, { "epoch": 25.18, "grad_norm": 1.100267767906189, "learning_rate": 2e-05, "loss": 0.05573936, "step": 12590 }, { "epoch": 25.182, "grad_norm": 1.2173435688018799, "learning_rate": 2e-05, "loss": 0.03959444, "step": 12591 }, { "epoch": 25.184, "grad_norm": 1.5524941682815552, "learning_rate": 2e-05, "loss": 0.05593769, "step": 12592 }, { "epoch": 25.186, "grad_norm": 1.542313575744629, "learning_rate": 2e-05, "loss": 0.07544073, "step": 12593 }, { "epoch": 25.188, "grad_norm": 3.778968572616577, "learning_rate": 2e-05, "loss": 0.05602132, "step": 12594 }, { "epoch": 25.19, "grad_norm": 1.0212326049804688, "learning_rate": 2e-05, "loss": 0.04845678, "step": 12595 }, { "epoch": 25.192, "grad_norm": 1.4829176664352417, "learning_rate": 2e-05, "loss": 0.05568189, "step": 12596 }, { "epoch": 25.194, "grad_norm": 2.563568592071533, "learning_rate": 2e-05, "loss": 0.0583737, "step": 12597 }, { "epoch": 25.196, "grad_norm": 1.834092140197754, "learning_rate": 2e-05, "loss": 0.07924025, "step": 12598 }, { "epoch": 25.198, "grad_norm": 1.4551931619644165, "learning_rate": 2e-05, "loss": 0.06113656, "step": 12599 }, { "epoch": 25.2, "grad_norm": 2.2312159538269043, "learning_rate": 2e-05, "loss": 0.05965286, "step": 12600 }, { "epoch": 25.202, "grad_norm": 2.048811674118042, "learning_rate": 2e-05, "loss": 0.0518702, "step": 12601 }, { "epoch": 25.204, "grad_norm": 1.5694825649261475, "learning_rate": 2e-05, "loss": 0.03933308, "step": 12602 }, { "epoch": 25.206, "grad_norm": 1.2032784223556519, "learning_rate": 2e-05, "loss": 0.0613999, "step": 12603 }, { "epoch": 25.208, "grad_norm": 1.162183165550232, "learning_rate": 2e-05, "loss": 0.04622652, "step": 12604 }, { "epoch": 25.21, "grad_norm": 1.0905358791351318, "learning_rate": 2e-05, "loss": 0.04642868, "step": 12605 }, { "epoch": 25.212, "grad_norm": 1.3495539426803589, "learning_rate": 2e-05, "loss": 0.05017714, "step": 12606 }, { "epoch": 25.214, "grad_norm": 1.1227021217346191, "learning_rate": 2e-05, "loss": 0.06425351, "step": 12607 }, { "epoch": 25.216, "grad_norm": 1.4750159978866577, "learning_rate": 2e-05, "loss": 0.05119936, "step": 12608 }, { "epoch": 25.218, "grad_norm": 2.0690102577209473, "learning_rate": 2e-05, "loss": 0.03707226, "step": 12609 }, { "epoch": 25.22, "grad_norm": 1.1899744272232056, "learning_rate": 2e-05, "loss": 0.06661716, "step": 12610 }, { "epoch": 25.222, "grad_norm": 1.1248366832733154, "learning_rate": 2e-05, "loss": 0.03522804, "step": 12611 }, { "epoch": 25.224, "grad_norm": 2.226879119873047, "learning_rate": 2e-05, "loss": 0.04442178, "step": 12612 }, { "epoch": 25.226, "grad_norm": 1.4254546165466309, "learning_rate": 2e-05, "loss": 0.07015112, "step": 12613 }, { "epoch": 25.228, "grad_norm": 1.2931039333343506, "learning_rate": 2e-05, "loss": 0.05845713, "step": 12614 }, { "epoch": 25.23, "grad_norm": 1.0710176229476929, "learning_rate": 2e-05, "loss": 0.04817979, "step": 12615 }, { "epoch": 25.232, "grad_norm": 1.3159974813461304, "learning_rate": 2e-05, "loss": 0.05791353, "step": 12616 }, { "epoch": 25.234, "grad_norm": 3.9899251461029053, "learning_rate": 2e-05, "loss": 0.04513327, "step": 12617 }, { "epoch": 25.236, "grad_norm": 1.7770776748657227, "learning_rate": 2e-05, "loss": 0.06699249, "step": 12618 }, { "epoch": 25.238, "grad_norm": 1.515300989151001, "learning_rate": 2e-05, "loss": 0.03843363, "step": 12619 }, { "epoch": 25.24, "grad_norm": 1.2055643796920776, "learning_rate": 2e-05, "loss": 0.04187072, "step": 12620 }, { "epoch": 25.242, "grad_norm": 1.2208244800567627, "learning_rate": 2e-05, "loss": 0.05198207, "step": 12621 }, { "epoch": 25.244, "grad_norm": 1.3119043111801147, "learning_rate": 2e-05, "loss": 0.06348589, "step": 12622 }, { "epoch": 25.246, "grad_norm": 1.0554064512252808, "learning_rate": 2e-05, "loss": 0.0389347, "step": 12623 }, { "epoch": 25.248, "grad_norm": 1.4631547927856445, "learning_rate": 2e-05, "loss": 0.05191807, "step": 12624 }, { "epoch": 25.25, "grad_norm": 1.1568636894226074, "learning_rate": 2e-05, "loss": 0.05211708, "step": 12625 }, { "epoch": 25.252, "grad_norm": 1.4335490465164185, "learning_rate": 2e-05, "loss": 0.0558568, "step": 12626 }, { "epoch": 25.254, "grad_norm": 0.9383687376976013, "learning_rate": 2e-05, "loss": 0.04501093, "step": 12627 }, { "epoch": 25.256, "grad_norm": 1.3434826135635376, "learning_rate": 2e-05, "loss": 0.05749689, "step": 12628 }, { "epoch": 25.258, "grad_norm": 1.2343528270721436, "learning_rate": 2e-05, "loss": 0.06191377, "step": 12629 }, { "epoch": 25.26, "grad_norm": 1.0411930084228516, "learning_rate": 2e-05, "loss": 0.04297394, "step": 12630 }, { "epoch": 25.262, "grad_norm": 1.7457959651947021, "learning_rate": 2e-05, "loss": 0.04678249, "step": 12631 }, { "epoch": 25.264, "grad_norm": 1.4824542999267578, "learning_rate": 2e-05, "loss": 0.05779995, "step": 12632 }, { "epoch": 25.266, "grad_norm": 1.9669220447540283, "learning_rate": 2e-05, "loss": 0.08582433, "step": 12633 }, { "epoch": 25.268, "grad_norm": 1.6175854206085205, "learning_rate": 2e-05, "loss": 0.07360069, "step": 12634 }, { "epoch": 25.27, "grad_norm": 1.2788103818893433, "learning_rate": 2e-05, "loss": 0.05685135, "step": 12635 }, { "epoch": 25.272, "grad_norm": 1.2258474826812744, "learning_rate": 2e-05, "loss": 0.0516485, "step": 12636 }, { "epoch": 25.274, "grad_norm": 1.2428290843963623, "learning_rate": 2e-05, "loss": 0.04637352, "step": 12637 }, { "epoch": 25.276, "grad_norm": 1.3296626806259155, "learning_rate": 2e-05, "loss": 0.05118944, "step": 12638 }, { "epoch": 25.278, "grad_norm": 1.1303017139434814, "learning_rate": 2e-05, "loss": 0.0524886, "step": 12639 }, { "epoch": 25.28, "grad_norm": 1.238074779510498, "learning_rate": 2e-05, "loss": 0.05969185, "step": 12640 }, { "epoch": 25.282, "grad_norm": 1.1553701162338257, "learning_rate": 2e-05, "loss": 0.04252638, "step": 12641 }, { "epoch": 25.284, "grad_norm": 1.0768141746520996, "learning_rate": 2e-05, "loss": 0.05180396, "step": 12642 }, { "epoch": 25.286, "grad_norm": 1.986037015914917, "learning_rate": 2e-05, "loss": 0.05125058, "step": 12643 }, { "epoch": 25.288, "grad_norm": 1.086488962173462, "learning_rate": 2e-05, "loss": 0.0477225, "step": 12644 }, { "epoch": 25.29, "grad_norm": 1.5222396850585938, "learning_rate": 2e-05, "loss": 0.04933942, "step": 12645 }, { "epoch": 25.292, "grad_norm": 10.120864868164062, "learning_rate": 2e-05, "loss": 0.04941028, "step": 12646 }, { "epoch": 25.294, "grad_norm": 1.2820665836334229, "learning_rate": 2e-05, "loss": 0.06298565, "step": 12647 }, { "epoch": 25.296, "grad_norm": 1.0873534679412842, "learning_rate": 2e-05, "loss": 0.05225194, "step": 12648 }, { "epoch": 25.298, "grad_norm": 0.9621022343635559, "learning_rate": 2e-05, "loss": 0.03225259, "step": 12649 }, { "epoch": 25.3, "grad_norm": 1.5267809629440308, "learning_rate": 2e-05, "loss": 0.05504768, "step": 12650 }, { "epoch": 25.302, "grad_norm": 1.266568899154663, "learning_rate": 2e-05, "loss": 0.06148052, "step": 12651 }, { "epoch": 25.304, "grad_norm": 1.7587610483169556, "learning_rate": 2e-05, "loss": 0.05041414, "step": 12652 }, { "epoch": 25.306, "grad_norm": 1.018044352531433, "learning_rate": 2e-05, "loss": 0.04399976, "step": 12653 }, { "epoch": 25.308, "grad_norm": 1.2122786045074463, "learning_rate": 2e-05, "loss": 0.05793189, "step": 12654 }, { "epoch": 25.31, "grad_norm": 1.0140517950057983, "learning_rate": 2e-05, "loss": 0.04603332, "step": 12655 }, { "epoch": 25.312, "grad_norm": 1.1391375064849854, "learning_rate": 2e-05, "loss": 0.05528952, "step": 12656 }, { "epoch": 25.314, "grad_norm": 1.2921862602233887, "learning_rate": 2e-05, "loss": 0.05289361, "step": 12657 }, { "epoch": 25.316, "grad_norm": 0.8794735074043274, "learning_rate": 2e-05, "loss": 0.02773062, "step": 12658 }, { "epoch": 25.318, "grad_norm": 1.76371431350708, "learning_rate": 2e-05, "loss": 0.04772287, "step": 12659 }, { "epoch": 25.32, "grad_norm": 0.9986810684204102, "learning_rate": 2e-05, "loss": 0.04108698, "step": 12660 }, { "epoch": 25.322, "grad_norm": 1.0789239406585693, "learning_rate": 2e-05, "loss": 0.04397911, "step": 12661 }, { "epoch": 25.324, "grad_norm": 1.6456670761108398, "learning_rate": 2e-05, "loss": 0.04270291, "step": 12662 }, { "epoch": 25.326, "grad_norm": 1.4813755750656128, "learning_rate": 2e-05, "loss": 0.05182194, "step": 12663 }, { "epoch": 25.328, "grad_norm": 1.0695228576660156, "learning_rate": 2e-05, "loss": 0.05125644, "step": 12664 }, { "epoch": 25.33, "grad_norm": 1.1046431064605713, "learning_rate": 2e-05, "loss": 0.0507373, "step": 12665 }, { "epoch": 25.332, "grad_norm": 1.4391566514968872, "learning_rate": 2e-05, "loss": 0.05663792, "step": 12666 }, { "epoch": 25.334, "grad_norm": 1.1097612380981445, "learning_rate": 2e-05, "loss": 0.03972208, "step": 12667 }, { "epoch": 25.336, "grad_norm": 1.5825376510620117, "learning_rate": 2e-05, "loss": 0.06583906, "step": 12668 }, { "epoch": 25.338, "grad_norm": 1.1241800785064697, "learning_rate": 2e-05, "loss": 0.0494065, "step": 12669 }, { "epoch": 25.34, "grad_norm": 2.1177639961242676, "learning_rate": 2e-05, "loss": 0.05350515, "step": 12670 }, { "epoch": 25.342, "grad_norm": 1.2628130912780762, "learning_rate": 2e-05, "loss": 0.04704548, "step": 12671 }, { "epoch": 25.344, "grad_norm": 1.1926730871200562, "learning_rate": 2e-05, "loss": 0.05114765, "step": 12672 }, { "epoch": 25.346, "grad_norm": 0.8333731293678284, "learning_rate": 2e-05, "loss": 0.02912606, "step": 12673 }, { "epoch": 25.348, "grad_norm": 1.6974793672561646, "learning_rate": 2e-05, "loss": 0.05681619, "step": 12674 }, { "epoch": 25.35, "grad_norm": 1.1853028535842896, "learning_rate": 2e-05, "loss": 0.04518053, "step": 12675 }, { "epoch": 25.352, "grad_norm": 1.049663782119751, "learning_rate": 2e-05, "loss": 0.04833854, "step": 12676 }, { "epoch": 25.354, "grad_norm": 1.1964635848999023, "learning_rate": 2e-05, "loss": 0.06861034, "step": 12677 }, { "epoch": 25.356, "grad_norm": 0.9890965223312378, "learning_rate": 2e-05, "loss": 0.04193068, "step": 12678 }, { "epoch": 25.358, "grad_norm": 1.1136928796768188, "learning_rate": 2e-05, "loss": 0.06178171, "step": 12679 }, { "epoch": 25.36, "grad_norm": 1.3247332572937012, "learning_rate": 2e-05, "loss": 0.05393716, "step": 12680 }, { "epoch": 25.362, "grad_norm": 2.4889371395111084, "learning_rate": 2e-05, "loss": 0.05470654, "step": 12681 }, { "epoch": 25.364, "grad_norm": 0.9511756300926208, "learning_rate": 2e-05, "loss": 0.03818203, "step": 12682 }, { "epoch": 25.366, "grad_norm": 1.3237273693084717, "learning_rate": 2e-05, "loss": 0.05691194, "step": 12683 }, { "epoch": 25.368, "grad_norm": 1.0649863481521606, "learning_rate": 2e-05, "loss": 0.04523048, "step": 12684 }, { "epoch": 25.37, "grad_norm": 1.1887733936309814, "learning_rate": 2e-05, "loss": 0.05220554, "step": 12685 }, { "epoch": 25.372, "grad_norm": 2.2650814056396484, "learning_rate": 2e-05, "loss": 0.04387804, "step": 12686 }, { "epoch": 25.374, "grad_norm": 1.0778383016586304, "learning_rate": 2e-05, "loss": 0.04208272, "step": 12687 }, { "epoch": 25.376, "grad_norm": 1.1874090433120728, "learning_rate": 2e-05, "loss": 0.07064945, "step": 12688 }, { "epoch": 25.378, "grad_norm": 1.0907158851623535, "learning_rate": 2e-05, "loss": 0.04148529, "step": 12689 }, { "epoch": 25.38, "grad_norm": 1.149124026298523, "learning_rate": 2e-05, "loss": 0.04476409, "step": 12690 }, { "epoch": 25.382, "grad_norm": 1.1236010789871216, "learning_rate": 2e-05, "loss": 0.05449494, "step": 12691 }, { "epoch": 25.384, "grad_norm": 0.8474858403205872, "learning_rate": 2e-05, "loss": 0.03595953, "step": 12692 }, { "epoch": 25.386, "grad_norm": 1.158517599105835, "learning_rate": 2e-05, "loss": 0.05441153, "step": 12693 }, { "epoch": 25.388, "grad_norm": 1.2441701889038086, "learning_rate": 2e-05, "loss": 0.04905082, "step": 12694 }, { "epoch": 25.39, "grad_norm": 1.200832724571228, "learning_rate": 2e-05, "loss": 0.04876531, "step": 12695 }, { "epoch": 25.392, "grad_norm": 1.3955540657043457, "learning_rate": 2e-05, "loss": 0.07045322, "step": 12696 }, { "epoch": 25.394, "grad_norm": 2.924276351928711, "learning_rate": 2e-05, "loss": 0.05881097, "step": 12697 }, { "epoch": 25.396, "grad_norm": 1.6612582206726074, "learning_rate": 2e-05, "loss": 0.06038381, "step": 12698 }, { "epoch": 25.398, "grad_norm": 1.303591012954712, "learning_rate": 2e-05, "loss": 0.06044409, "step": 12699 }, { "epoch": 25.4, "grad_norm": 1.5894290208816528, "learning_rate": 2e-05, "loss": 0.05581897, "step": 12700 }, { "epoch": 25.402, "grad_norm": 1.608234167098999, "learning_rate": 2e-05, "loss": 0.05291351, "step": 12701 }, { "epoch": 25.404, "grad_norm": 0.9458881616592407, "learning_rate": 2e-05, "loss": 0.03647695, "step": 12702 }, { "epoch": 25.406, "grad_norm": 1.2273818254470825, "learning_rate": 2e-05, "loss": 0.05713904, "step": 12703 }, { "epoch": 25.408, "grad_norm": 1.468031883239746, "learning_rate": 2e-05, "loss": 0.07614683, "step": 12704 }, { "epoch": 25.41, "grad_norm": 1.3028733730316162, "learning_rate": 2e-05, "loss": 0.06430078, "step": 12705 }, { "epoch": 25.412, "grad_norm": 1.3269321918487549, "learning_rate": 2e-05, "loss": 0.07427183, "step": 12706 }, { "epoch": 25.414, "grad_norm": 1.8167970180511475, "learning_rate": 2e-05, "loss": 0.04074918, "step": 12707 }, { "epoch": 25.416, "grad_norm": 1.4029529094696045, "learning_rate": 2e-05, "loss": 0.06068564, "step": 12708 }, { "epoch": 25.418, "grad_norm": 1.931858777999878, "learning_rate": 2e-05, "loss": 0.0437213, "step": 12709 }, { "epoch": 25.42, "grad_norm": 1.361970067024231, "learning_rate": 2e-05, "loss": 0.0516948, "step": 12710 }, { "epoch": 25.422, "grad_norm": 1.3697633743286133, "learning_rate": 2e-05, "loss": 0.04759203, "step": 12711 }, { "epoch": 25.424, "grad_norm": 3.9781877994537354, "learning_rate": 2e-05, "loss": 0.06740682, "step": 12712 }, { "epoch": 25.426, "grad_norm": 1.095296025276184, "learning_rate": 2e-05, "loss": 0.03909215, "step": 12713 }, { "epoch": 25.428, "grad_norm": 0.7827847003936768, "learning_rate": 2e-05, "loss": 0.02573121, "step": 12714 }, { "epoch": 25.43, "grad_norm": 1.5295050144195557, "learning_rate": 2e-05, "loss": 0.05307978, "step": 12715 }, { "epoch": 25.432, "grad_norm": 1.42865788936615, "learning_rate": 2e-05, "loss": 0.04561988, "step": 12716 }, { "epoch": 25.434, "grad_norm": 1.558946132659912, "learning_rate": 2e-05, "loss": 0.0545474, "step": 12717 }, { "epoch": 25.436, "grad_norm": 1.0636688470840454, "learning_rate": 2e-05, "loss": 0.04249337, "step": 12718 }, { "epoch": 25.438, "grad_norm": 10.508774757385254, "learning_rate": 2e-05, "loss": 0.04336645, "step": 12719 }, { "epoch": 25.44, "grad_norm": 1.0607038736343384, "learning_rate": 2e-05, "loss": 0.04370258, "step": 12720 }, { "epoch": 25.442, "grad_norm": 1.5483680963516235, "learning_rate": 2e-05, "loss": 0.06137222, "step": 12721 }, { "epoch": 25.444, "grad_norm": 1.6224528551101685, "learning_rate": 2e-05, "loss": 0.05630852, "step": 12722 }, { "epoch": 25.446, "grad_norm": 1.7314530611038208, "learning_rate": 2e-05, "loss": 0.04401193, "step": 12723 }, { "epoch": 25.448, "grad_norm": 1.8034086227416992, "learning_rate": 2e-05, "loss": 0.06886746, "step": 12724 }, { "epoch": 25.45, "grad_norm": 1.1106936931610107, "learning_rate": 2e-05, "loss": 0.0446757, "step": 12725 }, { "epoch": 25.452, "grad_norm": 1.678541660308838, "learning_rate": 2e-05, "loss": 0.05245058, "step": 12726 }, { "epoch": 25.454, "grad_norm": 0.9703069925308228, "learning_rate": 2e-05, "loss": 0.04368993, "step": 12727 }, { "epoch": 25.456, "grad_norm": 1.3546900749206543, "learning_rate": 2e-05, "loss": 0.05767547, "step": 12728 }, { "epoch": 25.458, "grad_norm": 1.1527262926101685, "learning_rate": 2e-05, "loss": 0.04236142, "step": 12729 }, { "epoch": 25.46, "grad_norm": 1.0238726139068604, "learning_rate": 2e-05, "loss": 0.04246419, "step": 12730 }, { "epoch": 25.462, "grad_norm": 1.3482915163040161, "learning_rate": 2e-05, "loss": 0.059407, "step": 12731 }, { "epoch": 25.464, "grad_norm": 1.3935139179229736, "learning_rate": 2e-05, "loss": 0.05849931, "step": 12732 }, { "epoch": 25.466, "grad_norm": 1.8440157175064087, "learning_rate": 2e-05, "loss": 0.0537977, "step": 12733 }, { "epoch": 25.468, "grad_norm": 3.4142637252807617, "learning_rate": 2e-05, "loss": 0.06638809, "step": 12734 }, { "epoch": 25.47, "grad_norm": 1.484055995941162, "learning_rate": 2e-05, "loss": 0.06258446, "step": 12735 }, { "epoch": 25.472, "grad_norm": 1.3089537620544434, "learning_rate": 2e-05, "loss": 0.04889432, "step": 12736 }, { "epoch": 25.474, "grad_norm": 1.295206069946289, "learning_rate": 2e-05, "loss": 0.04382516, "step": 12737 }, { "epoch": 25.476, "grad_norm": 1.5523864030838013, "learning_rate": 2e-05, "loss": 0.046129, "step": 12738 }, { "epoch": 25.478, "grad_norm": 1.075905680656433, "learning_rate": 2e-05, "loss": 0.04017866, "step": 12739 }, { "epoch": 25.48, "grad_norm": 1.2651946544647217, "learning_rate": 2e-05, "loss": 0.05966161, "step": 12740 }, { "epoch": 25.482, "grad_norm": 1.375027060508728, "learning_rate": 2e-05, "loss": 0.04522739, "step": 12741 }, { "epoch": 25.484, "grad_norm": 1.2064933776855469, "learning_rate": 2e-05, "loss": 0.06820923, "step": 12742 }, { "epoch": 25.486, "grad_norm": 1.2810351848602295, "learning_rate": 2e-05, "loss": 0.04269933, "step": 12743 }, { "epoch": 25.488, "grad_norm": 1.4875726699829102, "learning_rate": 2e-05, "loss": 0.04897683, "step": 12744 }, { "epoch": 25.49, "grad_norm": 1.9540343284606934, "learning_rate": 2e-05, "loss": 0.05846009, "step": 12745 }, { "epoch": 25.492, "grad_norm": 1.1057013273239136, "learning_rate": 2e-05, "loss": 0.03755689, "step": 12746 }, { "epoch": 25.494, "grad_norm": 1.1067478656768799, "learning_rate": 2e-05, "loss": 0.04683592, "step": 12747 }, { "epoch": 25.496, "grad_norm": 1.1736292839050293, "learning_rate": 2e-05, "loss": 0.04908927, "step": 12748 }, { "epoch": 25.498, "grad_norm": 1.4488857984542847, "learning_rate": 2e-05, "loss": 0.06244729, "step": 12749 }, { "epoch": 25.5, "grad_norm": 2.46125864982605, "learning_rate": 2e-05, "loss": 0.03913538, "step": 12750 }, { "epoch": 25.502, "grad_norm": 1.4381468296051025, "learning_rate": 2e-05, "loss": 0.048606, "step": 12751 }, { "epoch": 25.504, "grad_norm": 1.2452380657196045, "learning_rate": 2e-05, "loss": 0.04722155, "step": 12752 }, { "epoch": 25.506, "grad_norm": 2.4952425956726074, "learning_rate": 2e-05, "loss": 0.06059352, "step": 12753 }, { "epoch": 25.508, "grad_norm": 1.3867218494415283, "learning_rate": 2e-05, "loss": 0.05034525, "step": 12754 }, { "epoch": 25.51, "grad_norm": 1.7903037071228027, "learning_rate": 2e-05, "loss": 0.05293264, "step": 12755 }, { "epoch": 25.512, "grad_norm": 0.9718890190124512, "learning_rate": 2e-05, "loss": 0.04322246, "step": 12756 }, { "epoch": 25.514, "grad_norm": 1.1655280590057373, "learning_rate": 2e-05, "loss": 0.04413677, "step": 12757 }, { "epoch": 25.516, "grad_norm": 1.025194764137268, "learning_rate": 2e-05, "loss": 0.05315691, "step": 12758 }, { "epoch": 25.518, "grad_norm": 1.3715314865112305, "learning_rate": 2e-05, "loss": 0.06204192, "step": 12759 }, { "epoch": 25.52, "grad_norm": 1.927597165107727, "learning_rate": 2e-05, "loss": 0.05913546, "step": 12760 }, { "epoch": 25.522, "grad_norm": 1.1153978109359741, "learning_rate": 2e-05, "loss": 0.03583356, "step": 12761 }, { "epoch": 25.524, "grad_norm": 1.5045865774154663, "learning_rate": 2e-05, "loss": 0.0572304, "step": 12762 }, { "epoch": 25.526, "grad_norm": 1.32245934009552, "learning_rate": 2e-05, "loss": 0.06094477, "step": 12763 }, { "epoch": 25.528, "grad_norm": 2.9368999004364014, "learning_rate": 2e-05, "loss": 0.06650227, "step": 12764 }, { "epoch": 25.53, "grad_norm": 1.3673670291900635, "learning_rate": 2e-05, "loss": 0.04919039, "step": 12765 }, { "epoch": 25.532, "grad_norm": 1.4779621362686157, "learning_rate": 2e-05, "loss": 0.04854913, "step": 12766 }, { "epoch": 25.534, "grad_norm": 1.1605792045593262, "learning_rate": 2e-05, "loss": 0.04798715, "step": 12767 }, { "epoch": 25.536, "grad_norm": 1.6110693216323853, "learning_rate": 2e-05, "loss": 0.04709861, "step": 12768 }, { "epoch": 25.538, "grad_norm": 1.0017633438110352, "learning_rate": 2e-05, "loss": 0.04606114, "step": 12769 }, { "epoch": 25.54, "grad_norm": 4.130495071411133, "learning_rate": 2e-05, "loss": 0.04814644, "step": 12770 }, { "epoch": 25.542, "grad_norm": 1.8513188362121582, "learning_rate": 2e-05, "loss": 0.05693479, "step": 12771 }, { "epoch": 25.544, "grad_norm": 1.310679316520691, "learning_rate": 2e-05, "loss": 0.05217213, "step": 12772 }, { "epoch": 25.546, "grad_norm": 1.0654090642929077, "learning_rate": 2e-05, "loss": 0.04553785, "step": 12773 }, { "epoch": 25.548000000000002, "grad_norm": 1.1297792196273804, "learning_rate": 2e-05, "loss": 0.04842915, "step": 12774 }, { "epoch": 25.55, "grad_norm": 1.0475399494171143, "learning_rate": 2e-05, "loss": 0.04836237, "step": 12775 }, { "epoch": 25.552, "grad_norm": 2.017730474472046, "learning_rate": 2e-05, "loss": 0.06072921, "step": 12776 }, { "epoch": 25.554, "grad_norm": 1.1817578077316284, "learning_rate": 2e-05, "loss": 0.04273828, "step": 12777 }, { "epoch": 25.556, "grad_norm": 1.6500682830810547, "learning_rate": 2e-05, "loss": 0.05331226, "step": 12778 }, { "epoch": 25.558, "grad_norm": 1.3448277711868286, "learning_rate": 2e-05, "loss": 0.05775458, "step": 12779 }, { "epoch": 25.56, "grad_norm": 1.4511606693267822, "learning_rate": 2e-05, "loss": 0.05537773, "step": 12780 }, { "epoch": 25.562, "grad_norm": 1.2815492153167725, "learning_rate": 2e-05, "loss": 0.06243032, "step": 12781 }, { "epoch": 25.564, "grad_norm": 1.0833344459533691, "learning_rate": 2e-05, "loss": 0.04432148, "step": 12782 }, { "epoch": 25.566, "grad_norm": 1.7925783395767212, "learning_rate": 2e-05, "loss": 0.07335056, "step": 12783 }, { "epoch": 25.568, "grad_norm": 1.426572322845459, "learning_rate": 2e-05, "loss": 0.04290257, "step": 12784 }, { "epoch": 25.57, "grad_norm": 1.0916504859924316, "learning_rate": 2e-05, "loss": 0.05387871, "step": 12785 }, { "epoch": 25.572, "grad_norm": 1.99884831905365, "learning_rate": 2e-05, "loss": 0.07001382, "step": 12786 }, { "epoch": 25.574, "grad_norm": 1.2419377565383911, "learning_rate": 2e-05, "loss": 0.05605885, "step": 12787 }, { "epoch": 25.576, "grad_norm": 2.3259329795837402, "learning_rate": 2e-05, "loss": 0.06815036, "step": 12788 }, { "epoch": 25.578, "grad_norm": 1.428133249282837, "learning_rate": 2e-05, "loss": 0.07325581, "step": 12789 }, { "epoch": 25.58, "grad_norm": 1.7279250621795654, "learning_rate": 2e-05, "loss": 0.0637247, "step": 12790 }, { "epoch": 25.582, "grad_norm": 1.3596068620681763, "learning_rate": 2e-05, "loss": 0.0648721, "step": 12791 }, { "epoch": 25.584, "grad_norm": 1.727096676826477, "learning_rate": 2e-05, "loss": 0.05025814, "step": 12792 }, { "epoch": 25.586, "grad_norm": 0.9729359745979309, "learning_rate": 2e-05, "loss": 0.02993171, "step": 12793 }, { "epoch": 25.588, "grad_norm": 1.8768314123153687, "learning_rate": 2e-05, "loss": 0.05920444, "step": 12794 }, { "epoch": 25.59, "grad_norm": 1.2742987871170044, "learning_rate": 2e-05, "loss": 0.04502718, "step": 12795 }, { "epoch": 25.592, "grad_norm": 1.144529938697815, "learning_rate": 2e-05, "loss": 0.05125488, "step": 12796 }, { "epoch": 25.594, "grad_norm": 0.8300951719284058, "learning_rate": 2e-05, "loss": 0.03681937, "step": 12797 }, { "epoch": 25.596, "grad_norm": 0.9887096881866455, "learning_rate": 2e-05, "loss": 0.04033134, "step": 12798 }, { "epoch": 25.598, "grad_norm": 0.9891025424003601, "learning_rate": 2e-05, "loss": 0.05005992, "step": 12799 }, { "epoch": 25.6, "grad_norm": 3.133301258087158, "learning_rate": 2e-05, "loss": 0.05023361, "step": 12800 }, { "epoch": 25.602, "grad_norm": 0.9976722002029419, "learning_rate": 2e-05, "loss": 0.03790678, "step": 12801 }, { "epoch": 25.604, "grad_norm": 1.3413395881652832, "learning_rate": 2e-05, "loss": 0.04435261, "step": 12802 }, { "epoch": 25.606, "grad_norm": 1.0702431201934814, "learning_rate": 2e-05, "loss": 0.04605814, "step": 12803 }, { "epoch": 25.608, "grad_norm": 1.2439792156219482, "learning_rate": 2e-05, "loss": 0.04582006, "step": 12804 }, { "epoch": 25.61, "grad_norm": 0.9139171242713928, "learning_rate": 2e-05, "loss": 0.05181838, "step": 12805 }, { "epoch": 25.612, "grad_norm": 1.5499550104141235, "learning_rate": 2e-05, "loss": 0.06178095, "step": 12806 }, { "epoch": 25.614, "grad_norm": 1.25875723361969, "learning_rate": 2e-05, "loss": 0.0612697, "step": 12807 }, { "epoch": 25.616, "grad_norm": 1.1952266693115234, "learning_rate": 2e-05, "loss": 0.06704476, "step": 12808 }, { "epoch": 25.618, "grad_norm": 1.0385154485702515, "learning_rate": 2e-05, "loss": 0.04724877, "step": 12809 }, { "epoch": 25.62, "grad_norm": 1.1362289190292358, "learning_rate": 2e-05, "loss": 0.05113536, "step": 12810 }, { "epoch": 25.622, "grad_norm": 1.2640019655227661, "learning_rate": 2e-05, "loss": 0.05139344, "step": 12811 }, { "epoch": 25.624, "grad_norm": 1.2295875549316406, "learning_rate": 2e-05, "loss": 0.0677508, "step": 12812 }, { "epoch": 25.626, "grad_norm": 1.027230143547058, "learning_rate": 2e-05, "loss": 0.04624181, "step": 12813 }, { "epoch": 25.628, "grad_norm": 2.1069400310516357, "learning_rate": 2e-05, "loss": 0.07423867, "step": 12814 }, { "epoch": 25.63, "grad_norm": 1.2326492071151733, "learning_rate": 2e-05, "loss": 0.04707569, "step": 12815 }, { "epoch": 25.632, "grad_norm": 1.2456989288330078, "learning_rate": 2e-05, "loss": 0.05056888, "step": 12816 }, { "epoch": 25.634, "grad_norm": 1.2406045198440552, "learning_rate": 2e-05, "loss": 0.05671804, "step": 12817 }, { "epoch": 25.636, "grad_norm": 3.0739352703094482, "learning_rate": 2e-05, "loss": 0.05880243, "step": 12818 }, { "epoch": 25.638, "grad_norm": 0.9895063638687134, "learning_rate": 2e-05, "loss": 0.04489042, "step": 12819 }, { "epoch": 25.64, "grad_norm": 1.721880555152893, "learning_rate": 2e-05, "loss": 0.05679232, "step": 12820 }, { "epoch": 25.642, "grad_norm": 1.4406098127365112, "learning_rate": 2e-05, "loss": 0.04239009, "step": 12821 }, { "epoch": 25.644, "grad_norm": 1.1674944162368774, "learning_rate": 2e-05, "loss": 0.03778373, "step": 12822 }, { "epoch": 25.646, "grad_norm": 2.030735969543457, "learning_rate": 2e-05, "loss": 0.06437752, "step": 12823 }, { "epoch": 25.648, "grad_norm": 1.0487240552902222, "learning_rate": 2e-05, "loss": 0.04022495, "step": 12824 }, { "epoch": 25.65, "grad_norm": 1.2484827041625977, "learning_rate": 2e-05, "loss": 0.04734844, "step": 12825 }, { "epoch": 25.652, "grad_norm": 1.0691560506820679, "learning_rate": 2e-05, "loss": 0.05544395, "step": 12826 }, { "epoch": 25.654, "grad_norm": 1.5978327989578247, "learning_rate": 2e-05, "loss": 0.04276916, "step": 12827 }, { "epoch": 25.656, "grad_norm": 1.8572219610214233, "learning_rate": 2e-05, "loss": 0.04466252, "step": 12828 }, { "epoch": 25.658, "grad_norm": 1.1055822372436523, "learning_rate": 2e-05, "loss": 0.04597035, "step": 12829 }, { "epoch": 25.66, "grad_norm": 1.084515929222107, "learning_rate": 2e-05, "loss": 0.05470392, "step": 12830 }, { "epoch": 25.662, "grad_norm": 1.1045705080032349, "learning_rate": 2e-05, "loss": 0.04466307, "step": 12831 }, { "epoch": 25.664, "grad_norm": 1.823007583618164, "learning_rate": 2e-05, "loss": 0.05045185, "step": 12832 }, { "epoch": 25.666, "grad_norm": 1.1061551570892334, "learning_rate": 2e-05, "loss": 0.05104139, "step": 12833 }, { "epoch": 25.668, "grad_norm": 1.2293810844421387, "learning_rate": 2e-05, "loss": 0.05810113, "step": 12834 }, { "epoch": 25.67, "grad_norm": 1.1663942337036133, "learning_rate": 2e-05, "loss": 0.0486173, "step": 12835 }, { "epoch": 25.672, "grad_norm": 1.2538223266601562, "learning_rate": 2e-05, "loss": 0.04860168, "step": 12836 }, { "epoch": 25.674, "grad_norm": 1.1970294713974, "learning_rate": 2e-05, "loss": 0.05864847, "step": 12837 }, { "epoch": 25.676, "grad_norm": 1.0881503820419312, "learning_rate": 2e-05, "loss": 0.04209738, "step": 12838 }, { "epoch": 25.678, "grad_norm": 1.1965652704238892, "learning_rate": 2e-05, "loss": 0.0481148, "step": 12839 }, { "epoch": 25.68, "grad_norm": 1.397071361541748, "learning_rate": 2e-05, "loss": 0.05877994, "step": 12840 }, { "epoch": 25.682, "grad_norm": 2.3181111812591553, "learning_rate": 2e-05, "loss": 0.04869503, "step": 12841 }, { "epoch": 25.684, "grad_norm": 1.8109030723571777, "learning_rate": 2e-05, "loss": 0.05009746, "step": 12842 }, { "epoch": 25.686, "grad_norm": 1.0651922225952148, "learning_rate": 2e-05, "loss": 0.03928246, "step": 12843 }, { "epoch": 25.688, "grad_norm": 1.0781910419464111, "learning_rate": 2e-05, "loss": 0.04467912, "step": 12844 }, { "epoch": 25.69, "grad_norm": 1.1410558223724365, "learning_rate": 2e-05, "loss": 0.04627768, "step": 12845 }, { "epoch": 25.692, "grad_norm": 1.2201945781707764, "learning_rate": 2e-05, "loss": 0.05280259, "step": 12846 }, { "epoch": 25.694, "grad_norm": 2.310019016265869, "learning_rate": 2e-05, "loss": 0.05168546, "step": 12847 }, { "epoch": 25.696, "grad_norm": 1.1586430072784424, "learning_rate": 2e-05, "loss": 0.04406252, "step": 12848 }, { "epoch": 25.698, "grad_norm": 0.9945669174194336, "learning_rate": 2e-05, "loss": 0.04793625, "step": 12849 }, { "epoch": 25.7, "grad_norm": 1.3950247764587402, "learning_rate": 2e-05, "loss": 0.05298424, "step": 12850 }, { "epoch": 25.701999999999998, "grad_norm": 1.0889809131622314, "learning_rate": 2e-05, "loss": 0.04711286, "step": 12851 }, { "epoch": 25.704, "grad_norm": 0.904662549495697, "learning_rate": 2e-05, "loss": 0.03547686, "step": 12852 }, { "epoch": 25.706, "grad_norm": 1.993645429611206, "learning_rate": 2e-05, "loss": 0.04876352, "step": 12853 }, { "epoch": 25.708, "grad_norm": 1.7392280101776123, "learning_rate": 2e-05, "loss": 0.0687768, "step": 12854 }, { "epoch": 25.71, "grad_norm": 1.073009967803955, "learning_rate": 2e-05, "loss": 0.02985471, "step": 12855 }, { "epoch": 25.712, "grad_norm": 1.136964201927185, "learning_rate": 2e-05, "loss": 0.04382104, "step": 12856 }, { "epoch": 25.714, "grad_norm": 1.5751549005508423, "learning_rate": 2e-05, "loss": 0.0709632, "step": 12857 }, { "epoch": 25.716, "grad_norm": 1.5142931938171387, "learning_rate": 2e-05, "loss": 0.04703892, "step": 12858 }, { "epoch": 25.718, "grad_norm": 1.112041711807251, "learning_rate": 2e-05, "loss": 0.05210464, "step": 12859 }, { "epoch": 25.72, "grad_norm": 1.2692893743515015, "learning_rate": 2e-05, "loss": 0.04232005, "step": 12860 }, { "epoch": 25.722, "grad_norm": 1.7280734777450562, "learning_rate": 2e-05, "loss": 0.05406114, "step": 12861 }, { "epoch": 25.724, "grad_norm": 1.8244057893753052, "learning_rate": 2e-05, "loss": 0.0683555, "step": 12862 }, { "epoch": 25.726, "grad_norm": 2.2683568000793457, "learning_rate": 2e-05, "loss": 0.07910827, "step": 12863 }, { "epoch": 25.728, "grad_norm": 1.1440143585205078, "learning_rate": 2e-05, "loss": 0.04792085, "step": 12864 }, { "epoch": 25.73, "grad_norm": 1.2368404865264893, "learning_rate": 2e-05, "loss": 0.05498112, "step": 12865 }, { "epoch": 25.732, "grad_norm": 0.8835970759391785, "learning_rate": 2e-05, "loss": 0.03530611, "step": 12866 }, { "epoch": 25.734, "grad_norm": 2.336962938308716, "learning_rate": 2e-05, "loss": 0.04984652, "step": 12867 }, { "epoch": 25.736, "grad_norm": 1.1306811571121216, "learning_rate": 2e-05, "loss": 0.05902038, "step": 12868 }, { "epoch": 25.738, "grad_norm": 2.107285737991333, "learning_rate": 2e-05, "loss": 0.04503857, "step": 12869 }, { "epoch": 25.74, "grad_norm": 1.420448899269104, "learning_rate": 2e-05, "loss": 0.04862865, "step": 12870 }, { "epoch": 25.742, "grad_norm": 0.8986669182777405, "learning_rate": 2e-05, "loss": 0.0349273, "step": 12871 }, { "epoch": 25.744, "grad_norm": 1.161797285079956, "learning_rate": 2e-05, "loss": 0.05496646, "step": 12872 }, { "epoch": 25.746, "grad_norm": 1.4078707695007324, "learning_rate": 2e-05, "loss": 0.06351984, "step": 12873 }, { "epoch": 25.748, "grad_norm": 1.495318055152893, "learning_rate": 2e-05, "loss": 0.04551301, "step": 12874 }, { "epoch": 25.75, "grad_norm": 1.2200729846954346, "learning_rate": 2e-05, "loss": 0.05727559, "step": 12875 }, { "epoch": 25.752, "grad_norm": 1.326443076133728, "learning_rate": 2e-05, "loss": 0.05589775, "step": 12876 }, { "epoch": 25.754, "grad_norm": 0.969882071018219, "learning_rate": 2e-05, "loss": 0.04136576, "step": 12877 }, { "epoch": 25.756, "grad_norm": 1.690650224685669, "learning_rate": 2e-05, "loss": 0.05331571, "step": 12878 }, { "epoch": 25.758, "grad_norm": 1.1655311584472656, "learning_rate": 2e-05, "loss": 0.05825135, "step": 12879 }, { "epoch": 25.76, "grad_norm": 1.3209490776062012, "learning_rate": 2e-05, "loss": 0.04668522, "step": 12880 }, { "epoch": 25.762, "grad_norm": 1.6116498708724976, "learning_rate": 2e-05, "loss": 0.06680355, "step": 12881 }, { "epoch": 25.764, "grad_norm": 1.2072117328643799, "learning_rate": 2e-05, "loss": 0.05509059, "step": 12882 }, { "epoch": 25.766, "grad_norm": 1.202558994293213, "learning_rate": 2e-05, "loss": 0.06296343, "step": 12883 }, { "epoch": 25.768, "grad_norm": 1.148883581161499, "learning_rate": 2e-05, "loss": 0.05604894, "step": 12884 }, { "epoch": 25.77, "grad_norm": 1.40285325050354, "learning_rate": 2e-05, "loss": 0.06594862, "step": 12885 }, { "epoch": 25.772, "grad_norm": 1.166197657585144, "learning_rate": 2e-05, "loss": 0.04254821, "step": 12886 }, { "epoch": 25.774, "grad_norm": 1.4295483827590942, "learning_rate": 2e-05, "loss": 0.06688876, "step": 12887 }, { "epoch": 25.776, "grad_norm": 1.4319037199020386, "learning_rate": 2e-05, "loss": 0.0556851, "step": 12888 }, { "epoch": 25.778, "grad_norm": 1.286808967590332, "learning_rate": 2e-05, "loss": 0.04182938, "step": 12889 }, { "epoch": 25.78, "grad_norm": 1.6528675556182861, "learning_rate": 2e-05, "loss": 0.04781533, "step": 12890 }, { "epoch": 25.782, "grad_norm": 1.1469407081604004, "learning_rate": 2e-05, "loss": 0.04668975, "step": 12891 }, { "epoch": 25.784, "grad_norm": 2.39813232421875, "learning_rate": 2e-05, "loss": 0.04818932, "step": 12892 }, { "epoch": 25.786, "grad_norm": 1.1801865100860596, "learning_rate": 2e-05, "loss": 0.04814564, "step": 12893 }, { "epoch": 25.788, "grad_norm": 1.1123950481414795, "learning_rate": 2e-05, "loss": 0.04692183, "step": 12894 }, { "epoch": 25.79, "grad_norm": 1.5134515762329102, "learning_rate": 2e-05, "loss": 0.05328212, "step": 12895 }, { "epoch": 25.792, "grad_norm": 1.1126987934112549, "learning_rate": 2e-05, "loss": 0.05004511, "step": 12896 }, { "epoch": 25.794, "grad_norm": 1.392031192779541, "learning_rate": 2e-05, "loss": 0.05430735, "step": 12897 }, { "epoch": 25.796, "grad_norm": 1.0170400142669678, "learning_rate": 2e-05, "loss": 0.04751647, "step": 12898 }, { "epoch": 25.798000000000002, "grad_norm": 2.021364688873291, "learning_rate": 2e-05, "loss": 0.06566989, "step": 12899 }, { "epoch": 25.8, "grad_norm": 1.2070749998092651, "learning_rate": 2e-05, "loss": 0.0589318, "step": 12900 }, { "epoch": 25.802, "grad_norm": 1.102105975151062, "learning_rate": 2e-05, "loss": 0.04911103, "step": 12901 }, { "epoch": 25.804, "grad_norm": 1.21110200881958, "learning_rate": 2e-05, "loss": 0.05951696, "step": 12902 }, { "epoch": 25.806, "grad_norm": 1.0811165571212769, "learning_rate": 2e-05, "loss": 0.04575203, "step": 12903 }, { "epoch": 25.808, "grad_norm": 1.075434684753418, "learning_rate": 2e-05, "loss": 0.03958637, "step": 12904 }, { "epoch": 25.81, "grad_norm": 1.0207839012145996, "learning_rate": 2e-05, "loss": 0.0437893, "step": 12905 }, { "epoch": 25.812, "grad_norm": 1.075128436088562, "learning_rate": 2e-05, "loss": 0.04912503, "step": 12906 }, { "epoch": 25.814, "grad_norm": 1.1556148529052734, "learning_rate": 2e-05, "loss": 0.05578942, "step": 12907 }, { "epoch": 25.816, "grad_norm": 1.2452220916748047, "learning_rate": 2e-05, "loss": 0.04613642, "step": 12908 }, { "epoch": 25.818, "grad_norm": 1.2133173942565918, "learning_rate": 2e-05, "loss": 0.04734804, "step": 12909 }, { "epoch": 25.82, "grad_norm": 1.0162991285324097, "learning_rate": 2e-05, "loss": 0.04290106, "step": 12910 }, { "epoch": 25.822, "grad_norm": 1.3354029655456543, "learning_rate": 2e-05, "loss": 0.03995002, "step": 12911 }, { "epoch": 25.824, "grad_norm": 1.113986849784851, "learning_rate": 2e-05, "loss": 0.06275769, "step": 12912 }, { "epoch": 25.826, "grad_norm": 2.402036190032959, "learning_rate": 2e-05, "loss": 0.04922744, "step": 12913 }, { "epoch": 25.828, "grad_norm": 1.4154717922210693, "learning_rate": 2e-05, "loss": 0.05232061, "step": 12914 }, { "epoch": 25.83, "grad_norm": 2.0207324028015137, "learning_rate": 2e-05, "loss": 0.047973, "step": 12915 }, { "epoch": 25.832, "grad_norm": 1.918625831604004, "learning_rate": 2e-05, "loss": 0.04773705, "step": 12916 }, { "epoch": 25.834, "grad_norm": 1.6294420957565308, "learning_rate": 2e-05, "loss": 0.07772642, "step": 12917 }, { "epoch": 25.836, "grad_norm": 1.1776214838027954, "learning_rate": 2e-05, "loss": 0.05444412, "step": 12918 }, { "epoch": 25.838, "grad_norm": 1.3414961099624634, "learning_rate": 2e-05, "loss": 0.04533607, "step": 12919 }, { "epoch": 25.84, "grad_norm": 1.2505340576171875, "learning_rate": 2e-05, "loss": 0.04217423, "step": 12920 }, { "epoch": 25.842, "grad_norm": 1.0406416654586792, "learning_rate": 2e-05, "loss": 0.04207191, "step": 12921 }, { "epoch": 25.844, "grad_norm": 1.4956090450286865, "learning_rate": 2e-05, "loss": 0.05411403, "step": 12922 }, { "epoch": 25.846, "grad_norm": 2.2896435260772705, "learning_rate": 2e-05, "loss": 0.04936294, "step": 12923 }, { "epoch": 25.848, "grad_norm": 1.5992838144302368, "learning_rate": 2e-05, "loss": 0.04748482, "step": 12924 }, { "epoch": 25.85, "grad_norm": 1.1285362243652344, "learning_rate": 2e-05, "loss": 0.05085773, "step": 12925 }, { "epoch": 25.852, "grad_norm": 1.288512945175171, "learning_rate": 2e-05, "loss": 0.05263177, "step": 12926 }, { "epoch": 25.854, "grad_norm": 1.4199323654174805, "learning_rate": 2e-05, "loss": 0.06531711, "step": 12927 }, { "epoch": 25.856, "grad_norm": 1.6077710390090942, "learning_rate": 2e-05, "loss": 0.04645553, "step": 12928 }, { "epoch": 25.858, "grad_norm": 1.129436731338501, "learning_rate": 2e-05, "loss": 0.05730057, "step": 12929 }, { "epoch": 25.86, "grad_norm": 4.665754795074463, "learning_rate": 2e-05, "loss": 0.04131242, "step": 12930 }, { "epoch": 25.862, "grad_norm": 1.0362974405288696, "learning_rate": 2e-05, "loss": 0.03944635, "step": 12931 }, { "epoch": 25.864, "grad_norm": 1.0900228023529053, "learning_rate": 2e-05, "loss": 0.04132649, "step": 12932 }, { "epoch": 25.866, "grad_norm": 1.3118113279342651, "learning_rate": 2e-05, "loss": 0.07417099, "step": 12933 }, { "epoch": 25.868, "grad_norm": 1.4437681436538696, "learning_rate": 2e-05, "loss": 0.04851513, "step": 12934 }, { "epoch": 25.87, "grad_norm": 1.0352909564971924, "learning_rate": 2e-05, "loss": 0.05197804, "step": 12935 }, { "epoch": 25.872, "grad_norm": 1.8231605291366577, "learning_rate": 2e-05, "loss": 0.06293781, "step": 12936 }, { "epoch": 25.874, "grad_norm": 1.667567491531372, "learning_rate": 2e-05, "loss": 0.05975756, "step": 12937 }, { "epoch": 25.876, "grad_norm": 1.5929611921310425, "learning_rate": 2e-05, "loss": 0.05494578, "step": 12938 }, { "epoch": 25.878, "grad_norm": 1.2994763851165771, "learning_rate": 2e-05, "loss": 0.06043943, "step": 12939 }, { "epoch": 25.88, "grad_norm": 1.237607717514038, "learning_rate": 2e-05, "loss": 0.05413928, "step": 12940 }, { "epoch": 25.882, "grad_norm": 1.2618831396102905, "learning_rate": 2e-05, "loss": 0.04188006, "step": 12941 }, { "epoch": 25.884, "grad_norm": 1.9431605339050293, "learning_rate": 2e-05, "loss": 0.0461794, "step": 12942 }, { "epoch": 25.886, "grad_norm": 1.2015745639801025, "learning_rate": 2e-05, "loss": 0.055432, "step": 12943 }, { "epoch": 25.888, "grad_norm": 1.1186437606811523, "learning_rate": 2e-05, "loss": 0.0378814, "step": 12944 }, { "epoch": 25.89, "grad_norm": 1.3302794694900513, "learning_rate": 2e-05, "loss": 0.03606445, "step": 12945 }, { "epoch": 25.892, "grad_norm": 1.2092205286026, "learning_rate": 2e-05, "loss": 0.05607365, "step": 12946 }, { "epoch": 25.894, "grad_norm": 1.2788810729980469, "learning_rate": 2e-05, "loss": 0.0567233, "step": 12947 }, { "epoch": 25.896, "grad_norm": 2.554314374923706, "learning_rate": 2e-05, "loss": 0.07296786, "step": 12948 }, { "epoch": 25.898, "grad_norm": 8.54755973815918, "learning_rate": 2e-05, "loss": 0.06331183, "step": 12949 }, { "epoch": 25.9, "grad_norm": 1.516268253326416, "learning_rate": 2e-05, "loss": 0.06048638, "step": 12950 }, { "epoch": 25.902, "grad_norm": 1.0964257717132568, "learning_rate": 2e-05, "loss": 0.04988034, "step": 12951 }, { "epoch": 25.904, "grad_norm": 1.0793359279632568, "learning_rate": 2e-05, "loss": 0.03984427, "step": 12952 }, { "epoch": 25.906, "grad_norm": 1.3223340511322021, "learning_rate": 2e-05, "loss": 0.06367067, "step": 12953 }, { "epoch": 25.908, "grad_norm": 1.1271212100982666, "learning_rate": 2e-05, "loss": 0.03948434, "step": 12954 }, { "epoch": 25.91, "grad_norm": 1.0466126203536987, "learning_rate": 2e-05, "loss": 0.04741898, "step": 12955 }, { "epoch": 25.912, "grad_norm": 1.4527355432510376, "learning_rate": 2e-05, "loss": 0.04346454, "step": 12956 }, { "epoch": 25.914, "grad_norm": 2.187269926071167, "learning_rate": 2e-05, "loss": 0.06226395, "step": 12957 }, { "epoch": 25.916, "grad_norm": 1.0472018718719482, "learning_rate": 2e-05, "loss": 0.04111593, "step": 12958 }, { "epoch": 25.918, "grad_norm": 1.393111228942871, "learning_rate": 2e-05, "loss": 0.0482638, "step": 12959 }, { "epoch": 25.92, "grad_norm": 0.9244487285614014, "learning_rate": 2e-05, "loss": 0.04231826, "step": 12960 }, { "epoch": 25.922, "grad_norm": 0.9578429460525513, "learning_rate": 2e-05, "loss": 0.03926083, "step": 12961 }, { "epoch": 25.924, "grad_norm": 1.0807175636291504, "learning_rate": 2e-05, "loss": 0.04108343, "step": 12962 }, { "epoch": 25.926, "grad_norm": 1.110276460647583, "learning_rate": 2e-05, "loss": 0.05082192, "step": 12963 }, { "epoch": 25.928, "grad_norm": 1.1799793243408203, "learning_rate": 2e-05, "loss": 0.0494792, "step": 12964 }, { "epoch": 25.93, "grad_norm": 1.0465432405471802, "learning_rate": 2e-05, "loss": 0.03947487, "step": 12965 }, { "epoch": 25.932, "grad_norm": 1.153640627861023, "learning_rate": 2e-05, "loss": 0.05583987, "step": 12966 }, { "epoch": 25.934, "grad_norm": 1.2299213409423828, "learning_rate": 2e-05, "loss": 0.05019006, "step": 12967 }, { "epoch": 25.936, "grad_norm": 1.0663444995880127, "learning_rate": 2e-05, "loss": 0.03344033, "step": 12968 }, { "epoch": 25.938, "grad_norm": 1.078837513923645, "learning_rate": 2e-05, "loss": 0.05011968, "step": 12969 }, { "epoch": 25.94, "grad_norm": 1.0863316059112549, "learning_rate": 2e-05, "loss": 0.04517816, "step": 12970 }, { "epoch": 25.942, "grad_norm": 1.1652551889419556, "learning_rate": 2e-05, "loss": 0.04341657, "step": 12971 }, { "epoch": 25.944, "grad_norm": 1.3127986192703247, "learning_rate": 2e-05, "loss": 0.04925394, "step": 12972 }, { "epoch": 25.946, "grad_norm": 1.014568567276001, "learning_rate": 2e-05, "loss": 0.04084335, "step": 12973 }, { "epoch": 25.948, "grad_norm": 1.240275263786316, "learning_rate": 2e-05, "loss": 0.0608517, "step": 12974 }, { "epoch": 25.95, "grad_norm": 1.5109710693359375, "learning_rate": 2e-05, "loss": 0.05465719, "step": 12975 }, { "epoch": 25.951999999999998, "grad_norm": 1.656043291091919, "learning_rate": 2e-05, "loss": 0.05748964, "step": 12976 }, { "epoch": 25.954, "grad_norm": 1.4241423606872559, "learning_rate": 2e-05, "loss": 0.05818785, "step": 12977 }, { "epoch": 25.956, "grad_norm": 1.16642165184021, "learning_rate": 2e-05, "loss": 0.05530277, "step": 12978 }, { "epoch": 25.958, "grad_norm": 1.135559320449829, "learning_rate": 2e-05, "loss": 0.05437966, "step": 12979 }, { "epoch": 25.96, "grad_norm": 1.0939438343048096, "learning_rate": 2e-05, "loss": 0.04102438, "step": 12980 }, { "epoch": 25.962, "grad_norm": 1.118964433670044, "learning_rate": 2e-05, "loss": 0.0569177, "step": 12981 }, { "epoch": 25.964, "grad_norm": 1.0235003232955933, "learning_rate": 2e-05, "loss": 0.04667676, "step": 12982 }, { "epoch": 25.966, "grad_norm": 2.417015552520752, "learning_rate": 2e-05, "loss": 0.04739671, "step": 12983 }, { "epoch": 25.968, "grad_norm": 1.1218820810317993, "learning_rate": 2e-05, "loss": 0.04615018, "step": 12984 }, { "epoch": 25.97, "grad_norm": 1.4444812536239624, "learning_rate": 2e-05, "loss": 0.06115863, "step": 12985 }, { "epoch": 25.972, "grad_norm": 1.2405178546905518, "learning_rate": 2e-05, "loss": 0.0483456, "step": 12986 }, { "epoch": 25.974, "grad_norm": 0.8787335157394409, "learning_rate": 2e-05, "loss": 0.03661456, "step": 12987 }, { "epoch": 25.976, "grad_norm": 1.8190350532531738, "learning_rate": 2e-05, "loss": 0.05351233, "step": 12988 }, { "epoch": 25.978, "grad_norm": 1.4539055824279785, "learning_rate": 2e-05, "loss": 0.06576534, "step": 12989 }, { "epoch": 25.98, "grad_norm": 1.3353135585784912, "learning_rate": 2e-05, "loss": 0.03229643, "step": 12990 }, { "epoch": 25.982, "grad_norm": 1.533454418182373, "learning_rate": 2e-05, "loss": 0.06074094, "step": 12991 }, { "epoch": 25.984, "grad_norm": 1.3816373348236084, "learning_rate": 2e-05, "loss": 0.04852141, "step": 12992 }, { "epoch": 25.986, "grad_norm": 1.4561913013458252, "learning_rate": 2e-05, "loss": 0.04319498, "step": 12993 }, { "epoch": 25.988, "grad_norm": 1.1283880472183228, "learning_rate": 2e-05, "loss": 0.04203261, "step": 12994 }, { "epoch": 25.99, "grad_norm": 1.3209444284439087, "learning_rate": 2e-05, "loss": 0.03877352, "step": 12995 }, { "epoch": 25.992, "grad_norm": 3.3635120391845703, "learning_rate": 2e-05, "loss": 0.05657326, "step": 12996 }, { "epoch": 25.994, "grad_norm": 1.208309292793274, "learning_rate": 2e-05, "loss": 0.05318328, "step": 12997 }, { "epoch": 25.996, "grad_norm": 1.090011477470398, "learning_rate": 2e-05, "loss": 0.03680216, "step": 12998 }, { "epoch": 25.998, "grad_norm": 1.8554240465164185, "learning_rate": 2e-05, "loss": 0.06863742, "step": 12999 }, { "epoch": 26.0, "grad_norm": 1.1974444389343262, "learning_rate": 2e-05, "loss": 0.05356851, "step": 13000 }, { "epoch": 26.0, "eval_performance": { "AngleClassification_1": 0.998, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9800399201596807, "Equal_1": 0.992, "Equal_2": 0.9700598802395209, "Equal_3": 0.9840319361277445, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.9900199600798403, "Parallel_1": 0.9859719438877755, "Parallel_2": 0.9979959919839679, "Parallel_3": 0.988, "Perpendicular_1": 0.994, "Perpendicular_2": 0.992, "Perpendicular_3": 0.8907815631262525, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.998, "PointLiesOnCircle_3": 0.9932000000000001, "PointLiesOnLine_1": 0.9959919839679359, "PointLiesOnLine_2": 1.0, "PointLiesOnLine_3": 0.9780439121756487 }, "eval_runtime": 226.0534, "eval_samples_per_second": 46.449, "eval_steps_per_second": 0.929, "step": 13000 }, { "epoch": 26.002, "grad_norm": 2.2640161514282227, "learning_rate": 2e-05, "loss": 0.04607668, "step": 13001 }, { "epoch": 26.004, "grad_norm": 0.9687132239341736, "learning_rate": 2e-05, "loss": 0.02846185, "step": 13002 }, { "epoch": 26.006, "grad_norm": 2.0622293949127197, "learning_rate": 2e-05, "loss": 0.06130286, "step": 13003 }, { "epoch": 26.008, "grad_norm": 1.1753813028335571, "learning_rate": 2e-05, "loss": 0.0431699, "step": 13004 }, { "epoch": 26.01, "grad_norm": 2.72263240814209, "learning_rate": 2e-05, "loss": 0.04025402, "step": 13005 }, { "epoch": 26.012, "grad_norm": 1.1225314140319824, "learning_rate": 2e-05, "loss": 0.04466118, "step": 13006 }, { "epoch": 26.014, "grad_norm": 1.1700783967971802, "learning_rate": 2e-05, "loss": 0.04696696, "step": 13007 }, { "epoch": 26.016, "grad_norm": 1.3788491487503052, "learning_rate": 2e-05, "loss": 0.056755, "step": 13008 }, { "epoch": 26.018, "grad_norm": 1.1374354362487793, "learning_rate": 2e-05, "loss": 0.05593617, "step": 13009 }, { "epoch": 26.02, "grad_norm": 3.024582624435425, "learning_rate": 2e-05, "loss": 0.0765273, "step": 13010 }, { "epoch": 26.022, "grad_norm": 2.8222572803497314, "learning_rate": 2e-05, "loss": 0.06038096, "step": 13011 }, { "epoch": 26.024, "grad_norm": 1.3721390962600708, "learning_rate": 2e-05, "loss": 0.04497147, "step": 13012 }, { "epoch": 26.026, "grad_norm": 1.1049742698669434, "learning_rate": 2e-05, "loss": 0.03711478, "step": 13013 }, { "epoch": 26.028, "grad_norm": 1.0819664001464844, "learning_rate": 2e-05, "loss": 0.04570344, "step": 13014 }, { "epoch": 26.03, "grad_norm": 1.005324363708496, "learning_rate": 2e-05, "loss": 0.03682058, "step": 13015 }, { "epoch": 26.032, "grad_norm": 2.190463066101074, "learning_rate": 2e-05, "loss": 0.05066787, "step": 13016 }, { "epoch": 26.034, "grad_norm": 2.133492946624756, "learning_rate": 2e-05, "loss": 0.04975044, "step": 13017 }, { "epoch": 26.036, "grad_norm": 1.1801435947418213, "learning_rate": 2e-05, "loss": 0.03991776, "step": 13018 }, { "epoch": 26.038, "grad_norm": 1.1678171157836914, "learning_rate": 2e-05, "loss": 0.05303773, "step": 13019 }, { "epoch": 26.04, "grad_norm": 1.0145256519317627, "learning_rate": 2e-05, "loss": 0.05301607, "step": 13020 }, { "epoch": 26.042, "grad_norm": 1.4599593877792358, "learning_rate": 2e-05, "loss": 0.03963199, "step": 13021 }, { "epoch": 26.044, "grad_norm": 1.7386794090270996, "learning_rate": 2e-05, "loss": 0.06184332, "step": 13022 }, { "epoch": 26.046, "grad_norm": 2.5070903301239014, "learning_rate": 2e-05, "loss": 0.0565906, "step": 13023 }, { "epoch": 26.048, "grad_norm": 3.591169595718384, "learning_rate": 2e-05, "loss": 0.05255976, "step": 13024 }, { "epoch": 26.05, "grad_norm": 1.2141801118850708, "learning_rate": 2e-05, "loss": 0.04010753, "step": 13025 }, { "epoch": 26.052, "grad_norm": 1.0931274890899658, "learning_rate": 2e-05, "loss": 0.06071039, "step": 13026 }, { "epoch": 26.054, "grad_norm": 1.2154799699783325, "learning_rate": 2e-05, "loss": 0.03915733, "step": 13027 }, { "epoch": 26.056, "grad_norm": 1.5110702514648438, "learning_rate": 2e-05, "loss": 0.05358441, "step": 13028 }, { "epoch": 26.058, "grad_norm": 2.055340051651001, "learning_rate": 2e-05, "loss": 0.0636664, "step": 13029 }, { "epoch": 26.06, "grad_norm": 1.6155766248703003, "learning_rate": 2e-05, "loss": 0.05680309, "step": 13030 }, { "epoch": 26.062, "grad_norm": 1.2514642477035522, "learning_rate": 2e-05, "loss": 0.04993298, "step": 13031 }, { "epoch": 26.064, "grad_norm": 1.1973069906234741, "learning_rate": 2e-05, "loss": 0.04057794, "step": 13032 }, { "epoch": 26.066, "grad_norm": 1.1734708547592163, "learning_rate": 2e-05, "loss": 0.04549327, "step": 13033 }, { "epoch": 26.068, "grad_norm": 1.2759863138198853, "learning_rate": 2e-05, "loss": 0.05127523, "step": 13034 }, { "epoch": 26.07, "grad_norm": 1.401794672012329, "learning_rate": 2e-05, "loss": 0.04455247, "step": 13035 }, { "epoch": 26.072, "grad_norm": 1.2099015712738037, "learning_rate": 2e-05, "loss": 0.03769619, "step": 13036 }, { "epoch": 26.074, "grad_norm": 1.5846632719039917, "learning_rate": 2e-05, "loss": 0.0453898, "step": 13037 }, { "epoch": 26.076, "grad_norm": 1.5003308057785034, "learning_rate": 2e-05, "loss": 0.04709727, "step": 13038 }, { "epoch": 26.078, "grad_norm": 1.4149280786514282, "learning_rate": 2e-05, "loss": 0.04616783, "step": 13039 }, { "epoch": 26.08, "grad_norm": 1.4467313289642334, "learning_rate": 2e-05, "loss": 0.04626878, "step": 13040 }, { "epoch": 26.082, "grad_norm": 1.1100996732711792, "learning_rate": 2e-05, "loss": 0.04105768, "step": 13041 }, { "epoch": 26.084, "grad_norm": 0.8764375448226929, "learning_rate": 2e-05, "loss": 0.03852883, "step": 13042 }, { "epoch": 26.086, "grad_norm": 1.6585164070129395, "learning_rate": 2e-05, "loss": 0.05972163, "step": 13043 }, { "epoch": 26.088, "grad_norm": 1.7479039430618286, "learning_rate": 2e-05, "loss": 0.05720647, "step": 13044 }, { "epoch": 26.09, "grad_norm": 1.7044578790664673, "learning_rate": 2e-05, "loss": 0.0516377, "step": 13045 }, { "epoch": 26.092, "grad_norm": 1.1574060916900635, "learning_rate": 2e-05, "loss": 0.05296007, "step": 13046 }, { "epoch": 26.094, "grad_norm": 1.2622594833374023, "learning_rate": 2e-05, "loss": 0.05052954, "step": 13047 }, { "epoch": 26.096, "grad_norm": 2.5884244441986084, "learning_rate": 2e-05, "loss": 0.04769097, "step": 13048 }, { "epoch": 26.098, "grad_norm": 2.2298460006713867, "learning_rate": 2e-05, "loss": 0.05569545, "step": 13049 }, { "epoch": 26.1, "grad_norm": 1.1135103702545166, "learning_rate": 2e-05, "loss": 0.04933055, "step": 13050 }, { "epoch": 26.102, "grad_norm": 1.0379488468170166, "learning_rate": 2e-05, "loss": 0.03042669, "step": 13051 }, { "epoch": 26.104, "grad_norm": 1.1127338409423828, "learning_rate": 2e-05, "loss": 0.03613283, "step": 13052 }, { "epoch": 26.106, "grad_norm": 1.1242481470108032, "learning_rate": 2e-05, "loss": 0.04261899, "step": 13053 }, { "epoch": 26.108, "grad_norm": 1.2181111574172974, "learning_rate": 2e-05, "loss": 0.04719329, "step": 13054 }, { "epoch": 26.11, "grad_norm": 1.2058982849121094, "learning_rate": 2e-05, "loss": 0.06653761, "step": 13055 }, { "epoch": 26.112, "grad_norm": 1.4276351928710938, "learning_rate": 2e-05, "loss": 0.05610083, "step": 13056 }, { "epoch": 26.114, "grad_norm": 0.9905567765235901, "learning_rate": 2e-05, "loss": 0.04845155, "step": 13057 }, { "epoch": 26.116, "grad_norm": 1.3336819410324097, "learning_rate": 2e-05, "loss": 0.05312827, "step": 13058 }, { "epoch": 26.118, "grad_norm": 0.9444303512573242, "learning_rate": 2e-05, "loss": 0.03482283, "step": 13059 }, { "epoch": 26.12, "grad_norm": 0.996250331401825, "learning_rate": 2e-05, "loss": 0.04614938, "step": 13060 }, { "epoch": 26.122, "grad_norm": 1.7572993040084839, "learning_rate": 2e-05, "loss": 0.05452738, "step": 13061 }, { "epoch": 26.124, "grad_norm": 2.4778409004211426, "learning_rate": 2e-05, "loss": 0.04612068, "step": 13062 }, { "epoch": 26.126, "grad_norm": 3.1171796321868896, "learning_rate": 2e-05, "loss": 0.05271629, "step": 13063 }, { "epoch": 26.128, "grad_norm": 1.1955784559249878, "learning_rate": 2e-05, "loss": 0.05702571, "step": 13064 }, { "epoch": 26.13, "grad_norm": 1.1531009674072266, "learning_rate": 2e-05, "loss": 0.07428885, "step": 13065 }, { "epoch": 26.132, "grad_norm": 1.5899512767791748, "learning_rate": 2e-05, "loss": 0.05650626, "step": 13066 }, { "epoch": 26.134, "grad_norm": 2.5569217205047607, "learning_rate": 2e-05, "loss": 0.04320998, "step": 13067 }, { "epoch": 26.136, "grad_norm": 2.7545104026794434, "learning_rate": 2e-05, "loss": 0.05874462, "step": 13068 }, { "epoch": 26.138, "grad_norm": 1.7379029989242554, "learning_rate": 2e-05, "loss": 0.0431629, "step": 13069 }, { "epoch": 26.14, "grad_norm": 1.4623173475265503, "learning_rate": 2e-05, "loss": 0.05259592, "step": 13070 }, { "epoch": 26.142, "grad_norm": 1.5194517374038696, "learning_rate": 2e-05, "loss": 0.05410079, "step": 13071 }, { "epoch": 26.144, "grad_norm": 1.8581264019012451, "learning_rate": 2e-05, "loss": 0.0636259, "step": 13072 }, { "epoch": 26.146, "grad_norm": 1.996687889099121, "learning_rate": 2e-05, "loss": 0.05046812, "step": 13073 }, { "epoch": 26.148, "grad_norm": 1.1012266874313354, "learning_rate": 2e-05, "loss": 0.04685819, "step": 13074 }, { "epoch": 26.15, "grad_norm": 1.0161882638931274, "learning_rate": 2e-05, "loss": 0.03953772, "step": 13075 }, { "epoch": 26.152, "grad_norm": 1.2321478128433228, "learning_rate": 2e-05, "loss": 0.05106909, "step": 13076 }, { "epoch": 26.154, "grad_norm": 1.870470404624939, "learning_rate": 2e-05, "loss": 0.04648306, "step": 13077 }, { "epoch": 26.156, "grad_norm": 1.3896266222000122, "learning_rate": 2e-05, "loss": 0.05001019, "step": 13078 }, { "epoch": 26.158, "grad_norm": 1.4038615226745605, "learning_rate": 2e-05, "loss": 0.04493055, "step": 13079 }, { "epoch": 26.16, "grad_norm": 1.6230852603912354, "learning_rate": 2e-05, "loss": 0.0647686, "step": 13080 }, { "epoch": 26.162, "grad_norm": 1.161685585975647, "learning_rate": 2e-05, "loss": 0.05160481, "step": 13081 }, { "epoch": 26.164, "grad_norm": 1.4229129552841187, "learning_rate": 2e-05, "loss": 0.06063813, "step": 13082 }, { "epoch": 26.166, "grad_norm": 1.1569585800170898, "learning_rate": 2e-05, "loss": 0.05432572, "step": 13083 }, { "epoch": 26.168, "grad_norm": 1.037567377090454, "learning_rate": 2e-05, "loss": 0.04245497, "step": 13084 }, { "epoch": 26.17, "grad_norm": 2.2897512912750244, "learning_rate": 2e-05, "loss": 0.05925653, "step": 13085 }, { "epoch": 26.172, "grad_norm": 1.180833339691162, "learning_rate": 2e-05, "loss": 0.05951401, "step": 13086 }, { "epoch": 26.174, "grad_norm": 1.4828424453735352, "learning_rate": 2e-05, "loss": 0.0503644, "step": 13087 }, { "epoch": 26.176, "grad_norm": 1.269155740737915, "learning_rate": 2e-05, "loss": 0.05013175, "step": 13088 }, { "epoch": 26.178, "grad_norm": 1.2004210948944092, "learning_rate": 2e-05, "loss": 0.03948895, "step": 13089 }, { "epoch": 26.18, "grad_norm": 1.4316322803497314, "learning_rate": 2e-05, "loss": 0.06729488, "step": 13090 }, { "epoch": 26.182, "grad_norm": 2.2641303539276123, "learning_rate": 2e-05, "loss": 0.04448916, "step": 13091 }, { "epoch": 26.184, "grad_norm": 0.9553017616271973, "learning_rate": 2e-05, "loss": 0.0371742, "step": 13092 }, { "epoch": 26.186, "grad_norm": 1.1905176639556885, "learning_rate": 2e-05, "loss": 0.05304726, "step": 13093 }, { "epoch": 26.188, "grad_norm": 1.2717342376708984, "learning_rate": 2e-05, "loss": 0.05022329, "step": 13094 }, { "epoch": 26.19, "grad_norm": 1.2106961011886597, "learning_rate": 2e-05, "loss": 0.05375263, "step": 13095 }, { "epoch": 26.192, "grad_norm": 1.5705169439315796, "learning_rate": 2e-05, "loss": 0.0691887, "step": 13096 }, { "epoch": 26.194, "grad_norm": 1.2496750354766846, "learning_rate": 2e-05, "loss": 0.06671438, "step": 13097 }, { "epoch": 26.196, "grad_norm": 0.9870383739471436, "learning_rate": 2e-05, "loss": 0.03938806, "step": 13098 }, { "epoch": 26.198, "grad_norm": 1.0491816997528076, "learning_rate": 2e-05, "loss": 0.06126578, "step": 13099 }, { "epoch": 26.2, "grad_norm": 1.2023404836654663, "learning_rate": 2e-05, "loss": 0.05826823, "step": 13100 }, { "epoch": 26.202, "grad_norm": 1.0294982194900513, "learning_rate": 2e-05, "loss": 0.04186146, "step": 13101 }, { "epoch": 26.204, "grad_norm": 1.2625393867492676, "learning_rate": 2e-05, "loss": 0.05626705, "step": 13102 }, { "epoch": 26.206, "grad_norm": 1.6225744485855103, "learning_rate": 2e-05, "loss": 0.04541206, "step": 13103 }, { "epoch": 26.208, "grad_norm": 1.15347158908844, "learning_rate": 2e-05, "loss": 0.04946512, "step": 13104 }, { "epoch": 26.21, "grad_norm": 1.0879091024398804, "learning_rate": 2e-05, "loss": 0.05317451, "step": 13105 }, { "epoch": 26.212, "grad_norm": 1.7244552373886108, "learning_rate": 2e-05, "loss": 0.05838979, "step": 13106 }, { "epoch": 26.214, "grad_norm": 1.2458078861236572, "learning_rate": 2e-05, "loss": 0.04441869, "step": 13107 }, { "epoch": 26.216, "grad_norm": 1.0543304681777954, "learning_rate": 2e-05, "loss": 0.04496064, "step": 13108 }, { "epoch": 26.218, "grad_norm": 1.8887250423431396, "learning_rate": 2e-05, "loss": 0.05064189, "step": 13109 }, { "epoch": 26.22, "grad_norm": 1.2699052095413208, "learning_rate": 2e-05, "loss": 0.05022573, "step": 13110 }, { "epoch": 26.222, "grad_norm": 5.265373706817627, "learning_rate": 2e-05, "loss": 0.05604435, "step": 13111 }, { "epoch": 26.224, "grad_norm": 1.399698257446289, "learning_rate": 2e-05, "loss": 0.05014798, "step": 13112 }, { "epoch": 26.226, "grad_norm": 1.7079144716262817, "learning_rate": 2e-05, "loss": 0.04994708, "step": 13113 }, { "epoch": 26.228, "grad_norm": 1.1072089672088623, "learning_rate": 2e-05, "loss": 0.04147284, "step": 13114 }, { "epoch": 26.23, "grad_norm": 3.7190628051757812, "learning_rate": 2e-05, "loss": 0.04572514, "step": 13115 }, { "epoch": 26.232, "grad_norm": 1.077842116355896, "learning_rate": 2e-05, "loss": 0.0508615, "step": 13116 }, { "epoch": 26.234, "grad_norm": 1.2654563188552856, "learning_rate": 2e-05, "loss": 0.04813949, "step": 13117 }, { "epoch": 26.236, "grad_norm": 1.7438644170761108, "learning_rate": 2e-05, "loss": 0.08645959, "step": 13118 }, { "epoch": 26.238, "grad_norm": 1.9379938840866089, "learning_rate": 2e-05, "loss": 0.05991484, "step": 13119 }, { "epoch": 26.24, "grad_norm": 1.3786555528640747, "learning_rate": 2e-05, "loss": 0.05386643, "step": 13120 }, { "epoch": 26.242, "grad_norm": 1.3097126483917236, "learning_rate": 2e-05, "loss": 0.03937486, "step": 13121 }, { "epoch": 26.244, "grad_norm": 2.849011182785034, "learning_rate": 2e-05, "loss": 0.05412893, "step": 13122 }, { "epoch": 26.246, "grad_norm": 1.4321476221084595, "learning_rate": 2e-05, "loss": 0.04866327, "step": 13123 }, { "epoch": 26.248, "grad_norm": 1.1432344913482666, "learning_rate": 2e-05, "loss": 0.04498943, "step": 13124 }, { "epoch": 26.25, "grad_norm": 2.6234500408172607, "learning_rate": 2e-05, "loss": 0.07459951, "step": 13125 }, { "epoch": 26.252, "grad_norm": 1.1573379039764404, "learning_rate": 2e-05, "loss": 0.0550356, "step": 13126 }, { "epoch": 26.254, "grad_norm": 1.1036912202835083, "learning_rate": 2e-05, "loss": 0.0487242, "step": 13127 }, { "epoch": 26.256, "grad_norm": 1.0618441104888916, "learning_rate": 2e-05, "loss": 0.04143997, "step": 13128 }, { "epoch": 26.258, "grad_norm": 1.5669845342636108, "learning_rate": 2e-05, "loss": 0.04925434, "step": 13129 }, { "epoch": 26.26, "grad_norm": 1.3181036710739136, "learning_rate": 2e-05, "loss": 0.06127752, "step": 13130 }, { "epoch": 26.262, "grad_norm": 1.6169180870056152, "learning_rate": 2e-05, "loss": 0.03284564, "step": 13131 }, { "epoch": 26.264, "grad_norm": 1.0267077684402466, "learning_rate": 2e-05, "loss": 0.04716894, "step": 13132 }, { "epoch": 26.266, "grad_norm": 2.5498225688934326, "learning_rate": 2e-05, "loss": 0.05674269, "step": 13133 }, { "epoch": 26.268, "grad_norm": 1.1167256832122803, "learning_rate": 2e-05, "loss": 0.04536742, "step": 13134 }, { "epoch": 26.27, "grad_norm": 1.4899424314498901, "learning_rate": 2e-05, "loss": 0.06690214, "step": 13135 }, { "epoch": 26.272, "grad_norm": 1.8139880895614624, "learning_rate": 2e-05, "loss": 0.05363541, "step": 13136 }, { "epoch": 26.274, "grad_norm": 1.0848556756973267, "learning_rate": 2e-05, "loss": 0.03838192, "step": 13137 }, { "epoch": 26.276, "grad_norm": 1.632729172706604, "learning_rate": 2e-05, "loss": 0.05203764, "step": 13138 }, { "epoch": 26.278, "grad_norm": 1.3395700454711914, "learning_rate": 2e-05, "loss": 0.05462583, "step": 13139 }, { "epoch": 26.28, "grad_norm": 1.2405112981796265, "learning_rate": 2e-05, "loss": 0.06025987, "step": 13140 }, { "epoch": 26.282, "grad_norm": 1.0379074811935425, "learning_rate": 2e-05, "loss": 0.04501361, "step": 13141 }, { "epoch": 26.284, "grad_norm": 1.495490312576294, "learning_rate": 2e-05, "loss": 0.05465606, "step": 13142 }, { "epoch": 26.286, "grad_norm": 1.4001567363739014, "learning_rate": 2e-05, "loss": 0.06254378, "step": 13143 }, { "epoch": 26.288, "grad_norm": 2.180147409439087, "learning_rate": 2e-05, "loss": 0.05687723, "step": 13144 }, { "epoch": 26.29, "grad_norm": 1.0900766849517822, "learning_rate": 2e-05, "loss": 0.04095289, "step": 13145 }, { "epoch": 26.292, "grad_norm": 1.2467963695526123, "learning_rate": 2e-05, "loss": 0.04755872, "step": 13146 }, { "epoch": 26.294, "grad_norm": 1.3111648559570312, "learning_rate": 2e-05, "loss": 0.04797219, "step": 13147 }, { "epoch": 26.296, "grad_norm": 1.745369791984558, "learning_rate": 2e-05, "loss": 0.06120455, "step": 13148 }, { "epoch": 26.298, "grad_norm": 1.995434045791626, "learning_rate": 2e-05, "loss": 0.05725945, "step": 13149 }, { "epoch": 26.3, "grad_norm": 1.4909435510635376, "learning_rate": 2e-05, "loss": 0.03530233, "step": 13150 }, { "epoch": 26.302, "grad_norm": 1.9177268743515015, "learning_rate": 2e-05, "loss": 0.04429528, "step": 13151 }, { "epoch": 26.304, "grad_norm": 0.9509882926940918, "learning_rate": 2e-05, "loss": 0.04121961, "step": 13152 }, { "epoch": 26.306, "grad_norm": 1.1726635694503784, "learning_rate": 2e-05, "loss": 0.05946973, "step": 13153 }, { "epoch": 26.308, "grad_norm": 0.9578458070755005, "learning_rate": 2e-05, "loss": 0.03323229, "step": 13154 }, { "epoch": 26.31, "grad_norm": 1.4311693906784058, "learning_rate": 2e-05, "loss": 0.0650827, "step": 13155 }, { "epoch": 26.312, "grad_norm": 1.4815912246704102, "learning_rate": 2e-05, "loss": 0.04780266, "step": 13156 }, { "epoch": 26.314, "grad_norm": 0.9288123846054077, "learning_rate": 2e-05, "loss": 0.04919485, "step": 13157 }, { "epoch": 26.316, "grad_norm": 1.0259450674057007, "learning_rate": 2e-05, "loss": 0.05327821, "step": 13158 }, { "epoch": 26.318, "grad_norm": 1.3516197204589844, "learning_rate": 2e-05, "loss": 0.06006586, "step": 13159 }, { "epoch": 26.32, "grad_norm": 1.0829652547836304, "learning_rate": 2e-05, "loss": 0.03472528, "step": 13160 }, { "epoch": 26.322, "grad_norm": 1.1358028650283813, "learning_rate": 2e-05, "loss": 0.05279462, "step": 13161 }, { "epoch": 26.324, "grad_norm": 1.2773475646972656, "learning_rate": 2e-05, "loss": 0.05088441, "step": 13162 }, { "epoch": 26.326, "grad_norm": 2.0304408073425293, "learning_rate": 2e-05, "loss": 0.07043212, "step": 13163 }, { "epoch": 26.328, "grad_norm": 1.2063853740692139, "learning_rate": 2e-05, "loss": 0.05226305, "step": 13164 }, { "epoch": 26.33, "grad_norm": 0.9307627081871033, "learning_rate": 2e-05, "loss": 0.03685163, "step": 13165 }, { "epoch": 26.332, "grad_norm": 1.0387613773345947, "learning_rate": 2e-05, "loss": 0.04054015, "step": 13166 }, { "epoch": 26.334, "grad_norm": 1.1233134269714355, "learning_rate": 2e-05, "loss": 0.04951068, "step": 13167 }, { "epoch": 26.336, "grad_norm": 1.0664430856704712, "learning_rate": 2e-05, "loss": 0.04318389, "step": 13168 }, { "epoch": 26.338, "grad_norm": 2.2993507385253906, "learning_rate": 2e-05, "loss": 0.06765731, "step": 13169 }, { "epoch": 26.34, "grad_norm": 1.4607069492340088, "learning_rate": 2e-05, "loss": 0.05935891, "step": 13170 }, { "epoch": 26.342, "grad_norm": 1.092958927154541, "learning_rate": 2e-05, "loss": 0.04553217, "step": 13171 }, { "epoch": 26.344, "grad_norm": 1.0128450393676758, "learning_rate": 2e-05, "loss": 0.04868485, "step": 13172 }, { "epoch": 26.346, "grad_norm": 2.5389187335968018, "learning_rate": 2e-05, "loss": 0.05133006, "step": 13173 }, { "epoch": 26.348, "grad_norm": 1.9317764043807983, "learning_rate": 2e-05, "loss": 0.05514234, "step": 13174 }, { "epoch": 26.35, "grad_norm": 1.0142396688461304, "learning_rate": 2e-05, "loss": 0.03379548, "step": 13175 }, { "epoch": 26.352, "grad_norm": 0.981251060962677, "learning_rate": 2e-05, "loss": 0.03744878, "step": 13176 }, { "epoch": 26.354, "grad_norm": 1.2132534980773926, "learning_rate": 2e-05, "loss": 0.05565378, "step": 13177 }, { "epoch": 26.356, "grad_norm": 1.1087462902069092, "learning_rate": 2e-05, "loss": 0.05265976, "step": 13178 }, { "epoch": 26.358, "grad_norm": 1.3253639936447144, "learning_rate": 2e-05, "loss": 0.07169472, "step": 13179 }, { "epoch": 26.36, "grad_norm": 1.3717025518417358, "learning_rate": 2e-05, "loss": 0.07439954, "step": 13180 }, { "epoch": 26.362, "grad_norm": 1.5265077352523804, "learning_rate": 2e-05, "loss": 0.05934888, "step": 13181 }, { "epoch": 26.364, "grad_norm": 1.1080888509750366, "learning_rate": 2e-05, "loss": 0.03896499, "step": 13182 }, { "epoch": 26.366, "grad_norm": 1.3141381740570068, "learning_rate": 2e-05, "loss": 0.04777165, "step": 13183 }, { "epoch": 26.368, "grad_norm": 1.1650301218032837, "learning_rate": 2e-05, "loss": 0.06241257, "step": 13184 }, { "epoch": 26.37, "grad_norm": 1.2549761533737183, "learning_rate": 2e-05, "loss": 0.04697741, "step": 13185 }, { "epoch": 26.372, "grad_norm": 1.1545727252960205, "learning_rate": 2e-05, "loss": 0.0393183, "step": 13186 }, { "epoch": 26.374, "grad_norm": 1.2154042720794678, "learning_rate": 2e-05, "loss": 0.0445575, "step": 13187 }, { "epoch": 26.376, "grad_norm": 1.3717633485794067, "learning_rate": 2e-05, "loss": 0.07547009, "step": 13188 }, { "epoch": 26.378, "grad_norm": 1.2593660354614258, "learning_rate": 2e-05, "loss": 0.05768249, "step": 13189 }, { "epoch": 26.38, "grad_norm": 1.025618076324463, "learning_rate": 2e-05, "loss": 0.03844924, "step": 13190 }, { "epoch": 26.382, "grad_norm": 1.175331950187683, "learning_rate": 2e-05, "loss": 0.06221271, "step": 13191 }, { "epoch": 26.384, "grad_norm": 1.0403271913528442, "learning_rate": 2e-05, "loss": 0.05244515, "step": 13192 }, { "epoch": 26.386, "grad_norm": 1.1324750185012817, "learning_rate": 2e-05, "loss": 0.05097087, "step": 13193 }, { "epoch": 26.388, "grad_norm": 1.597510576248169, "learning_rate": 2e-05, "loss": 0.04272082, "step": 13194 }, { "epoch": 26.39, "grad_norm": 1.3717741966247559, "learning_rate": 2e-05, "loss": 0.05225078, "step": 13195 }, { "epoch": 26.392, "grad_norm": 1.117010474205017, "learning_rate": 2e-05, "loss": 0.05863439, "step": 13196 }, { "epoch": 26.394, "grad_norm": 1.0307581424713135, "learning_rate": 2e-05, "loss": 0.04025673, "step": 13197 }, { "epoch": 26.396, "grad_norm": 1.1895451545715332, "learning_rate": 2e-05, "loss": 0.04541435, "step": 13198 }, { "epoch": 26.398, "grad_norm": 1.0766724348068237, "learning_rate": 2e-05, "loss": 0.04858733, "step": 13199 }, { "epoch": 26.4, "grad_norm": 1.266336441040039, "learning_rate": 2e-05, "loss": 0.04933401, "step": 13200 }, { "epoch": 26.402, "grad_norm": 0.9119465947151184, "learning_rate": 2e-05, "loss": 0.04185937, "step": 13201 }, { "epoch": 26.404, "grad_norm": 2.0946035385131836, "learning_rate": 2e-05, "loss": 0.06359234, "step": 13202 }, { "epoch": 26.406, "grad_norm": 2.2637856006622314, "learning_rate": 2e-05, "loss": 0.04783437, "step": 13203 }, { "epoch": 26.408, "grad_norm": 1.1380088329315186, "learning_rate": 2e-05, "loss": 0.04492367, "step": 13204 }, { "epoch": 26.41, "grad_norm": 1.2385838031768799, "learning_rate": 2e-05, "loss": 0.05938327, "step": 13205 }, { "epoch": 26.412, "grad_norm": 1.6331820487976074, "learning_rate": 2e-05, "loss": 0.06207812, "step": 13206 }, { "epoch": 26.414, "grad_norm": 1.2377910614013672, "learning_rate": 2e-05, "loss": 0.05784852, "step": 13207 }, { "epoch": 26.416, "grad_norm": 1.3663805723190308, "learning_rate": 2e-05, "loss": 0.05090876, "step": 13208 }, { "epoch": 26.418, "grad_norm": 1.2845232486724854, "learning_rate": 2e-05, "loss": 0.0572209, "step": 13209 }, { "epoch": 26.42, "grad_norm": 3.141711950302124, "learning_rate": 2e-05, "loss": 0.06693154, "step": 13210 }, { "epoch": 26.422, "grad_norm": 1.205244541168213, "learning_rate": 2e-05, "loss": 0.06356322, "step": 13211 }, { "epoch": 26.424, "grad_norm": 1.9518312215805054, "learning_rate": 2e-05, "loss": 0.05921033, "step": 13212 }, { "epoch": 26.426, "grad_norm": 1.2242833375930786, "learning_rate": 2e-05, "loss": 0.05300123, "step": 13213 }, { "epoch": 26.428, "grad_norm": 1.729596734046936, "learning_rate": 2e-05, "loss": 0.06330936, "step": 13214 }, { "epoch": 26.43, "grad_norm": 1.3352259397506714, "learning_rate": 2e-05, "loss": 0.04613025, "step": 13215 }, { "epoch": 26.432, "grad_norm": 1.2625349760055542, "learning_rate": 2e-05, "loss": 0.04770651, "step": 13216 }, { "epoch": 26.434, "grad_norm": 1.345420241355896, "learning_rate": 2e-05, "loss": 0.06382583, "step": 13217 }, { "epoch": 26.436, "grad_norm": 1.0279170274734497, "learning_rate": 2e-05, "loss": 0.04521141, "step": 13218 }, { "epoch": 26.438, "grad_norm": 1.4427417516708374, "learning_rate": 2e-05, "loss": 0.04445741, "step": 13219 }, { "epoch": 26.44, "grad_norm": 1.3570067882537842, "learning_rate": 2e-05, "loss": 0.06396192, "step": 13220 }, { "epoch": 26.442, "grad_norm": 1.108048677444458, "learning_rate": 2e-05, "loss": 0.05925597, "step": 13221 }, { "epoch": 26.444, "grad_norm": 1.6790554523468018, "learning_rate": 2e-05, "loss": 0.04916564, "step": 13222 }, { "epoch": 26.446, "grad_norm": 1.0264934301376343, "learning_rate": 2e-05, "loss": 0.05014358, "step": 13223 }, { "epoch": 26.448, "grad_norm": 1.132135272026062, "learning_rate": 2e-05, "loss": 0.04457789, "step": 13224 }, { "epoch": 26.45, "grad_norm": 1.5034230947494507, "learning_rate": 2e-05, "loss": 0.04974685, "step": 13225 }, { "epoch": 26.452, "grad_norm": 1.1065789461135864, "learning_rate": 2e-05, "loss": 0.04760291, "step": 13226 }, { "epoch": 26.454, "grad_norm": 1.5903208255767822, "learning_rate": 2e-05, "loss": 0.05794491, "step": 13227 }, { "epoch": 26.456, "grad_norm": 1.0581609010696411, "learning_rate": 2e-05, "loss": 0.04343695, "step": 13228 }, { "epoch": 26.458, "grad_norm": 1.3373512029647827, "learning_rate": 2e-05, "loss": 0.05912542, "step": 13229 }, { "epoch": 26.46, "grad_norm": 0.9567322731018066, "learning_rate": 2e-05, "loss": 0.04032883, "step": 13230 }, { "epoch": 26.462, "grad_norm": 0.8994925022125244, "learning_rate": 2e-05, "loss": 0.0379176, "step": 13231 }, { "epoch": 26.464, "grad_norm": 1.464380145072937, "learning_rate": 2e-05, "loss": 0.05838546, "step": 13232 }, { "epoch": 26.466, "grad_norm": 2.7416834831237793, "learning_rate": 2e-05, "loss": 0.04611932, "step": 13233 }, { "epoch": 26.468, "grad_norm": 1.134220004081726, "learning_rate": 2e-05, "loss": 0.04399894, "step": 13234 }, { "epoch": 26.47, "grad_norm": 1.8290317058563232, "learning_rate": 2e-05, "loss": 0.05966036, "step": 13235 }, { "epoch": 26.472, "grad_norm": 0.9513573050498962, "learning_rate": 2e-05, "loss": 0.03478178, "step": 13236 }, { "epoch": 26.474, "grad_norm": 1.0561847686767578, "learning_rate": 2e-05, "loss": 0.0412119, "step": 13237 }, { "epoch": 26.476, "grad_norm": 1.0916813611984253, "learning_rate": 2e-05, "loss": 0.049308, "step": 13238 }, { "epoch": 26.478, "grad_norm": 1.2041770219802856, "learning_rate": 2e-05, "loss": 0.05385679, "step": 13239 }, { "epoch": 26.48, "grad_norm": 1.133793830871582, "learning_rate": 2e-05, "loss": 0.04168726, "step": 13240 }, { "epoch": 26.482, "grad_norm": 1.0468299388885498, "learning_rate": 2e-05, "loss": 0.04749839, "step": 13241 }, { "epoch": 26.484, "grad_norm": 1.442665457725525, "learning_rate": 2e-05, "loss": 0.05865144, "step": 13242 }, { "epoch": 26.486, "grad_norm": 1.3769043684005737, "learning_rate": 2e-05, "loss": 0.04683194, "step": 13243 }, { "epoch": 26.488, "grad_norm": 1.5264511108398438, "learning_rate": 2e-05, "loss": 0.06083031, "step": 13244 }, { "epoch": 26.49, "grad_norm": 1.8658037185668945, "learning_rate": 2e-05, "loss": 0.04151792, "step": 13245 }, { "epoch": 26.492, "grad_norm": 1.0687576532363892, "learning_rate": 2e-05, "loss": 0.04719483, "step": 13246 }, { "epoch": 26.494, "grad_norm": 1.0230963230133057, "learning_rate": 2e-05, "loss": 0.04502779, "step": 13247 }, { "epoch": 26.496, "grad_norm": 1.0735021829605103, "learning_rate": 2e-05, "loss": 0.04988918, "step": 13248 }, { "epoch": 26.498, "grad_norm": 1.313679814338684, "learning_rate": 2e-05, "loss": 0.06035023, "step": 13249 }, { "epoch": 26.5, "grad_norm": 1.219299554824829, "learning_rate": 2e-05, "loss": 0.06264805, "step": 13250 }, { "epoch": 26.502, "grad_norm": 1.1131439208984375, "learning_rate": 2e-05, "loss": 0.05607829, "step": 13251 }, { "epoch": 26.504, "grad_norm": 3.613189697265625, "learning_rate": 2e-05, "loss": 0.06390768, "step": 13252 }, { "epoch": 26.506, "grad_norm": 1.1748114824295044, "learning_rate": 2e-05, "loss": 0.0466454, "step": 13253 }, { "epoch": 26.508, "grad_norm": 1.2314624786376953, "learning_rate": 2e-05, "loss": 0.06864768, "step": 13254 }, { "epoch": 26.51, "grad_norm": 1.1474363803863525, "learning_rate": 2e-05, "loss": 0.03539545, "step": 13255 }, { "epoch": 26.512, "grad_norm": 1.3321956396102905, "learning_rate": 2e-05, "loss": 0.04917538, "step": 13256 }, { "epoch": 26.514, "grad_norm": 1.3954670429229736, "learning_rate": 2e-05, "loss": 0.0688828, "step": 13257 }, { "epoch": 26.516, "grad_norm": 1.2332017421722412, "learning_rate": 2e-05, "loss": 0.05598776, "step": 13258 }, { "epoch": 26.518, "grad_norm": 1.3138082027435303, "learning_rate": 2e-05, "loss": 0.04566165, "step": 13259 }, { "epoch": 26.52, "grad_norm": 1.2705225944519043, "learning_rate": 2e-05, "loss": 0.06151684, "step": 13260 }, { "epoch": 26.522, "grad_norm": 1.3123538494110107, "learning_rate": 2e-05, "loss": 0.05796801, "step": 13261 }, { "epoch": 26.524, "grad_norm": 1.1068973541259766, "learning_rate": 2e-05, "loss": 0.03931681, "step": 13262 }, { "epoch": 26.526, "grad_norm": 1.0414702892303467, "learning_rate": 2e-05, "loss": 0.04752044, "step": 13263 }, { "epoch": 26.528, "grad_norm": 1.1645705699920654, "learning_rate": 2e-05, "loss": 0.04385941, "step": 13264 }, { "epoch": 26.53, "grad_norm": 1.306740403175354, "learning_rate": 2e-05, "loss": 0.0544094, "step": 13265 }, { "epoch": 26.532, "grad_norm": 1.0808699131011963, "learning_rate": 2e-05, "loss": 0.05136307, "step": 13266 }, { "epoch": 26.534, "grad_norm": 6.374847888946533, "learning_rate": 2e-05, "loss": 0.04798483, "step": 13267 }, { "epoch": 26.536, "grad_norm": 1.2884674072265625, "learning_rate": 2e-05, "loss": 0.05522514, "step": 13268 }, { "epoch": 26.538, "grad_norm": 1.4735463857650757, "learning_rate": 2e-05, "loss": 0.06592898, "step": 13269 }, { "epoch": 26.54, "grad_norm": 1.2813459634780884, "learning_rate": 2e-05, "loss": 0.05419342, "step": 13270 }, { "epoch": 26.542, "grad_norm": 1.0963232517242432, "learning_rate": 2e-05, "loss": 0.04732808, "step": 13271 }, { "epoch": 26.544, "grad_norm": 1.7365325689315796, "learning_rate": 2e-05, "loss": 0.07154454, "step": 13272 }, { "epoch": 26.546, "grad_norm": 1.8865253925323486, "learning_rate": 2e-05, "loss": 0.05728427, "step": 13273 }, { "epoch": 26.548000000000002, "grad_norm": 1.0631060600280762, "learning_rate": 2e-05, "loss": 0.04352725, "step": 13274 }, { "epoch": 26.55, "grad_norm": 1.5659040212631226, "learning_rate": 2e-05, "loss": 0.06370717, "step": 13275 }, { "epoch": 26.552, "grad_norm": 1.35512113571167, "learning_rate": 2e-05, "loss": 0.0624593, "step": 13276 }, { "epoch": 26.554, "grad_norm": 1.116701364517212, "learning_rate": 2e-05, "loss": 0.05935667, "step": 13277 }, { "epoch": 26.556, "grad_norm": 1.1054497957229614, "learning_rate": 2e-05, "loss": 0.04536385, "step": 13278 }, { "epoch": 26.558, "grad_norm": 1.648237705230713, "learning_rate": 2e-05, "loss": 0.05427777, "step": 13279 }, { "epoch": 26.56, "grad_norm": 1.150078296661377, "learning_rate": 2e-05, "loss": 0.06186971, "step": 13280 }, { "epoch": 26.562, "grad_norm": 1.298709750175476, "learning_rate": 2e-05, "loss": 0.04700661, "step": 13281 }, { "epoch": 26.564, "grad_norm": 1.442981481552124, "learning_rate": 2e-05, "loss": 0.07026112, "step": 13282 }, { "epoch": 26.566, "grad_norm": 0.9356185793876648, "learning_rate": 2e-05, "loss": 0.03658377, "step": 13283 }, { "epoch": 26.568, "grad_norm": 0.9103512763977051, "learning_rate": 2e-05, "loss": 0.04727957, "step": 13284 }, { "epoch": 26.57, "grad_norm": 7.012560844421387, "learning_rate": 2e-05, "loss": 0.06903286, "step": 13285 }, { "epoch": 26.572, "grad_norm": 1.4070217609405518, "learning_rate": 2e-05, "loss": 0.05269193, "step": 13286 }, { "epoch": 26.574, "grad_norm": 1.077420711517334, "learning_rate": 2e-05, "loss": 0.03829563, "step": 13287 }, { "epoch": 26.576, "grad_norm": 1.1685208082199097, "learning_rate": 2e-05, "loss": 0.06135094, "step": 13288 }, { "epoch": 26.578, "grad_norm": 1.3737455606460571, "learning_rate": 2e-05, "loss": 0.05131703, "step": 13289 }, { "epoch": 26.58, "grad_norm": 1.229760766029358, "learning_rate": 2e-05, "loss": 0.0502059, "step": 13290 }, { "epoch": 26.582, "grad_norm": 1.4174994230270386, "learning_rate": 2e-05, "loss": 0.06914699, "step": 13291 }, { "epoch": 26.584, "grad_norm": 0.9644913673400879, "learning_rate": 2e-05, "loss": 0.04122015, "step": 13292 }, { "epoch": 26.586, "grad_norm": 1.0176165103912354, "learning_rate": 2e-05, "loss": 0.0398505, "step": 13293 }, { "epoch": 26.588, "grad_norm": 1.2282981872558594, "learning_rate": 2e-05, "loss": 0.06502441, "step": 13294 }, { "epoch": 26.59, "grad_norm": 2.152714967727661, "learning_rate": 2e-05, "loss": 0.05975642, "step": 13295 }, { "epoch": 26.592, "grad_norm": 1.9684288501739502, "learning_rate": 2e-05, "loss": 0.05579343, "step": 13296 }, { "epoch": 26.594, "grad_norm": 1.342258095741272, "learning_rate": 2e-05, "loss": 0.05168886, "step": 13297 }, { "epoch": 26.596, "grad_norm": 1.6183290481567383, "learning_rate": 2e-05, "loss": 0.0547764, "step": 13298 }, { "epoch": 26.598, "grad_norm": 1.58730947971344, "learning_rate": 2e-05, "loss": 0.04875871, "step": 13299 }, { "epoch": 26.6, "grad_norm": 1.839998722076416, "learning_rate": 2e-05, "loss": 0.05018334, "step": 13300 }, { "epoch": 26.602, "grad_norm": 1.052589774131775, "learning_rate": 2e-05, "loss": 0.04309983, "step": 13301 }, { "epoch": 26.604, "grad_norm": 1.2151761054992676, "learning_rate": 2e-05, "loss": 0.04942724, "step": 13302 }, { "epoch": 26.606, "grad_norm": 2.2870326042175293, "learning_rate": 2e-05, "loss": 0.04824776, "step": 13303 }, { "epoch": 26.608, "grad_norm": 1.0566188097000122, "learning_rate": 2e-05, "loss": 0.04932676, "step": 13304 }, { "epoch": 26.61, "grad_norm": 1.1661152839660645, "learning_rate": 2e-05, "loss": 0.04825563, "step": 13305 }, { "epoch": 26.612, "grad_norm": 1.4827237129211426, "learning_rate": 2e-05, "loss": 0.04262114, "step": 13306 }, { "epoch": 26.614, "grad_norm": 1.1317449808120728, "learning_rate": 2e-05, "loss": 0.05381303, "step": 13307 }, { "epoch": 26.616, "grad_norm": 1.5155291557312012, "learning_rate": 2e-05, "loss": 0.04964045, "step": 13308 }, { "epoch": 26.618, "grad_norm": 1.435861587524414, "learning_rate": 2e-05, "loss": 0.06266679, "step": 13309 }, { "epoch": 26.62, "grad_norm": 1.1185659170150757, "learning_rate": 2e-05, "loss": 0.0580629, "step": 13310 }, { "epoch": 26.622, "grad_norm": 1.3901716470718384, "learning_rate": 2e-05, "loss": 0.03445759, "step": 13311 }, { "epoch": 26.624, "grad_norm": 1.085146188735962, "learning_rate": 2e-05, "loss": 0.05184843, "step": 13312 }, { "epoch": 26.626, "grad_norm": 1.1906906366348267, "learning_rate": 2e-05, "loss": 0.05387475, "step": 13313 }, { "epoch": 26.628, "grad_norm": 0.9745740294456482, "learning_rate": 2e-05, "loss": 0.047746, "step": 13314 }, { "epoch": 26.63, "grad_norm": 1.7241405248641968, "learning_rate": 2e-05, "loss": 0.04471966, "step": 13315 }, { "epoch": 26.632, "grad_norm": 1.721947193145752, "learning_rate": 2e-05, "loss": 0.06881328, "step": 13316 }, { "epoch": 26.634, "grad_norm": 1.1999484300613403, "learning_rate": 2e-05, "loss": 0.05209696, "step": 13317 }, { "epoch": 26.636, "grad_norm": 1.4937021732330322, "learning_rate": 2e-05, "loss": 0.06319118, "step": 13318 }, { "epoch": 26.638, "grad_norm": 1.4807403087615967, "learning_rate": 2e-05, "loss": 0.04071166, "step": 13319 }, { "epoch": 26.64, "grad_norm": 1.2032313346862793, "learning_rate": 2e-05, "loss": 0.05783406, "step": 13320 }, { "epoch": 26.642, "grad_norm": 0.9704697728157043, "learning_rate": 2e-05, "loss": 0.03314882, "step": 13321 }, { "epoch": 26.644, "grad_norm": 1.1707502603530884, "learning_rate": 2e-05, "loss": 0.04419086, "step": 13322 }, { "epoch": 26.646, "grad_norm": 1.318064570426941, "learning_rate": 2e-05, "loss": 0.05181965, "step": 13323 }, { "epoch": 26.648, "grad_norm": 1.1124001741409302, "learning_rate": 2e-05, "loss": 0.04824238, "step": 13324 }, { "epoch": 26.65, "grad_norm": 1.2104384899139404, "learning_rate": 2e-05, "loss": 0.05137346, "step": 13325 }, { "epoch": 26.652, "grad_norm": 1.140204906463623, "learning_rate": 2e-05, "loss": 0.05226782, "step": 13326 }, { "epoch": 26.654, "grad_norm": 1.2928236722946167, "learning_rate": 2e-05, "loss": 0.03973456, "step": 13327 }, { "epoch": 26.656, "grad_norm": 0.8976708054542542, "learning_rate": 2e-05, "loss": 0.0361656, "step": 13328 }, { "epoch": 26.658, "grad_norm": 1.2751548290252686, "learning_rate": 2e-05, "loss": 0.04994558, "step": 13329 }, { "epoch": 26.66, "grad_norm": 1.0849025249481201, "learning_rate": 2e-05, "loss": 0.04145849, "step": 13330 }, { "epoch": 26.662, "grad_norm": 1.1326797008514404, "learning_rate": 2e-05, "loss": 0.04299215, "step": 13331 }, { "epoch": 26.664, "grad_norm": 1.076042652130127, "learning_rate": 2e-05, "loss": 0.04173702, "step": 13332 }, { "epoch": 26.666, "grad_norm": 1.107056736946106, "learning_rate": 2e-05, "loss": 0.05345965, "step": 13333 }, { "epoch": 26.668, "grad_norm": 1.2745137214660645, "learning_rate": 2e-05, "loss": 0.06330898, "step": 13334 }, { "epoch": 26.67, "grad_norm": 1.8877406120300293, "learning_rate": 2e-05, "loss": 0.06193574, "step": 13335 }, { "epoch": 26.672, "grad_norm": 1.1526881456375122, "learning_rate": 2e-05, "loss": 0.05003583, "step": 13336 }, { "epoch": 26.674, "grad_norm": 1.4239377975463867, "learning_rate": 2e-05, "loss": 0.05997038, "step": 13337 }, { "epoch": 26.676, "grad_norm": 1.0962940454483032, "learning_rate": 2e-05, "loss": 0.04370655, "step": 13338 }, { "epoch": 26.678, "grad_norm": 1.0551174879074097, "learning_rate": 2e-05, "loss": 0.04974976, "step": 13339 }, { "epoch": 26.68, "grad_norm": 1.3378124237060547, "learning_rate": 2e-05, "loss": 0.06182303, "step": 13340 }, { "epoch": 26.682, "grad_norm": 1.2719300985336304, "learning_rate": 2e-05, "loss": 0.04363333, "step": 13341 }, { "epoch": 26.684, "grad_norm": 0.9562928676605225, "learning_rate": 2e-05, "loss": 0.03557032, "step": 13342 }, { "epoch": 26.686, "grad_norm": 1.119278073310852, "learning_rate": 2e-05, "loss": 0.05500585, "step": 13343 }, { "epoch": 26.688, "grad_norm": 1.7050944566726685, "learning_rate": 2e-05, "loss": 0.0611475, "step": 13344 }, { "epoch": 26.69, "grad_norm": 1.226226806640625, "learning_rate": 2e-05, "loss": 0.04407649, "step": 13345 }, { "epoch": 26.692, "grad_norm": 1.1603362560272217, "learning_rate": 2e-05, "loss": 0.06644345, "step": 13346 }, { "epoch": 26.694, "grad_norm": 0.9289990663528442, "learning_rate": 2e-05, "loss": 0.03448639, "step": 13347 }, { "epoch": 26.696, "grad_norm": 1.0418504476547241, "learning_rate": 2e-05, "loss": 0.04236526, "step": 13348 }, { "epoch": 26.698, "grad_norm": 0.9695900678634644, "learning_rate": 2e-05, "loss": 0.04111674, "step": 13349 }, { "epoch": 26.7, "grad_norm": 0.9324625730514526, "learning_rate": 2e-05, "loss": 0.03929378, "step": 13350 }, { "epoch": 26.701999999999998, "grad_norm": 1.4047224521636963, "learning_rate": 2e-05, "loss": 0.06605045, "step": 13351 }, { "epoch": 26.704, "grad_norm": 1.3925564289093018, "learning_rate": 2e-05, "loss": 0.05571286, "step": 13352 }, { "epoch": 26.706, "grad_norm": 1.749979019165039, "learning_rate": 2e-05, "loss": 0.06745125, "step": 13353 }, { "epoch": 26.708, "grad_norm": 1.22734534740448, "learning_rate": 2e-05, "loss": 0.0587342, "step": 13354 }, { "epoch": 26.71, "grad_norm": 1.8725377321243286, "learning_rate": 2e-05, "loss": 0.06658432, "step": 13355 }, { "epoch": 26.712, "grad_norm": 1.18849515914917, "learning_rate": 2e-05, "loss": 0.03405991, "step": 13356 }, { "epoch": 26.714, "grad_norm": 1.5791819095611572, "learning_rate": 2e-05, "loss": 0.05914184, "step": 13357 }, { "epoch": 26.716, "grad_norm": 1.4424145221710205, "learning_rate": 2e-05, "loss": 0.03909074, "step": 13358 }, { "epoch": 26.718, "grad_norm": 1.595342755317688, "learning_rate": 2e-05, "loss": 0.05040825, "step": 13359 }, { "epoch": 26.72, "grad_norm": 1.8611633777618408, "learning_rate": 2e-05, "loss": 0.05254905, "step": 13360 }, { "epoch": 26.722, "grad_norm": 1.264326810836792, "learning_rate": 2e-05, "loss": 0.07023487, "step": 13361 }, { "epoch": 26.724, "grad_norm": 1.3335989713668823, "learning_rate": 2e-05, "loss": 0.04991566, "step": 13362 }, { "epoch": 26.726, "grad_norm": 1.0191148519515991, "learning_rate": 2e-05, "loss": 0.05633041, "step": 13363 }, { "epoch": 26.728, "grad_norm": 1.6957088708877563, "learning_rate": 2e-05, "loss": 0.04163916, "step": 13364 }, { "epoch": 26.73, "grad_norm": 1.3256901502609253, "learning_rate": 2e-05, "loss": 0.04524343, "step": 13365 }, { "epoch": 26.732, "grad_norm": 1.0504121780395508, "learning_rate": 2e-05, "loss": 0.05328402, "step": 13366 }, { "epoch": 26.734, "grad_norm": 1.228724718093872, "learning_rate": 2e-05, "loss": 0.04244063, "step": 13367 }, { "epoch": 26.736, "grad_norm": 0.9942216277122498, "learning_rate": 2e-05, "loss": 0.03584755, "step": 13368 }, { "epoch": 26.738, "grad_norm": 1.2876074314117432, "learning_rate": 2e-05, "loss": 0.0728865, "step": 13369 }, { "epoch": 26.74, "grad_norm": 1.1747729778289795, "learning_rate": 2e-05, "loss": 0.06701648, "step": 13370 }, { "epoch": 26.742, "grad_norm": 1.9831509590148926, "learning_rate": 2e-05, "loss": 0.05807206, "step": 13371 }, { "epoch": 26.744, "grad_norm": 1.108120322227478, "learning_rate": 2e-05, "loss": 0.05278052, "step": 13372 }, { "epoch": 26.746, "grad_norm": 1.2617312669754028, "learning_rate": 2e-05, "loss": 0.05679195, "step": 13373 }, { "epoch": 26.748, "grad_norm": 1.2960186004638672, "learning_rate": 2e-05, "loss": 0.04701432, "step": 13374 }, { "epoch": 26.75, "grad_norm": 1.0636016130447388, "learning_rate": 2e-05, "loss": 0.04439913, "step": 13375 }, { "epoch": 26.752, "grad_norm": 1.2635455131530762, "learning_rate": 2e-05, "loss": 0.03449576, "step": 13376 }, { "epoch": 26.754, "grad_norm": 1.2004293203353882, "learning_rate": 2e-05, "loss": 0.06352605, "step": 13377 }, { "epoch": 26.756, "grad_norm": 2.090850830078125, "learning_rate": 2e-05, "loss": 0.04741443, "step": 13378 }, { "epoch": 26.758, "grad_norm": 1.1920214891433716, "learning_rate": 2e-05, "loss": 0.05516186, "step": 13379 }, { "epoch": 26.76, "grad_norm": 1.1084606647491455, "learning_rate": 2e-05, "loss": 0.05634691, "step": 13380 }, { "epoch": 26.762, "grad_norm": 1.4865003824234009, "learning_rate": 2e-05, "loss": 0.0641894, "step": 13381 }, { "epoch": 26.764, "grad_norm": 1.1225507259368896, "learning_rate": 2e-05, "loss": 0.0450593, "step": 13382 }, { "epoch": 26.766, "grad_norm": 1.9932955503463745, "learning_rate": 2e-05, "loss": 0.05791462, "step": 13383 }, { "epoch": 26.768, "grad_norm": 0.8929860591888428, "learning_rate": 2e-05, "loss": 0.03649437, "step": 13384 }, { "epoch": 26.77, "grad_norm": 1.7671526670455933, "learning_rate": 2e-05, "loss": 0.06582198, "step": 13385 }, { "epoch": 26.772, "grad_norm": 1.9703317880630493, "learning_rate": 2e-05, "loss": 0.05542737, "step": 13386 }, { "epoch": 26.774, "grad_norm": 1.2753630876541138, "learning_rate": 2e-05, "loss": 0.05405483, "step": 13387 }, { "epoch": 26.776, "grad_norm": 1.046668529510498, "learning_rate": 2e-05, "loss": 0.03326663, "step": 13388 }, { "epoch": 26.778, "grad_norm": 1.2076324224472046, "learning_rate": 2e-05, "loss": 0.04809481, "step": 13389 }, { "epoch": 26.78, "grad_norm": 1.598802924156189, "learning_rate": 2e-05, "loss": 0.06436467, "step": 13390 }, { "epoch": 26.782, "grad_norm": 1.1584806442260742, "learning_rate": 2e-05, "loss": 0.0456543, "step": 13391 }, { "epoch": 26.784, "grad_norm": 1.131317377090454, "learning_rate": 2e-05, "loss": 0.05092357, "step": 13392 }, { "epoch": 26.786, "grad_norm": 1.1311395168304443, "learning_rate": 2e-05, "loss": 0.04797414, "step": 13393 }, { "epoch": 26.788, "grad_norm": 1.9838470220565796, "learning_rate": 2e-05, "loss": 0.04864065, "step": 13394 }, { "epoch": 26.79, "grad_norm": 1.8064590692520142, "learning_rate": 2e-05, "loss": 0.056331, "step": 13395 }, { "epoch": 26.792, "grad_norm": 1.1158827543258667, "learning_rate": 2e-05, "loss": 0.04320502, "step": 13396 }, { "epoch": 26.794, "grad_norm": 1.5387552976608276, "learning_rate": 2e-05, "loss": 0.04853616, "step": 13397 }, { "epoch": 26.796, "grad_norm": 1.7566001415252686, "learning_rate": 2e-05, "loss": 0.04736899, "step": 13398 }, { "epoch": 26.798000000000002, "grad_norm": 1.094180941581726, "learning_rate": 2e-05, "loss": 0.04937833, "step": 13399 }, { "epoch": 26.8, "grad_norm": 1.2456532716751099, "learning_rate": 2e-05, "loss": 0.05321681, "step": 13400 }, { "epoch": 26.802, "grad_norm": 0.9697641134262085, "learning_rate": 2e-05, "loss": 0.04513754, "step": 13401 }, { "epoch": 26.804, "grad_norm": 1.2267457246780396, "learning_rate": 2e-05, "loss": 0.05476194, "step": 13402 }, { "epoch": 26.806, "grad_norm": 1.0437538623809814, "learning_rate": 2e-05, "loss": 0.03745969, "step": 13403 }, { "epoch": 26.808, "grad_norm": 1.0773988962173462, "learning_rate": 2e-05, "loss": 0.03804321, "step": 13404 }, { "epoch": 26.81, "grad_norm": 2.34782075881958, "learning_rate": 2e-05, "loss": 0.05102146, "step": 13405 }, { "epoch": 26.812, "grad_norm": 1.27366042137146, "learning_rate": 2e-05, "loss": 0.04994742, "step": 13406 }, { "epoch": 26.814, "grad_norm": 1.2856743335723877, "learning_rate": 2e-05, "loss": 0.05034228, "step": 13407 }, { "epoch": 26.816, "grad_norm": 1.3903734683990479, "learning_rate": 2e-05, "loss": 0.06599744, "step": 13408 }, { "epoch": 26.818, "grad_norm": 1.1642183065414429, "learning_rate": 2e-05, "loss": 0.03917944, "step": 13409 }, { "epoch": 26.82, "grad_norm": 1.9895938634872437, "learning_rate": 2e-05, "loss": 0.05998167, "step": 13410 }, { "epoch": 26.822, "grad_norm": 1.6609904766082764, "learning_rate": 2e-05, "loss": 0.04581511, "step": 13411 }, { "epoch": 26.824, "grad_norm": 2.813419818878174, "learning_rate": 2e-05, "loss": 0.05934639, "step": 13412 }, { "epoch": 26.826, "grad_norm": 1.330479621887207, "learning_rate": 2e-05, "loss": 0.06485235, "step": 13413 }, { "epoch": 26.828, "grad_norm": 0.9554100036621094, "learning_rate": 2e-05, "loss": 0.03688127, "step": 13414 }, { "epoch": 26.83, "grad_norm": 1.3221074342727661, "learning_rate": 2e-05, "loss": 0.05746502, "step": 13415 }, { "epoch": 26.832, "grad_norm": 1.2634172439575195, "learning_rate": 2e-05, "loss": 0.04114674, "step": 13416 }, { "epoch": 26.834, "grad_norm": 1.2756625413894653, "learning_rate": 2e-05, "loss": 0.04632439, "step": 13417 }, { "epoch": 26.836, "grad_norm": 1.3295053243637085, "learning_rate": 2e-05, "loss": 0.0480155, "step": 13418 }, { "epoch": 26.838, "grad_norm": 0.9835327863693237, "learning_rate": 2e-05, "loss": 0.03880982, "step": 13419 }, { "epoch": 26.84, "grad_norm": 1.3555666208267212, "learning_rate": 2e-05, "loss": 0.03730651, "step": 13420 }, { "epoch": 26.842, "grad_norm": 1.2804653644561768, "learning_rate": 2e-05, "loss": 0.05204932, "step": 13421 }, { "epoch": 26.844, "grad_norm": 1.3233611583709717, "learning_rate": 2e-05, "loss": 0.07679477, "step": 13422 }, { "epoch": 26.846, "grad_norm": 1.5484726428985596, "learning_rate": 2e-05, "loss": 0.07074042, "step": 13423 }, { "epoch": 26.848, "grad_norm": 1.3596237897872925, "learning_rate": 2e-05, "loss": 0.06065723, "step": 13424 }, { "epoch": 26.85, "grad_norm": 1.1907684803009033, "learning_rate": 2e-05, "loss": 0.05623455, "step": 13425 }, { "epoch": 26.852, "grad_norm": 1.0989551544189453, "learning_rate": 2e-05, "loss": 0.06380674, "step": 13426 }, { "epoch": 26.854, "grad_norm": 1.487808108329773, "learning_rate": 2e-05, "loss": 0.05717751, "step": 13427 }, { "epoch": 26.856, "grad_norm": 0.9197402596473694, "learning_rate": 2e-05, "loss": 0.03407472, "step": 13428 }, { "epoch": 26.858, "grad_norm": 1.3216696977615356, "learning_rate": 2e-05, "loss": 0.0550472, "step": 13429 }, { "epoch": 26.86, "grad_norm": 0.981142520904541, "learning_rate": 2e-05, "loss": 0.04368771, "step": 13430 }, { "epoch": 26.862, "grad_norm": 1.8582639694213867, "learning_rate": 2e-05, "loss": 0.05056902, "step": 13431 }, { "epoch": 26.864, "grad_norm": 0.8691796064376831, "learning_rate": 2e-05, "loss": 0.03458659, "step": 13432 }, { "epoch": 26.866, "grad_norm": 1.9854918718338013, "learning_rate": 2e-05, "loss": 0.0623791, "step": 13433 }, { "epoch": 26.868, "grad_norm": 1.3548856973648071, "learning_rate": 2e-05, "loss": 0.06691352, "step": 13434 }, { "epoch": 26.87, "grad_norm": 1.199938178062439, "learning_rate": 2e-05, "loss": 0.04101471, "step": 13435 }, { "epoch": 26.872, "grad_norm": 1.0566649436950684, "learning_rate": 2e-05, "loss": 0.05181169, "step": 13436 }, { "epoch": 26.874, "grad_norm": 0.9982033967971802, "learning_rate": 2e-05, "loss": 0.04211713, "step": 13437 }, { "epoch": 26.876, "grad_norm": 1.733784794807434, "learning_rate": 2e-05, "loss": 0.05353837, "step": 13438 }, { "epoch": 26.878, "grad_norm": 1.277503490447998, "learning_rate": 2e-05, "loss": 0.05072171, "step": 13439 }, { "epoch": 26.88, "grad_norm": 1.5146923065185547, "learning_rate": 2e-05, "loss": 0.04689533, "step": 13440 }, { "epoch": 26.882, "grad_norm": 1.5443528890609741, "learning_rate": 2e-05, "loss": 0.06226253, "step": 13441 }, { "epoch": 26.884, "grad_norm": 0.9273480176925659, "learning_rate": 2e-05, "loss": 0.0373538, "step": 13442 }, { "epoch": 26.886, "grad_norm": 1.9211230278015137, "learning_rate": 2e-05, "loss": 0.07116228, "step": 13443 }, { "epoch": 26.888, "grad_norm": 1.0266342163085938, "learning_rate": 2e-05, "loss": 0.04924295, "step": 13444 }, { "epoch": 26.89, "grad_norm": 3.1845924854278564, "learning_rate": 2e-05, "loss": 0.0571335, "step": 13445 }, { "epoch": 26.892, "grad_norm": 1.1129764318466187, "learning_rate": 2e-05, "loss": 0.04218609, "step": 13446 }, { "epoch": 26.894, "grad_norm": 1.187995195388794, "learning_rate": 2e-05, "loss": 0.04500746, "step": 13447 }, { "epoch": 26.896, "grad_norm": 1.9154367446899414, "learning_rate": 2e-05, "loss": 0.07881141, "step": 13448 }, { "epoch": 26.898, "grad_norm": 1.5120553970336914, "learning_rate": 2e-05, "loss": 0.05724183, "step": 13449 }, { "epoch": 26.9, "grad_norm": 1.123485803604126, "learning_rate": 2e-05, "loss": 0.05353692, "step": 13450 }, { "epoch": 26.902, "grad_norm": 1.491946816444397, "learning_rate": 2e-05, "loss": 0.06026394, "step": 13451 }, { "epoch": 26.904, "grad_norm": 1.2873069047927856, "learning_rate": 2e-05, "loss": 0.05665761, "step": 13452 }, { "epoch": 26.906, "grad_norm": 1.4997217655181885, "learning_rate": 2e-05, "loss": 0.06611629, "step": 13453 }, { "epoch": 26.908, "grad_norm": 1.3807562589645386, "learning_rate": 2e-05, "loss": 0.04708906, "step": 13454 }, { "epoch": 26.91, "grad_norm": 1.1967717409133911, "learning_rate": 2e-05, "loss": 0.05000294, "step": 13455 }, { "epoch": 26.912, "grad_norm": 0.9841086864471436, "learning_rate": 2e-05, "loss": 0.04784705, "step": 13456 }, { "epoch": 26.914, "grad_norm": 1.2160786390304565, "learning_rate": 2e-05, "loss": 0.05242315, "step": 13457 }, { "epoch": 26.916, "grad_norm": 1.8159120082855225, "learning_rate": 2e-05, "loss": 0.0492094, "step": 13458 }, { "epoch": 26.918, "grad_norm": 2.728215217590332, "learning_rate": 2e-05, "loss": 0.0520096, "step": 13459 }, { "epoch": 26.92, "grad_norm": 1.1124919652938843, "learning_rate": 2e-05, "loss": 0.05309315, "step": 13460 }, { "epoch": 26.922, "grad_norm": 1.2887550592422485, "learning_rate": 2e-05, "loss": 0.06010344, "step": 13461 }, { "epoch": 26.924, "grad_norm": 1.0686558485031128, "learning_rate": 2e-05, "loss": 0.05369169, "step": 13462 }, { "epoch": 26.926, "grad_norm": 1.6229926347732544, "learning_rate": 2e-05, "loss": 0.0571235, "step": 13463 }, { "epoch": 26.928, "grad_norm": 1.430633783340454, "learning_rate": 2e-05, "loss": 0.05475698, "step": 13464 }, { "epoch": 26.93, "grad_norm": 1.2325900793075562, "learning_rate": 2e-05, "loss": 0.04147217, "step": 13465 }, { "epoch": 26.932, "grad_norm": 1.9730578660964966, "learning_rate": 2e-05, "loss": 0.05242481, "step": 13466 }, { "epoch": 26.934, "grad_norm": 1.063415765762329, "learning_rate": 2e-05, "loss": 0.04053297, "step": 13467 }, { "epoch": 26.936, "grad_norm": 0.8968410491943359, "learning_rate": 2e-05, "loss": 0.04108004, "step": 13468 }, { "epoch": 26.938, "grad_norm": 1.2411011457443237, "learning_rate": 2e-05, "loss": 0.04883048, "step": 13469 }, { "epoch": 26.94, "grad_norm": 1.221675157546997, "learning_rate": 2e-05, "loss": 0.05151308, "step": 13470 }, { "epoch": 26.942, "grad_norm": 1.037152886390686, "learning_rate": 2e-05, "loss": 0.0445459, "step": 13471 }, { "epoch": 26.944, "grad_norm": 1.2921762466430664, "learning_rate": 2e-05, "loss": 0.06004579, "step": 13472 }, { "epoch": 26.946, "grad_norm": 1.1060543060302734, "learning_rate": 2e-05, "loss": 0.04533466, "step": 13473 }, { "epoch": 26.948, "grad_norm": 1.8344323635101318, "learning_rate": 2e-05, "loss": 0.03602841, "step": 13474 }, { "epoch": 26.95, "grad_norm": 1.2391880750656128, "learning_rate": 2e-05, "loss": 0.04448626, "step": 13475 }, { "epoch": 26.951999999999998, "grad_norm": 1.072171688079834, "learning_rate": 2e-05, "loss": 0.04871508, "step": 13476 }, { "epoch": 26.954, "grad_norm": 2.1540536880493164, "learning_rate": 2e-05, "loss": 0.05453757, "step": 13477 }, { "epoch": 26.956, "grad_norm": 1.1730058193206787, "learning_rate": 2e-05, "loss": 0.04764026, "step": 13478 }, { "epoch": 26.958, "grad_norm": 1.7890634536743164, "learning_rate": 2e-05, "loss": 0.06414785, "step": 13479 }, { "epoch": 26.96, "grad_norm": 1.0291862487792969, "learning_rate": 2e-05, "loss": 0.0508276, "step": 13480 }, { "epoch": 26.962, "grad_norm": 1.664055585861206, "learning_rate": 2e-05, "loss": 0.06251275, "step": 13481 }, { "epoch": 26.964, "grad_norm": 1.9114272594451904, "learning_rate": 2e-05, "loss": 0.06242898, "step": 13482 }, { "epoch": 26.966, "grad_norm": 1.0080111026763916, "learning_rate": 2e-05, "loss": 0.04339243, "step": 13483 }, { "epoch": 26.968, "grad_norm": 1.4560894966125488, "learning_rate": 2e-05, "loss": 0.0516038, "step": 13484 }, { "epoch": 26.97, "grad_norm": 2.305577516555786, "learning_rate": 2e-05, "loss": 0.04791653, "step": 13485 }, { "epoch": 26.972, "grad_norm": 2.532003879547119, "learning_rate": 2e-05, "loss": 0.05522787, "step": 13486 }, { "epoch": 26.974, "grad_norm": 1.0381648540496826, "learning_rate": 2e-05, "loss": 0.04522001, "step": 13487 }, { "epoch": 26.976, "grad_norm": 1.9328395128250122, "learning_rate": 2e-05, "loss": 0.05120095, "step": 13488 }, { "epoch": 26.978, "grad_norm": 1.0722509622573853, "learning_rate": 2e-05, "loss": 0.05470925, "step": 13489 }, { "epoch": 26.98, "grad_norm": 1.1628985404968262, "learning_rate": 2e-05, "loss": 0.04816863, "step": 13490 }, { "epoch": 26.982, "grad_norm": 1.102081298828125, "learning_rate": 2e-05, "loss": 0.05070662, "step": 13491 }, { "epoch": 26.984, "grad_norm": 1.5382109880447388, "learning_rate": 2e-05, "loss": 0.06105553, "step": 13492 }, { "epoch": 26.986, "grad_norm": 1.7911146879196167, "learning_rate": 2e-05, "loss": 0.05552252, "step": 13493 }, { "epoch": 26.988, "grad_norm": 1.1248363256454468, "learning_rate": 2e-05, "loss": 0.04280062, "step": 13494 }, { "epoch": 26.99, "grad_norm": 1.108275055885315, "learning_rate": 2e-05, "loss": 0.0498399, "step": 13495 }, { "epoch": 26.992, "grad_norm": 1.0283393859863281, "learning_rate": 2e-05, "loss": 0.04871751, "step": 13496 }, { "epoch": 26.994, "grad_norm": 1.5245510339736938, "learning_rate": 2e-05, "loss": 0.08703743, "step": 13497 }, { "epoch": 26.996, "grad_norm": 1.1031713485717773, "learning_rate": 2e-05, "loss": 0.05174923, "step": 13498 }, { "epoch": 26.998, "grad_norm": 1.0976660251617432, "learning_rate": 2e-05, "loss": 0.03708324, "step": 13499 }, { "epoch": 27.0, "grad_norm": 1.3019388914108276, "learning_rate": 2e-05, "loss": 0.06600218, "step": 13500 }, { "epoch": 27.0, "eval_performance": { "AngleClassification_1": 0.998, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9840319361277445, "Equal_1": 0.996, "Equal_2": 0.9780439121756487, "Equal_3": 0.9860279441117764, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.9960079840319361, "Parallel_1": 0.9859719438877755, "Parallel_2": 0.9959919839679359, "Parallel_3": 0.99, "Perpendicular_1": 0.994, "Perpendicular_2": 0.99, "Perpendicular_3": 0.8857715430861723, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.998, "PointLiesOnCircle_3": 0.9952000000000001, "PointLiesOnLine_1": 0.9939879759519038, "PointLiesOnLine_2": 0.9979959919839679, "PointLiesOnLine_3": 0.9800399201596807 }, "eval_runtime": 226.8081, "eval_samples_per_second": 46.295, "eval_steps_per_second": 0.926, "step": 13500 }, { "epoch": 27.002, "grad_norm": 0.9873692393302917, "learning_rate": 2e-05, "loss": 0.04788631, "step": 13501 }, { "epoch": 27.004, "grad_norm": 1.2801342010498047, "learning_rate": 2e-05, "loss": 0.04159327, "step": 13502 }, { "epoch": 27.006, "grad_norm": 1.5515789985656738, "learning_rate": 2e-05, "loss": 0.04708043, "step": 13503 }, { "epoch": 27.008, "grad_norm": 1.30125093460083, "learning_rate": 2e-05, "loss": 0.05330529, "step": 13504 }, { "epoch": 27.01, "grad_norm": 1.5871691703796387, "learning_rate": 2e-05, "loss": 0.0661208, "step": 13505 }, { "epoch": 27.012, "grad_norm": 1.1906533241271973, "learning_rate": 2e-05, "loss": 0.05217385, "step": 13506 }, { "epoch": 27.014, "grad_norm": 1.8687881231307983, "learning_rate": 2e-05, "loss": 0.05460596, "step": 13507 }, { "epoch": 27.016, "grad_norm": 1.9657831192016602, "learning_rate": 2e-05, "loss": 0.04578027, "step": 13508 }, { "epoch": 27.018, "grad_norm": 1.5461804866790771, "learning_rate": 2e-05, "loss": 0.04632892, "step": 13509 }, { "epoch": 27.02, "grad_norm": 1.08390474319458, "learning_rate": 2e-05, "loss": 0.04842281, "step": 13510 }, { "epoch": 27.022, "grad_norm": 1.6836904287338257, "learning_rate": 2e-05, "loss": 0.06363787, "step": 13511 }, { "epoch": 27.024, "grad_norm": 1.0163270235061646, "learning_rate": 2e-05, "loss": 0.04757386, "step": 13512 }, { "epoch": 27.026, "grad_norm": 1.7118120193481445, "learning_rate": 2e-05, "loss": 0.06415087, "step": 13513 }, { "epoch": 27.028, "grad_norm": 1.8032100200653076, "learning_rate": 2e-05, "loss": 0.03989663, "step": 13514 }, { "epoch": 27.03, "grad_norm": 1.2385141849517822, "learning_rate": 2e-05, "loss": 0.04907056, "step": 13515 }, { "epoch": 27.032, "grad_norm": 1.4639415740966797, "learning_rate": 2e-05, "loss": 0.04766008, "step": 13516 }, { "epoch": 27.034, "grad_norm": 0.9580868482589722, "learning_rate": 2e-05, "loss": 0.03564188, "step": 13517 }, { "epoch": 27.036, "grad_norm": 1.120697021484375, "learning_rate": 2e-05, "loss": 0.04385304, "step": 13518 }, { "epoch": 27.038, "grad_norm": 1.1674983501434326, "learning_rate": 2e-05, "loss": 0.05096697, "step": 13519 }, { "epoch": 27.04, "grad_norm": 1.5755596160888672, "learning_rate": 2e-05, "loss": 0.06892861, "step": 13520 }, { "epoch": 27.042, "grad_norm": 1.153396725654602, "learning_rate": 2e-05, "loss": 0.04526899, "step": 13521 }, { "epoch": 27.044, "grad_norm": 5.516768455505371, "learning_rate": 2e-05, "loss": 0.06748554, "step": 13522 }, { "epoch": 27.046, "grad_norm": 0.8479743599891663, "learning_rate": 2e-05, "loss": 0.02769209, "step": 13523 }, { "epoch": 27.048, "grad_norm": 1.0615966320037842, "learning_rate": 2e-05, "loss": 0.04529217, "step": 13524 }, { "epoch": 27.05, "grad_norm": 0.9216062426567078, "learning_rate": 2e-05, "loss": 0.04702087, "step": 13525 }, { "epoch": 27.052, "grad_norm": 1.1367435455322266, "learning_rate": 2e-05, "loss": 0.04816135, "step": 13526 }, { "epoch": 27.054, "grad_norm": 1.1329289674758911, "learning_rate": 2e-05, "loss": 0.05322417, "step": 13527 }, { "epoch": 27.056, "grad_norm": 1.532296061515808, "learning_rate": 2e-05, "loss": 0.05503484, "step": 13528 }, { "epoch": 27.058, "grad_norm": 1.5901576280593872, "learning_rate": 2e-05, "loss": 0.06262022, "step": 13529 }, { "epoch": 27.06, "grad_norm": 1.5967779159545898, "learning_rate": 2e-05, "loss": 0.04403574, "step": 13530 }, { "epoch": 27.062, "grad_norm": 0.966113269329071, "learning_rate": 2e-05, "loss": 0.04282578, "step": 13531 }, { "epoch": 27.064, "grad_norm": 2.188673257827759, "learning_rate": 2e-05, "loss": 0.05408348, "step": 13532 }, { "epoch": 27.066, "grad_norm": 1.1561076641082764, "learning_rate": 2e-05, "loss": 0.0386926, "step": 13533 }, { "epoch": 27.068, "grad_norm": 0.8598868250846863, "learning_rate": 2e-05, "loss": 0.02973145, "step": 13534 }, { "epoch": 27.07, "grad_norm": 1.3912612199783325, "learning_rate": 2e-05, "loss": 0.04837053, "step": 13535 }, { "epoch": 27.072, "grad_norm": 1.1394031047821045, "learning_rate": 2e-05, "loss": 0.0506375, "step": 13536 }, { "epoch": 27.074, "grad_norm": 1.404325008392334, "learning_rate": 2e-05, "loss": 0.04690532, "step": 13537 }, { "epoch": 27.076, "grad_norm": 1.4108294248580933, "learning_rate": 2e-05, "loss": 0.05843884, "step": 13538 }, { "epoch": 27.078, "grad_norm": 0.853923499584198, "learning_rate": 2e-05, "loss": 0.03031493, "step": 13539 }, { "epoch": 27.08, "grad_norm": 1.1385308504104614, "learning_rate": 2e-05, "loss": 0.05130733, "step": 13540 }, { "epoch": 27.082, "grad_norm": 1.3952785730361938, "learning_rate": 2e-05, "loss": 0.05750138, "step": 13541 }, { "epoch": 27.084, "grad_norm": 1.254143476486206, "learning_rate": 2e-05, "loss": 0.04458023, "step": 13542 }, { "epoch": 27.086, "grad_norm": 1.1790730953216553, "learning_rate": 2e-05, "loss": 0.05331029, "step": 13543 }, { "epoch": 27.088, "grad_norm": 1.041754961013794, "learning_rate": 2e-05, "loss": 0.04956675, "step": 13544 }, { "epoch": 27.09, "grad_norm": 1.0502227544784546, "learning_rate": 2e-05, "loss": 0.04312596, "step": 13545 }, { "epoch": 27.092, "grad_norm": 1.0247337818145752, "learning_rate": 2e-05, "loss": 0.04822208, "step": 13546 }, { "epoch": 27.094, "grad_norm": 1.2307884693145752, "learning_rate": 2e-05, "loss": 0.04950684, "step": 13547 }, { "epoch": 27.096, "grad_norm": 1.0971537828445435, "learning_rate": 2e-05, "loss": 0.04755098, "step": 13548 }, { "epoch": 27.098, "grad_norm": 1.4812735319137573, "learning_rate": 2e-05, "loss": 0.05345424, "step": 13549 }, { "epoch": 27.1, "grad_norm": 1.0785373449325562, "learning_rate": 2e-05, "loss": 0.04222222, "step": 13550 }, { "epoch": 27.102, "grad_norm": 1.4908745288848877, "learning_rate": 2e-05, "loss": 0.03539231, "step": 13551 }, { "epoch": 27.104, "grad_norm": 1.0993859767913818, "learning_rate": 2e-05, "loss": 0.05072219, "step": 13552 }, { "epoch": 27.106, "grad_norm": 0.9125502109527588, "learning_rate": 2e-05, "loss": 0.04257827, "step": 13553 }, { "epoch": 27.108, "grad_norm": 1.218759298324585, "learning_rate": 2e-05, "loss": 0.04790259, "step": 13554 }, { "epoch": 27.11, "grad_norm": 1.2503265142440796, "learning_rate": 2e-05, "loss": 0.05482372, "step": 13555 }, { "epoch": 27.112, "grad_norm": 1.270150899887085, "learning_rate": 2e-05, "loss": 0.06402293, "step": 13556 }, { "epoch": 27.114, "grad_norm": 0.90687495470047, "learning_rate": 2e-05, "loss": 0.04171782, "step": 13557 }, { "epoch": 27.116, "grad_norm": 1.1963670253753662, "learning_rate": 2e-05, "loss": 0.06335641, "step": 13558 }, { "epoch": 27.118, "grad_norm": 0.9715232849121094, "learning_rate": 2e-05, "loss": 0.04580106, "step": 13559 }, { "epoch": 27.12, "grad_norm": 1.0304040908813477, "learning_rate": 2e-05, "loss": 0.0395345, "step": 13560 }, { "epoch": 27.122, "grad_norm": 1.0736416578292847, "learning_rate": 2e-05, "loss": 0.0429241, "step": 13561 }, { "epoch": 27.124, "grad_norm": 1.4583771228790283, "learning_rate": 2e-05, "loss": 0.05254416, "step": 13562 }, { "epoch": 27.126, "grad_norm": 2.536743402481079, "learning_rate": 2e-05, "loss": 0.04678726, "step": 13563 }, { "epoch": 27.128, "grad_norm": 1.4211798906326294, "learning_rate": 2e-05, "loss": 0.06039127, "step": 13564 }, { "epoch": 27.13, "grad_norm": 1.340863823890686, "learning_rate": 2e-05, "loss": 0.04962276, "step": 13565 }, { "epoch": 27.132, "grad_norm": 1.7033146619796753, "learning_rate": 2e-05, "loss": 0.06622193, "step": 13566 }, { "epoch": 27.134, "grad_norm": 1.1954058408737183, "learning_rate": 2e-05, "loss": 0.06669568, "step": 13567 }, { "epoch": 27.136, "grad_norm": 1.130146861076355, "learning_rate": 2e-05, "loss": 0.03560343, "step": 13568 }, { "epoch": 27.138, "grad_norm": 1.269113540649414, "learning_rate": 2e-05, "loss": 0.05632553, "step": 13569 }, { "epoch": 27.14, "grad_norm": 1.1189799308776855, "learning_rate": 2e-05, "loss": 0.05277349, "step": 13570 }, { "epoch": 27.142, "grad_norm": 1.2352285385131836, "learning_rate": 2e-05, "loss": 0.04073992, "step": 13571 }, { "epoch": 27.144, "grad_norm": 1.7227200269699097, "learning_rate": 2e-05, "loss": 0.02981717, "step": 13572 }, { "epoch": 27.146, "grad_norm": 1.4771575927734375, "learning_rate": 2e-05, "loss": 0.04973167, "step": 13573 }, { "epoch": 27.148, "grad_norm": 1.2423698902130127, "learning_rate": 2e-05, "loss": 0.04917215, "step": 13574 }, { "epoch": 27.15, "grad_norm": 1.4005793333053589, "learning_rate": 2e-05, "loss": 0.05550402, "step": 13575 }, { "epoch": 27.152, "grad_norm": 1.0053901672363281, "learning_rate": 2e-05, "loss": 0.04665535, "step": 13576 }, { "epoch": 27.154, "grad_norm": 1.8280709981918335, "learning_rate": 2e-05, "loss": 0.04688382, "step": 13577 }, { "epoch": 27.156, "grad_norm": 1.191278338432312, "learning_rate": 2e-05, "loss": 0.05568808, "step": 13578 }, { "epoch": 27.158, "grad_norm": 0.9335781335830688, "learning_rate": 2e-05, "loss": 0.03897196, "step": 13579 }, { "epoch": 27.16, "grad_norm": 1.2709007263183594, "learning_rate": 2e-05, "loss": 0.04309512, "step": 13580 }, { "epoch": 27.162, "grad_norm": 1.1201903820037842, "learning_rate": 2e-05, "loss": 0.05658621, "step": 13581 }, { "epoch": 27.164, "grad_norm": 0.9823492765426636, "learning_rate": 2e-05, "loss": 0.03707642, "step": 13582 }, { "epoch": 27.166, "grad_norm": 1.320197582244873, "learning_rate": 2e-05, "loss": 0.04763805, "step": 13583 }, { "epoch": 27.168, "grad_norm": 1.2183752059936523, "learning_rate": 2e-05, "loss": 0.05127168, "step": 13584 }, { "epoch": 27.17, "grad_norm": 1.3342764377593994, "learning_rate": 2e-05, "loss": 0.04884848, "step": 13585 }, { "epoch": 27.172, "grad_norm": 1.7080028057098389, "learning_rate": 2e-05, "loss": 0.05730741, "step": 13586 }, { "epoch": 27.174, "grad_norm": 1.3111906051635742, "learning_rate": 2e-05, "loss": 0.04510331, "step": 13587 }, { "epoch": 27.176, "grad_norm": 0.9458618760108948, "learning_rate": 2e-05, "loss": 0.0407837, "step": 13588 }, { "epoch": 27.178, "grad_norm": 3.409698247909546, "learning_rate": 2e-05, "loss": 0.05297036, "step": 13589 }, { "epoch": 27.18, "grad_norm": 1.1118155717849731, "learning_rate": 2e-05, "loss": 0.04160015, "step": 13590 }, { "epoch": 27.182, "grad_norm": 1.1362708806991577, "learning_rate": 2e-05, "loss": 0.05333767, "step": 13591 }, { "epoch": 27.184, "grad_norm": 0.9334754943847656, "learning_rate": 2e-05, "loss": 0.04053168, "step": 13592 }, { "epoch": 27.186, "grad_norm": 1.761768102645874, "learning_rate": 2e-05, "loss": 0.07699054, "step": 13593 }, { "epoch": 27.188, "grad_norm": 2.5373241901397705, "learning_rate": 2e-05, "loss": 0.08345076, "step": 13594 }, { "epoch": 27.19, "grad_norm": 1.7024145126342773, "learning_rate": 2e-05, "loss": 0.05732506, "step": 13595 }, { "epoch": 27.192, "grad_norm": 1.2460863590240479, "learning_rate": 2e-05, "loss": 0.03381988, "step": 13596 }, { "epoch": 27.194, "grad_norm": 2.921720504760742, "learning_rate": 2e-05, "loss": 0.07139143, "step": 13597 }, { "epoch": 27.196, "grad_norm": 1.621862530708313, "learning_rate": 2e-05, "loss": 0.05487503, "step": 13598 }, { "epoch": 27.198, "grad_norm": 1.934372901916504, "learning_rate": 2e-05, "loss": 0.04366919, "step": 13599 }, { "epoch": 27.2, "grad_norm": 1.1235582828521729, "learning_rate": 2e-05, "loss": 0.04188158, "step": 13600 }, { "epoch": 27.202, "grad_norm": 1.4671112298965454, "learning_rate": 2e-05, "loss": 0.06556534, "step": 13601 }, { "epoch": 27.204, "grad_norm": 1.5312081575393677, "learning_rate": 2e-05, "loss": 0.0522366, "step": 13602 }, { "epoch": 27.206, "grad_norm": 1.6893941164016724, "learning_rate": 2e-05, "loss": 0.05343276, "step": 13603 }, { "epoch": 27.208, "grad_norm": 1.552497148513794, "learning_rate": 2e-05, "loss": 0.06101022, "step": 13604 }, { "epoch": 27.21, "grad_norm": 1.107526421546936, "learning_rate": 2e-05, "loss": 0.04270256, "step": 13605 }, { "epoch": 27.212, "grad_norm": 1.152831792831421, "learning_rate": 2e-05, "loss": 0.05193291, "step": 13606 }, { "epoch": 27.214, "grad_norm": 1.1527708768844604, "learning_rate": 2e-05, "loss": 0.03270804, "step": 13607 }, { "epoch": 27.216, "grad_norm": 1.5784761905670166, "learning_rate": 2e-05, "loss": 0.06122062, "step": 13608 }, { "epoch": 27.218, "grad_norm": 1.1828579902648926, "learning_rate": 2e-05, "loss": 0.0529822, "step": 13609 }, { "epoch": 27.22, "grad_norm": 0.7792627811431885, "learning_rate": 2e-05, "loss": 0.02556605, "step": 13610 }, { "epoch": 27.222, "grad_norm": 1.0704801082611084, "learning_rate": 2e-05, "loss": 0.04917482, "step": 13611 }, { "epoch": 27.224, "grad_norm": 1.3431028127670288, "learning_rate": 2e-05, "loss": 0.04681998, "step": 13612 }, { "epoch": 27.226, "grad_norm": 1.1690142154693604, "learning_rate": 2e-05, "loss": 0.04428925, "step": 13613 }, { "epoch": 27.228, "grad_norm": 1.4378142356872559, "learning_rate": 2e-05, "loss": 0.04374067, "step": 13614 }, { "epoch": 27.23, "grad_norm": 2.012057065963745, "learning_rate": 2e-05, "loss": 0.05226861, "step": 13615 }, { "epoch": 27.232, "grad_norm": 1.0584989786148071, "learning_rate": 2e-05, "loss": 0.0444513, "step": 13616 }, { "epoch": 27.234, "grad_norm": 1.155761480331421, "learning_rate": 2e-05, "loss": 0.04416853, "step": 13617 }, { "epoch": 27.236, "grad_norm": 1.1852929592132568, "learning_rate": 2e-05, "loss": 0.04524426, "step": 13618 }, { "epoch": 27.238, "grad_norm": 1.1485570669174194, "learning_rate": 2e-05, "loss": 0.05691415, "step": 13619 }, { "epoch": 27.24, "grad_norm": 1.1632615327835083, "learning_rate": 2e-05, "loss": 0.04813182, "step": 13620 }, { "epoch": 27.242, "grad_norm": 1.3293126821517944, "learning_rate": 2e-05, "loss": 0.05324798, "step": 13621 }, { "epoch": 27.244, "grad_norm": 1.0680338144302368, "learning_rate": 2e-05, "loss": 0.04556869, "step": 13622 }, { "epoch": 27.246, "grad_norm": 1.0575073957443237, "learning_rate": 2e-05, "loss": 0.05174411, "step": 13623 }, { "epoch": 27.248, "grad_norm": 1.5854289531707764, "learning_rate": 2e-05, "loss": 0.04932591, "step": 13624 }, { "epoch": 27.25, "grad_norm": 1.0535950660705566, "learning_rate": 2e-05, "loss": 0.04823812, "step": 13625 }, { "epoch": 27.252, "grad_norm": 1.7334213256835938, "learning_rate": 2e-05, "loss": 0.05159856, "step": 13626 }, { "epoch": 27.254, "grad_norm": 1.5396692752838135, "learning_rate": 2e-05, "loss": 0.06329159, "step": 13627 }, { "epoch": 27.256, "grad_norm": 1.4117631912231445, "learning_rate": 2e-05, "loss": 0.05391858, "step": 13628 }, { "epoch": 27.258, "grad_norm": 1.3930706977844238, "learning_rate": 2e-05, "loss": 0.04341757, "step": 13629 }, { "epoch": 27.26, "grad_norm": 1.0496158599853516, "learning_rate": 2e-05, "loss": 0.03703929, "step": 13630 }, { "epoch": 27.262, "grad_norm": 1.1668221950531006, "learning_rate": 2e-05, "loss": 0.06037152, "step": 13631 }, { "epoch": 27.264, "grad_norm": 1.208855152130127, "learning_rate": 2e-05, "loss": 0.04942504, "step": 13632 }, { "epoch": 27.266, "grad_norm": 1.1125659942626953, "learning_rate": 2e-05, "loss": 0.04107204, "step": 13633 }, { "epoch": 27.268, "grad_norm": 0.9948403239250183, "learning_rate": 2e-05, "loss": 0.04543294, "step": 13634 }, { "epoch": 27.27, "grad_norm": 1.4545191526412964, "learning_rate": 2e-05, "loss": 0.05700861, "step": 13635 }, { "epoch": 27.272, "grad_norm": 0.8263612985610962, "learning_rate": 2e-05, "loss": 0.0241911, "step": 13636 }, { "epoch": 27.274, "grad_norm": 2.439054250717163, "learning_rate": 2e-05, "loss": 0.05339174, "step": 13637 }, { "epoch": 27.276, "grad_norm": 1.1809505224227905, "learning_rate": 2e-05, "loss": 0.04406996, "step": 13638 }, { "epoch": 27.278, "grad_norm": 1.4361692667007446, "learning_rate": 2e-05, "loss": 0.04413934, "step": 13639 }, { "epoch": 27.28, "grad_norm": 1.0051506757736206, "learning_rate": 2e-05, "loss": 0.04318884, "step": 13640 }, { "epoch": 27.282, "grad_norm": 1.1196904182434082, "learning_rate": 2e-05, "loss": 0.02383162, "step": 13641 }, { "epoch": 27.284, "grad_norm": 0.977554202079773, "learning_rate": 2e-05, "loss": 0.04639881, "step": 13642 }, { "epoch": 27.286, "grad_norm": 1.5253580808639526, "learning_rate": 2e-05, "loss": 0.06579242, "step": 13643 }, { "epoch": 27.288, "grad_norm": 2.20054030418396, "learning_rate": 2e-05, "loss": 0.06384794, "step": 13644 }, { "epoch": 27.29, "grad_norm": 1.0553257465362549, "learning_rate": 2e-05, "loss": 0.03681123, "step": 13645 }, { "epoch": 27.292, "grad_norm": 2.355741262435913, "learning_rate": 2e-05, "loss": 0.06212309, "step": 13646 }, { "epoch": 27.294, "grad_norm": 2.567756175994873, "learning_rate": 2e-05, "loss": 0.05767478, "step": 13647 }, { "epoch": 27.296, "grad_norm": 1.6233216524124146, "learning_rate": 2e-05, "loss": 0.04872718, "step": 13648 }, { "epoch": 27.298, "grad_norm": 1.2056982517242432, "learning_rate": 2e-05, "loss": 0.05383076, "step": 13649 }, { "epoch": 27.3, "grad_norm": 3.3751087188720703, "learning_rate": 2e-05, "loss": 0.05286688, "step": 13650 }, { "epoch": 27.302, "grad_norm": 1.2072515487670898, "learning_rate": 2e-05, "loss": 0.03878843, "step": 13651 }, { "epoch": 27.304, "grad_norm": 1.9044798612594604, "learning_rate": 2e-05, "loss": 0.05178762, "step": 13652 }, { "epoch": 27.306, "grad_norm": 0.944718599319458, "learning_rate": 2e-05, "loss": 0.035604, "step": 13653 }, { "epoch": 27.308, "grad_norm": 1.0908132791519165, "learning_rate": 2e-05, "loss": 0.0458922, "step": 13654 }, { "epoch": 27.31, "grad_norm": 1.706869125366211, "learning_rate": 2e-05, "loss": 0.0641887, "step": 13655 }, { "epoch": 27.312, "grad_norm": 1.0811024904251099, "learning_rate": 2e-05, "loss": 0.04592989, "step": 13656 }, { "epoch": 27.314, "grad_norm": 0.9977642297744751, "learning_rate": 2e-05, "loss": 0.04438194, "step": 13657 }, { "epoch": 27.316, "grad_norm": 1.26127290725708, "learning_rate": 2e-05, "loss": 0.05829148, "step": 13658 }, { "epoch": 27.318, "grad_norm": 1.5470954179763794, "learning_rate": 2e-05, "loss": 0.05833955, "step": 13659 }, { "epoch": 27.32, "grad_norm": 1.087085485458374, "learning_rate": 2e-05, "loss": 0.04190677, "step": 13660 }, { "epoch": 27.322, "grad_norm": 1.3258880376815796, "learning_rate": 2e-05, "loss": 0.04654058, "step": 13661 }, { "epoch": 27.324, "grad_norm": 1.328514814376831, "learning_rate": 2e-05, "loss": 0.05247537, "step": 13662 }, { "epoch": 27.326, "grad_norm": 3.411275625228882, "learning_rate": 2e-05, "loss": 0.05770024, "step": 13663 }, { "epoch": 27.328, "grad_norm": 1.2988200187683105, "learning_rate": 2e-05, "loss": 0.04476162, "step": 13664 }, { "epoch": 27.33, "grad_norm": 1.094386100769043, "learning_rate": 2e-05, "loss": 0.05379028, "step": 13665 }, { "epoch": 27.332, "grad_norm": 1.0226749181747437, "learning_rate": 2e-05, "loss": 0.04450128, "step": 13666 }, { "epoch": 27.334, "grad_norm": 1.1480973958969116, "learning_rate": 2e-05, "loss": 0.04870499, "step": 13667 }, { "epoch": 27.336, "grad_norm": 1.2726924419403076, "learning_rate": 2e-05, "loss": 0.04379404, "step": 13668 }, { "epoch": 27.338, "grad_norm": 1.7234653234481812, "learning_rate": 2e-05, "loss": 0.06291762, "step": 13669 }, { "epoch": 27.34, "grad_norm": 1.1524583101272583, "learning_rate": 2e-05, "loss": 0.04785874, "step": 13670 }, { "epoch": 27.342, "grad_norm": 1.5644360780715942, "learning_rate": 2e-05, "loss": 0.04766168, "step": 13671 }, { "epoch": 27.344, "grad_norm": 6.181217670440674, "learning_rate": 2e-05, "loss": 0.06027981, "step": 13672 }, { "epoch": 27.346, "grad_norm": 1.5024560689926147, "learning_rate": 2e-05, "loss": 0.05388537, "step": 13673 }, { "epoch": 27.348, "grad_norm": 1.0392744541168213, "learning_rate": 2e-05, "loss": 0.04798413, "step": 13674 }, { "epoch": 27.35, "grad_norm": 1.3223525285720825, "learning_rate": 2e-05, "loss": 0.0587207, "step": 13675 }, { "epoch": 27.352, "grad_norm": 1.1181870698928833, "learning_rate": 2e-05, "loss": 0.04296366, "step": 13676 }, { "epoch": 27.354, "grad_norm": 1.0634915828704834, "learning_rate": 2e-05, "loss": 0.04998367, "step": 13677 }, { "epoch": 27.356, "grad_norm": 1.6651540994644165, "learning_rate": 2e-05, "loss": 0.05943678, "step": 13678 }, { "epoch": 27.358, "grad_norm": 1.231003999710083, "learning_rate": 2e-05, "loss": 0.05515921, "step": 13679 }, { "epoch": 27.36, "grad_norm": 2.198192834854126, "learning_rate": 2e-05, "loss": 0.05521581, "step": 13680 }, { "epoch": 27.362, "grad_norm": 1.591314435005188, "learning_rate": 2e-05, "loss": 0.04444683, "step": 13681 }, { "epoch": 27.364, "grad_norm": 1.3259187936782837, "learning_rate": 2e-05, "loss": 0.04511594, "step": 13682 }, { "epoch": 27.366, "grad_norm": 1.0680851936340332, "learning_rate": 2e-05, "loss": 0.04297246, "step": 13683 }, { "epoch": 27.368, "grad_norm": 2.427532434463501, "learning_rate": 2e-05, "loss": 0.05413146, "step": 13684 }, { "epoch": 27.37, "grad_norm": 1.2107059955596924, "learning_rate": 2e-05, "loss": 0.0505407, "step": 13685 }, { "epoch": 27.372, "grad_norm": 1.6088509559631348, "learning_rate": 2e-05, "loss": 0.08271807, "step": 13686 }, { "epoch": 27.374, "grad_norm": 1.08454430103302, "learning_rate": 2e-05, "loss": 0.04307467, "step": 13687 }, { "epoch": 27.376, "grad_norm": 1.4270795583724976, "learning_rate": 2e-05, "loss": 0.05893801, "step": 13688 }, { "epoch": 27.378, "grad_norm": 1.019391655921936, "learning_rate": 2e-05, "loss": 0.03847574, "step": 13689 }, { "epoch": 27.38, "grad_norm": 1.6530576944351196, "learning_rate": 2e-05, "loss": 0.04754023, "step": 13690 }, { "epoch": 27.382, "grad_norm": 1.9182401895523071, "learning_rate": 2e-05, "loss": 0.06066131, "step": 13691 }, { "epoch": 27.384, "grad_norm": 1.1759978532791138, "learning_rate": 2e-05, "loss": 0.06101045, "step": 13692 }, { "epoch": 27.386, "grad_norm": 1.0552345514297485, "learning_rate": 2e-05, "loss": 0.04291611, "step": 13693 }, { "epoch": 27.388, "grad_norm": 2.6768200397491455, "learning_rate": 2e-05, "loss": 0.05817779, "step": 13694 }, { "epoch": 27.39, "grad_norm": 1.788456678390503, "learning_rate": 2e-05, "loss": 0.0740658, "step": 13695 }, { "epoch": 27.392, "grad_norm": 1.1295547485351562, "learning_rate": 2e-05, "loss": 0.04148526, "step": 13696 }, { "epoch": 27.394, "grad_norm": 1.2793599367141724, "learning_rate": 2e-05, "loss": 0.05736514, "step": 13697 }, { "epoch": 27.396, "grad_norm": 2.056825637817383, "learning_rate": 2e-05, "loss": 0.05165195, "step": 13698 }, { "epoch": 27.398, "grad_norm": 0.9963052868843079, "learning_rate": 2e-05, "loss": 0.04028187, "step": 13699 }, { "epoch": 27.4, "grad_norm": 1.3455522060394287, "learning_rate": 2e-05, "loss": 0.05624631, "step": 13700 }, { "epoch": 27.402, "grad_norm": 1.2473167181015015, "learning_rate": 2e-05, "loss": 0.04385392, "step": 13701 }, { "epoch": 27.404, "grad_norm": 1.037540078163147, "learning_rate": 2e-05, "loss": 0.04462228, "step": 13702 }, { "epoch": 27.406, "grad_norm": 1.2451199293136597, "learning_rate": 2e-05, "loss": 0.06110473, "step": 13703 }, { "epoch": 27.408, "grad_norm": 1.074033498764038, "learning_rate": 2e-05, "loss": 0.03319247, "step": 13704 }, { "epoch": 27.41, "grad_norm": 1.1430970430374146, "learning_rate": 2e-05, "loss": 0.04132932, "step": 13705 }, { "epoch": 27.412, "grad_norm": 1.7692979574203491, "learning_rate": 2e-05, "loss": 0.05424643, "step": 13706 }, { "epoch": 27.414, "grad_norm": 1.0685611963272095, "learning_rate": 2e-05, "loss": 0.03562991, "step": 13707 }, { "epoch": 27.416, "grad_norm": 1.6056164503097534, "learning_rate": 2e-05, "loss": 0.04753327, "step": 13708 }, { "epoch": 27.418, "grad_norm": 1.3051261901855469, "learning_rate": 2e-05, "loss": 0.04897521, "step": 13709 }, { "epoch": 27.42, "grad_norm": 1.4081815481185913, "learning_rate": 2e-05, "loss": 0.055948, "step": 13710 }, { "epoch": 27.422, "grad_norm": 1.1825768947601318, "learning_rate": 2e-05, "loss": 0.04976615, "step": 13711 }, { "epoch": 27.424, "grad_norm": 1.2105929851531982, "learning_rate": 2e-05, "loss": 0.04447066, "step": 13712 }, { "epoch": 27.426, "grad_norm": 1.27484130859375, "learning_rate": 2e-05, "loss": 0.05836751, "step": 13713 }, { "epoch": 27.428, "grad_norm": 1.7164219617843628, "learning_rate": 2e-05, "loss": 0.06305015, "step": 13714 }, { "epoch": 27.43, "grad_norm": 1.666743278503418, "learning_rate": 2e-05, "loss": 0.06220429, "step": 13715 }, { "epoch": 27.432, "grad_norm": 1.8280636072158813, "learning_rate": 2e-05, "loss": 0.05910287, "step": 13716 }, { "epoch": 27.434, "grad_norm": 1.7620023488998413, "learning_rate": 2e-05, "loss": 0.05800281, "step": 13717 }, { "epoch": 27.436, "grad_norm": 1.2569005489349365, "learning_rate": 2e-05, "loss": 0.05842879, "step": 13718 }, { "epoch": 27.438, "grad_norm": 1.7099578380584717, "learning_rate": 2e-05, "loss": 0.06419121, "step": 13719 }, { "epoch": 27.44, "grad_norm": 1.3108617067337036, "learning_rate": 2e-05, "loss": 0.07125148, "step": 13720 }, { "epoch": 27.442, "grad_norm": 0.9960970878601074, "learning_rate": 2e-05, "loss": 0.04110102, "step": 13721 }, { "epoch": 27.444, "grad_norm": 1.337522268295288, "learning_rate": 2e-05, "loss": 0.02819405, "step": 13722 }, { "epoch": 27.446, "grad_norm": 1.1323511600494385, "learning_rate": 2e-05, "loss": 0.04807519, "step": 13723 }, { "epoch": 27.448, "grad_norm": 2.4076807498931885, "learning_rate": 2e-05, "loss": 0.05359387, "step": 13724 }, { "epoch": 27.45, "grad_norm": 1.732336401939392, "learning_rate": 2e-05, "loss": 0.05767593, "step": 13725 }, { "epoch": 27.452, "grad_norm": 1.2247333526611328, "learning_rate": 2e-05, "loss": 0.05499199, "step": 13726 }, { "epoch": 27.454, "grad_norm": 1.1251083612442017, "learning_rate": 2e-05, "loss": 0.04646206, "step": 13727 }, { "epoch": 27.456, "grad_norm": 1.3075662851333618, "learning_rate": 2e-05, "loss": 0.04829896, "step": 13728 }, { "epoch": 27.458, "grad_norm": 4.016895771026611, "learning_rate": 2e-05, "loss": 0.04590724, "step": 13729 }, { "epoch": 27.46, "grad_norm": 1.162635087966919, "learning_rate": 2e-05, "loss": 0.05118883, "step": 13730 }, { "epoch": 27.462, "grad_norm": 1.188884973526001, "learning_rate": 2e-05, "loss": 0.05772907, "step": 13731 }, { "epoch": 27.464, "grad_norm": 1.2289090156555176, "learning_rate": 2e-05, "loss": 0.052696, "step": 13732 }, { "epoch": 27.466, "grad_norm": 1.2998096942901611, "learning_rate": 2e-05, "loss": 0.05408775, "step": 13733 }, { "epoch": 27.468, "grad_norm": 1.5252641439437866, "learning_rate": 2e-05, "loss": 0.05020402, "step": 13734 }, { "epoch": 27.47, "grad_norm": 1.1923192739486694, "learning_rate": 2e-05, "loss": 0.05518716, "step": 13735 }, { "epoch": 27.472, "grad_norm": 1.1531234979629517, "learning_rate": 2e-05, "loss": 0.04152745, "step": 13736 }, { "epoch": 27.474, "grad_norm": 1.3713219165802002, "learning_rate": 2e-05, "loss": 0.05860648, "step": 13737 }, { "epoch": 27.476, "grad_norm": 1.0815269947052002, "learning_rate": 2e-05, "loss": 0.03975716, "step": 13738 }, { "epoch": 27.478, "grad_norm": 1.3328443765640259, "learning_rate": 2e-05, "loss": 0.05969763, "step": 13739 }, { "epoch": 27.48, "grad_norm": 1.5476949214935303, "learning_rate": 2e-05, "loss": 0.06153233, "step": 13740 }, { "epoch": 27.482, "grad_norm": 2.1031620502471924, "learning_rate": 2e-05, "loss": 0.05920611, "step": 13741 }, { "epoch": 27.484, "grad_norm": 1.5141756534576416, "learning_rate": 2e-05, "loss": 0.05117217, "step": 13742 }, { "epoch": 27.486, "grad_norm": 1.3930394649505615, "learning_rate": 2e-05, "loss": 0.0457719, "step": 13743 }, { "epoch": 27.488, "grad_norm": 3.4706075191497803, "learning_rate": 2e-05, "loss": 0.05435749, "step": 13744 }, { "epoch": 27.49, "grad_norm": 1.361970067024231, "learning_rate": 2e-05, "loss": 0.05121572, "step": 13745 }, { "epoch": 27.492, "grad_norm": 1.496351718902588, "learning_rate": 2e-05, "loss": 0.06652565, "step": 13746 }, { "epoch": 27.494, "grad_norm": 2.256734848022461, "learning_rate": 2e-05, "loss": 0.05830427, "step": 13747 }, { "epoch": 27.496, "grad_norm": 1.2743185758590698, "learning_rate": 2e-05, "loss": 0.05657552, "step": 13748 }, { "epoch": 27.498, "grad_norm": 1.1020525693893433, "learning_rate": 2e-05, "loss": 0.05559982, "step": 13749 }, { "epoch": 27.5, "grad_norm": 2.075915575027466, "learning_rate": 2e-05, "loss": 0.06269626, "step": 13750 }, { "epoch": 27.502, "grad_norm": 1.060697317123413, "learning_rate": 2e-05, "loss": 0.0584397, "step": 13751 }, { "epoch": 27.504, "grad_norm": 0.8998603224754333, "learning_rate": 2e-05, "loss": 0.03698571, "step": 13752 }, { "epoch": 27.506, "grad_norm": 1.1477724313735962, "learning_rate": 2e-05, "loss": 0.057973, "step": 13753 }, { "epoch": 27.508, "grad_norm": 1.012613296508789, "learning_rate": 2e-05, "loss": 0.05348552, "step": 13754 }, { "epoch": 27.51, "grad_norm": 1.275446891784668, "learning_rate": 2e-05, "loss": 0.05122288, "step": 13755 }, { "epoch": 27.512, "grad_norm": 1.2952581644058228, "learning_rate": 2e-05, "loss": 0.0512023, "step": 13756 }, { "epoch": 27.514, "grad_norm": 1.8725554943084717, "learning_rate": 2e-05, "loss": 0.07112886, "step": 13757 }, { "epoch": 27.516, "grad_norm": 1.103540301322937, "learning_rate": 2e-05, "loss": 0.0397427, "step": 13758 }, { "epoch": 27.518, "grad_norm": 2.023007392883301, "learning_rate": 2e-05, "loss": 0.05158167, "step": 13759 }, { "epoch": 27.52, "grad_norm": 0.8484739065170288, "learning_rate": 2e-05, "loss": 0.03641016, "step": 13760 }, { "epoch": 27.522, "grad_norm": 1.3486119508743286, "learning_rate": 2e-05, "loss": 0.03948713, "step": 13761 }, { "epoch": 27.524, "grad_norm": 1.2041372060775757, "learning_rate": 2e-05, "loss": 0.03497097, "step": 13762 }, { "epoch": 27.526, "grad_norm": 2.085909843444824, "learning_rate": 2e-05, "loss": 0.03267628, "step": 13763 }, { "epoch": 27.528, "grad_norm": 1.6440179347991943, "learning_rate": 2e-05, "loss": 0.04585116, "step": 13764 }, { "epoch": 27.53, "grad_norm": 0.9636499285697937, "learning_rate": 2e-05, "loss": 0.03647757, "step": 13765 }, { "epoch": 27.532, "grad_norm": 1.3876582384109497, "learning_rate": 2e-05, "loss": 0.04596443, "step": 13766 }, { "epoch": 27.534, "grad_norm": 1.1591322422027588, "learning_rate": 2e-05, "loss": 0.05923095, "step": 13767 }, { "epoch": 27.536, "grad_norm": 1.2394334077835083, "learning_rate": 2e-05, "loss": 0.0504396, "step": 13768 }, { "epoch": 27.538, "grad_norm": 1.33267343044281, "learning_rate": 2e-05, "loss": 0.06966679, "step": 13769 }, { "epoch": 27.54, "grad_norm": 1.1768724918365479, "learning_rate": 2e-05, "loss": 0.04107112, "step": 13770 }, { "epoch": 27.542, "grad_norm": 1.3187763690948486, "learning_rate": 2e-05, "loss": 0.06226375, "step": 13771 }, { "epoch": 27.544, "grad_norm": 1.20506751537323, "learning_rate": 2e-05, "loss": 0.05537635, "step": 13772 }, { "epoch": 27.546, "grad_norm": 0.984427273273468, "learning_rate": 2e-05, "loss": 0.03905368, "step": 13773 }, { "epoch": 27.548000000000002, "grad_norm": 1.2565078735351562, "learning_rate": 2e-05, "loss": 0.05029213, "step": 13774 }, { "epoch": 27.55, "grad_norm": 1.2640584707260132, "learning_rate": 2e-05, "loss": 0.04651093, "step": 13775 }, { "epoch": 27.552, "grad_norm": 1.1570326089859009, "learning_rate": 2e-05, "loss": 0.04353457, "step": 13776 }, { "epoch": 27.554, "grad_norm": 1.8831313848495483, "learning_rate": 2e-05, "loss": 0.05737369, "step": 13777 }, { "epoch": 27.556, "grad_norm": 1.2131006717681885, "learning_rate": 2e-05, "loss": 0.04108252, "step": 13778 }, { "epoch": 27.558, "grad_norm": 1.1210381984710693, "learning_rate": 2e-05, "loss": 0.05285828, "step": 13779 }, { "epoch": 27.56, "grad_norm": 1.4638532400131226, "learning_rate": 2e-05, "loss": 0.04803923, "step": 13780 }, { "epoch": 27.562, "grad_norm": 1.148979663848877, "learning_rate": 2e-05, "loss": 0.06066269, "step": 13781 }, { "epoch": 27.564, "grad_norm": 1.5923223495483398, "learning_rate": 2e-05, "loss": 0.04082999, "step": 13782 }, { "epoch": 27.566, "grad_norm": 1.1766352653503418, "learning_rate": 2e-05, "loss": 0.04370734, "step": 13783 }, { "epoch": 27.568, "grad_norm": 1.0209811925888062, "learning_rate": 2e-05, "loss": 0.0441969, "step": 13784 }, { "epoch": 27.57, "grad_norm": 1.0491085052490234, "learning_rate": 2e-05, "loss": 0.05175408, "step": 13785 }, { "epoch": 27.572, "grad_norm": 1.2934017181396484, "learning_rate": 2e-05, "loss": 0.05024385, "step": 13786 }, { "epoch": 27.574, "grad_norm": 1.0555508136749268, "learning_rate": 2e-05, "loss": 0.04476925, "step": 13787 }, { "epoch": 27.576, "grad_norm": 1.519317388534546, "learning_rate": 2e-05, "loss": 0.06816263, "step": 13788 }, { "epoch": 27.578, "grad_norm": 2.574275255203247, "learning_rate": 2e-05, "loss": 0.06771293, "step": 13789 }, { "epoch": 27.58, "grad_norm": 1.64373779296875, "learning_rate": 2e-05, "loss": 0.04614891, "step": 13790 }, { "epoch": 27.582, "grad_norm": 0.9626046419143677, "learning_rate": 2e-05, "loss": 0.04089271, "step": 13791 }, { "epoch": 27.584, "grad_norm": 0.909767210483551, "learning_rate": 2e-05, "loss": 0.04194653, "step": 13792 }, { "epoch": 27.586, "grad_norm": 1.0245842933654785, "learning_rate": 2e-05, "loss": 0.0385797, "step": 13793 }, { "epoch": 27.588, "grad_norm": 1.1717418432235718, "learning_rate": 2e-05, "loss": 0.04517675, "step": 13794 }, { "epoch": 27.59, "grad_norm": 3.440595865249634, "learning_rate": 2e-05, "loss": 0.05503734, "step": 13795 }, { "epoch": 27.592, "grad_norm": 1.4660612344741821, "learning_rate": 2e-05, "loss": 0.06340294, "step": 13796 }, { "epoch": 27.594, "grad_norm": 1.2383249998092651, "learning_rate": 2e-05, "loss": 0.04243916, "step": 13797 }, { "epoch": 27.596, "grad_norm": 1.4384685754776, "learning_rate": 2e-05, "loss": 0.06041228, "step": 13798 }, { "epoch": 27.598, "grad_norm": 1.2475026845932007, "learning_rate": 2e-05, "loss": 0.05659985, "step": 13799 }, { "epoch": 27.6, "grad_norm": 1.1132606267929077, "learning_rate": 2e-05, "loss": 0.05860177, "step": 13800 }, { "epoch": 27.602, "grad_norm": 1.5617867708206177, "learning_rate": 2e-05, "loss": 0.05068574, "step": 13801 }, { "epoch": 27.604, "grad_norm": 1.4931225776672363, "learning_rate": 2e-05, "loss": 0.05397515, "step": 13802 }, { "epoch": 27.606, "grad_norm": 1.4573091268539429, "learning_rate": 2e-05, "loss": 0.06062449, "step": 13803 }, { "epoch": 27.608, "grad_norm": 1.362664818763733, "learning_rate": 2e-05, "loss": 0.04964469, "step": 13804 }, { "epoch": 27.61, "grad_norm": 1.3979135751724243, "learning_rate": 2e-05, "loss": 0.05811334, "step": 13805 }, { "epoch": 27.612, "grad_norm": 1.2293182611465454, "learning_rate": 2e-05, "loss": 0.05415693, "step": 13806 }, { "epoch": 27.614, "grad_norm": 1.06702721118927, "learning_rate": 2e-05, "loss": 0.0383089, "step": 13807 }, { "epoch": 27.616, "grad_norm": 1.3986848592758179, "learning_rate": 2e-05, "loss": 0.06884108, "step": 13808 }, { "epoch": 27.618, "grad_norm": 1.1494394540786743, "learning_rate": 2e-05, "loss": 0.04200149, "step": 13809 }, { "epoch": 27.62, "grad_norm": 1.434314489364624, "learning_rate": 2e-05, "loss": 0.04495163, "step": 13810 }, { "epoch": 27.622, "grad_norm": 1.771521806716919, "learning_rate": 2e-05, "loss": 0.05346426, "step": 13811 }, { "epoch": 27.624, "grad_norm": 1.658406376838684, "learning_rate": 2e-05, "loss": 0.07082607, "step": 13812 }, { "epoch": 27.626, "grad_norm": 1.1903034448623657, "learning_rate": 2e-05, "loss": 0.05057081, "step": 13813 }, { "epoch": 27.628, "grad_norm": 1.185341477394104, "learning_rate": 2e-05, "loss": 0.04183183, "step": 13814 }, { "epoch": 27.63, "grad_norm": 1.4053906202316284, "learning_rate": 2e-05, "loss": 0.04691254, "step": 13815 }, { "epoch": 27.632, "grad_norm": 1.786252498626709, "learning_rate": 2e-05, "loss": 0.06172635, "step": 13816 }, { "epoch": 27.634, "grad_norm": 1.634802222251892, "learning_rate": 2e-05, "loss": 0.04311569, "step": 13817 }, { "epoch": 27.636, "grad_norm": 1.7363250255584717, "learning_rate": 2e-05, "loss": 0.05510516, "step": 13818 }, { "epoch": 27.638, "grad_norm": 1.1772897243499756, "learning_rate": 2e-05, "loss": 0.05120692, "step": 13819 }, { "epoch": 27.64, "grad_norm": 1.1961190700531006, "learning_rate": 2e-05, "loss": 0.04048288, "step": 13820 }, { "epoch": 27.642, "grad_norm": 1.30369234085083, "learning_rate": 2e-05, "loss": 0.04853419, "step": 13821 }, { "epoch": 27.644, "grad_norm": 1.0288712978363037, "learning_rate": 2e-05, "loss": 0.04480207, "step": 13822 }, { "epoch": 27.646, "grad_norm": 1.1966861486434937, "learning_rate": 2e-05, "loss": 0.05335034, "step": 13823 }, { "epoch": 27.648, "grad_norm": 1.1542290449142456, "learning_rate": 2e-05, "loss": 0.04805242, "step": 13824 }, { "epoch": 27.65, "grad_norm": 1.51029372215271, "learning_rate": 2e-05, "loss": 0.05097276, "step": 13825 }, { "epoch": 27.652, "grad_norm": 1.0829248428344727, "learning_rate": 2e-05, "loss": 0.04303951, "step": 13826 }, { "epoch": 27.654, "grad_norm": 1.3713996410369873, "learning_rate": 2e-05, "loss": 0.04612532, "step": 13827 }, { "epoch": 27.656, "grad_norm": 1.5636634826660156, "learning_rate": 2e-05, "loss": 0.0650598, "step": 13828 }, { "epoch": 27.658, "grad_norm": 1.4010292291641235, "learning_rate": 2e-05, "loss": 0.06315856, "step": 13829 }, { "epoch": 27.66, "grad_norm": 0.9139993786811829, "learning_rate": 2e-05, "loss": 0.03729622, "step": 13830 }, { "epoch": 27.662, "grad_norm": 1.4215222597122192, "learning_rate": 2e-05, "loss": 0.05179662, "step": 13831 }, { "epoch": 27.664, "grad_norm": 2.481299877166748, "learning_rate": 2e-05, "loss": 0.06742984, "step": 13832 }, { "epoch": 27.666, "grad_norm": 1.185290813446045, "learning_rate": 2e-05, "loss": 0.04957722, "step": 13833 }, { "epoch": 27.668, "grad_norm": 1.2204636335372925, "learning_rate": 2e-05, "loss": 0.04205001, "step": 13834 }, { "epoch": 27.67, "grad_norm": 0.9806143641471863, "learning_rate": 2e-05, "loss": 0.03496443, "step": 13835 }, { "epoch": 27.672, "grad_norm": 1.3551045656204224, "learning_rate": 2e-05, "loss": 0.05290792, "step": 13836 }, { "epoch": 27.674, "grad_norm": 0.9434770941734314, "learning_rate": 2e-05, "loss": 0.03982441, "step": 13837 }, { "epoch": 27.676, "grad_norm": 1.2684959173202515, "learning_rate": 2e-05, "loss": 0.03757454, "step": 13838 }, { "epoch": 27.678, "grad_norm": 0.9965443015098572, "learning_rate": 2e-05, "loss": 0.04075042, "step": 13839 }, { "epoch": 27.68, "grad_norm": 1.276584267616272, "learning_rate": 2e-05, "loss": 0.05190865, "step": 13840 }, { "epoch": 27.682, "grad_norm": 1.1221213340759277, "learning_rate": 2e-05, "loss": 0.04872163, "step": 13841 }, { "epoch": 27.684, "grad_norm": 1.4311329126358032, "learning_rate": 2e-05, "loss": 0.05780797, "step": 13842 }, { "epoch": 27.686, "grad_norm": 0.9199719429016113, "learning_rate": 2e-05, "loss": 0.03807329, "step": 13843 }, { "epoch": 27.688, "grad_norm": 2.2727510929107666, "learning_rate": 2e-05, "loss": 0.04805681, "step": 13844 }, { "epoch": 27.69, "grad_norm": 1.3352643251419067, "learning_rate": 2e-05, "loss": 0.05965505, "step": 13845 }, { "epoch": 27.692, "grad_norm": 1.0805542469024658, "learning_rate": 2e-05, "loss": 0.0389508, "step": 13846 }, { "epoch": 27.694, "grad_norm": 2.0555317401885986, "learning_rate": 2e-05, "loss": 0.04444616, "step": 13847 }, { "epoch": 27.696, "grad_norm": 2.029655933380127, "learning_rate": 2e-05, "loss": 0.05219391, "step": 13848 }, { "epoch": 27.698, "grad_norm": 1.1439883708953857, "learning_rate": 2e-05, "loss": 0.03246567, "step": 13849 }, { "epoch": 27.7, "grad_norm": 1.1407350301742554, "learning_rate": 2e-05, "loss": 0.05633612, "step": 13850 }, { "epoch": 27.701999999999998, "grad_norm": 1.1266043186187744, "learning_rate": 2e-05, "loss": 0.0420356, "step": 13851 }, { "epoch": 27.704, "grad_norm": 1.113688349723816, "learning_rate": 2e-05, "loss": 0.04947349, "step": 13852 }, { "epoch": 27.706, "grad_norm": 7.0289716720581055, "learning_rate": 2e-05, "loss": 0.06139268, "step": 13853 }, { "epoch": 27.708, "grad_norm": 1.280626654624939, "learning_rate": 2e-05, "loss": 0.05660423, "step": 13854 }, { "epoch": 27.71, "grad_norm": 1.4162342548370361, "learning_rate": 2e-05, "loss": 0.04602627, "step": 13855 }, { "epoch": 27.712, "grad_norm": 1.1193690299987793, "learning_rate": 2e-05, "loss": 0.05793196, "step": 13856 }, { "epoch": 27.714, "grad_norm": 2.0348377227783203, "learning_rate": 2e-05, "loss": 0.04950704, "step": 13857 }, { "epoch": 27.716, "grad_norm": 1.6183253526687622, "learning_rate": 2e-05, "loss": 0.05771424, "step": 13858 }, { "epoch": 27.718, "grad_norm": 1.3953875303268433, "learning_rate": 2e-05, "loss": 0.03940511, "step": 13859 }, { "epoch": 27.72, "grad_norm": 1.0033621788024902, "learning_rate": 2e-05, "loss": 0.0361148, "step": 13860 }, { "epoch": 27.722, "grad_norm": 1.1963427066802979, "learning_rate": 2e-05, "loss": 0.04811078, "step": 13861 }, { "epoch": 27.724, "grad_norm": 1.1764371395111084, "learning_rate": 2e-05, "loss": 0.05527811, "step": 13862 }, { "epoch": 27.726, "grad_norm": 1.1694661378860474, "learning_rate": 2e-05, "loss": 0.05597811, "step": 13863 }, { "epoch": 27.728, "grad_norm": 2.0215795040130615, "learning_rate": 2e-05, "loss": 0.04680409, "step": 13864 }, { "epoch": 27.73, "grad_norm": 1.7509485483169556, "learning_rate": 2e-05, "loss": 0.04974722, "step": 13865 }, { "epoch": 27.732, "grad_norm": 1.3956106901168823, "learning_rate": 2e-05, "loss": 0.05899543, "step": 13866 }, { "epoch": 27.734, "grad_norm": 1.1714872121810913, "learning_rate": 2e-05, "loss": 0.05479924, "step": 13867 }, { "epoch": 27.736, "grad_norm": 1.4502583742141724, "learning_rate": 2e-05, "loss": 0.05704089, "step": 13868 }, { "epoch": 27.738, "grad_norm": 3.081385374069214, "learning_rate": 2e-05, "loss": 0.04889526, "step": 13869 }, { "epoch": 27.74, "grad_norm": 1.7258799076080322, "learning_rate": 2e-05, "loss": 0.03984695, "step": 13870 }, { "epoch": 27.742, "grad_norm": 1.6187059879302979, "learning_rate": 2e-05, "loss": 0.04262181, "step": 13871 }, { "epoch": 27.744, "grad_norm": 0.9548773169517517, "learning_rate": 2e-05, "loss": 0.04156426, "step": 13872 }, { "epoch": 27.746, "grad_norm": 1.181675910949707, "learning_rate": 2e-05, "loss": 0.03379217, "step": 13873 }, { "epoch": 27.748, "grad_norm": 0.9974355697631836, "learning_rate": 2e-05, "loss": 0.03629694, "step": 13874 }, { "epoch": 27.75, "grad_norm": 1.0894088745117188, "learning_rate": 2e-05, "loss": 0.04181784, "step": 13875 }, { "epoch": 27.752, "grad_norm": 1.3820403814315796, "learning_rate": 2e-05, "loss": 0.05686478, "step": 13876 }, { "epoch": 27.754, "grad_norm": 1.0543464422225952, "learning_rate": 2e-05, "loss": 0.04107931, "step": 13877 }, { "epoch": 27.756, "grad_norm": 1.085068702697754, "learning_rate": 2e-05, "loss": 0.04298123, "step": 13878 }, { "epoch": 27.758, "grad_norm": 1.3363054990768433, "learning_rate": 2e-05, "loss": 0.05749026, "step": 13879 }, { "epoch": 27.76, "grad_norm": 1.1313159465789795, "learning_rate": 2e-05, "loss": 0.03738141, "step": 13880 }, { "epoch": 27.762, "grad_norm": 0.9753918051719666, "learning_rate": 2e-05, "loss": 0.03613247, "step": 13881 }, { "epoch": 27.764, "grad_norm": 2.3439650535583496, "learning_rate": 2e-05, "loss": 0.07411982, "step": 13882 }, { "epoch": 27.766, "grad_norm": 0.8631768822669983, "learning_rate": 2e-05, "loss": 0.03058292, "step": 13883 }, { "epoch": 27.768, "grad_norm": 0.898760974407196, "learning_rate": 2e-05, "loss": 0.03849625, "step": 13884 }, { "epoch": 27.77, "grad_norm": 1.0752990245819092, "learning_rate": 2e-05, "loss": 0.04668605, "step": 13885 }, { "epoch": 27.772, "grad_norm": 1.4357421398162842, "learning_rate": 2e-05, "loss": 0.0589397, "step": 13886 }, { "epoch": 27.774, "grad_norm": 1.095463514328003, "learning_rate": 2e-05, "loss": 0.0414239, "step": 13887 }, { "epoch": 27.776, "grad_norm": 1.1844574213027954, "learning_rate": 2e-05, "loss": 0.06904715, "step": 13888 }, { "epoch": 27.778, "grad_norm": 1.182238221168518, "learning_rate": 2e-05, "loss": 0.04742506, "step": 13889 }, { "epoch": 27.78, "grad_norm": 1.0911122560501099, "learning_rate": 2e-05, "loss": 0.0477729, "step": 13890 }, { "epoch": 27.782, "grad_norm": 0.9467042684555054, "learning_rate": 2e-05, "loss": 0.03461124, "step": 13891 }, { "epoch": 27.784, "grad_norm": 1.2857006788253784, "learning_rate": 2e-05, "loss": 0.07485963, "step": 13892 }, { "epoch": 27.786, "grad_norm": 1.107034683227539, "learning_rate": 2e-05, "loss": 0.04877251, "step": 13893 }, { "epoch": 27.788, "grad_norm": 1.021686315536499, "learning_rate": 2e-05, "loss": 0.04908875, "step": 13894 }, { "epoch": 27.79, "grad_norm": 1.2403331995010376, "learning_rate": 2e-05, "loss": 0.04559707, "step": 13895 }, { "epoch": 27.792, "grad_norm": 1.2987374067306519, "learning_rate": 2e-05, "loss": 0.05640421, "step": 13896 }, { "epoch": 27.794, "grad_norm": 2.034902572631836, "learning_rate": 2e-05, "loss": 0.06132263, "step": 13897 }, { "epoch": 27.796, "grad_norm": 0.8802751302719116, "learning_rate": 2e-05, "loss": 0.03111104, "step": 13898 }, { "epoch": 27.798000000000002, "grad_norm": 1.2100516557693481, "learning_rate": 2e-05, "loss": 0.0364811, "step": 13899 }, { "epoch": 27.8, "grad_norm": 0.9517601132392883, "learning_rate": 2e-05, "loss": 0.03731903, "step": 13900 }, { "epoch": 27.802, "grad_norm": 1.4495230913162231, "learning_rate": 2e-05, "loss": 0.05857515, "step": 13901 }, { "epoch": 27.804, "grad_norm": 1.176120400428772, "learning_rate": 2e-05, "loss": 0.04578595, "step": 13902 }, { "epoch": 27.806, "grad_norm": 1.0333614349365234, "learning_rate": 2e-05, "loss": 0.04311878, "step": 13903 }, { "epoch": 27.808, "grad_norm": 2.1322693824768066, "learning_rate": 2e-05, "loss": 0.06819106, "step": 13904 }, { "epoch": 27.81, "grad_norm": 1.1993663311004639, "learning_rate": 2e-05, "loss": 0.05295255, "step": 13905 }, { "epoch": 27.812, "grad_norm": 1.1196625232696533, "learning_rate": 2e-05, "loss": 0.04639095, "step": 13906 }, { "epoch": 27.814, "grad_norm": 1.2885843515396118, "learning_rate": 2e-05, "loss": 0.04978818, "step": 13907 }, { "epoch": 27.816, "grad_norm": 0.8948124051094055, "learning_rate": 2e-05, "loss": 0.03198558, "step": 13908 }, { "epoch": 27.818, "grad_norm": 1.6860870122909546, "learning_rate": 2e-05, "loss": 0.06135628, "step": 13909 }, { "epoch": 27.82, "grad_norm": 1.1558820009231567, "learning_rate": 2e-05, "loss": 0.03576055, "step": 13910 }, { "epoch": 27.822, "grad_norm": 1.064172387123108, "learning_rate": 2e-05, "loss": 0.04480666, "step": 13911 }, { "epoch": 27.824, "grad_norm": 1.1855511665344238, "learning_rate": 2e-05, "loss": 0.04820254, "step": 13912 }, { "epoch": 27.826, "grad_norm": 1.1401631832122803, "learning_rate": 2e-05, "loss": 0.04387826, "step": 13913 }, { "epoch": 27.828, "grad_norm": 1.352203607559204, "learning_rate": 2e-05, "loss": 0.05549879, "step": 13914 }, { "epoch": 27.83, "grad_norm": 1.1364407539367676, "learning_rate": 2e-05, "loss": 0.05167679, "step": 13915 }, { "epoch": 27.832, "grad_norm": 1.8792164325714111, "learning_rate": 2e-05, "loss": 0.0588867, "step": 13916 }, { "epoch": 27.834, "grad_norm": 1.0816301107406616, "learning_rate": 2e-05, "loss": 0.04240282, "step": 13917 }, { "epoch": 27.836, "grad_norm": 1.1816086769104004, "learning_rate": 2e-05, "loss": 0.06508492, "step": 13918 }, { "epoch": 27.838, "grad_norm": 1.1822892427444458, "learning_rate": 2e-05, "loss": 0.05014569, "step": 13919 }, { "epoch": 27.84, "grad_norm": 4.474466323852539, "learning_rate": 2e-05, "loss": 0.05703727, "step": 13920 }, { "epoch": 27.842, "grad_norm": 0.9988319277763367, "learning_rate": 2e-05, "loss": 0.03302813, "step": 13921 }, { "epoch": 27.844, "grad_norm": 1.101577639579773, "learning_rate": 2e-05, "loss": 0.04120149, "step": 13922 }, { "epoch": 27.846, "grad_norm": 2.459514856338501, "learning_rate": 2e-05, "loss": 0.05404228, "step": 13923 }, { "epoch": 27.848, "grad_norm": 1.5336848497390747, "learning_rate": 2e-05, "loss": 0.06002779, "step": 13924 }, { "epoch": 27.85, "grad_norm": 2.14388108253479, "learning_rate": 2e-05, "loss": 0.04957658, "step": 13925 }, { "epoch": 27.852, "grad_norm": 1.245545744895935, "learning_rate": 2e-05, "loss": 0.04385869, "step": 13926 }, { "epoch": 27.854, "grad_norm": 1.1639904975891113, "learning_rate": 2e-05, "loss": 0.05318103, "step": 13927 }, { "epoch": 27.856, "grad_norm": 1.1186693906784058, "learning_rate": 2e-05, "loss": 0.0422943, "step": 13928 }, { "epoch": 27.858, "grad_norm": 1.220611333847046, "learning_rate": 2e-05, "loss": 0.05335102, "step": 13929 }, { "epoch": 27.86, "grad_norm": 1.3203026056289673, "learning_rate": 2e-05, "loss": 0.05414462, "step": 13930 }, { "epoch": 27.862, "grad_norm": 1.1504926681518555, "learning_rate": 2e-05, "loss": 0.03921906, "step": 13931 }, { "epoch": 27.864, "grad_norm": 1.1838384866714478, "learning_rate": 2e-05, "loss": 0.0498357, "step": 13932 }, { "epoch": 27.866, "grad_norm": 1.2185877561569214, "learning_rate": 2e-05, "loss": 0.04333254, "step": 13933 }, { "epoch": 27.868, "grad_norm": 1.2446582317352295, "learning_rate": 2e-05, "loss": 0.04616841, "step": 13934 }, { "epoch": 27.87, "grad_norm": 1.9660826921463013, "learning_rate": 2e-05, "loss": 0.05890882, "step": 13935 }, { "epoch": 27.872, "grad_norm": 2.022188663482666, "learning_rate": 2e-05, "loss": 0.05026356, "step": 13936 }, { "epoch": 27.874, "grad_norm": 1.3820898532867432, "learning_rate": 2e-05, "loss": 0.05485899, "step": 13937 }, { "epoch": 27.876, "grad_norm": 1.2792017459869385, "learning_rate": 2e-05, "loss": 0.04454219, "step": 13938 }, { "epoch": 27.878, "grad_norm": 1.453120470046997, "learning_rate": 2e-05, "loss": 0.0500898, "step": 13939 }, { "epoch": 27.88, "grad_norm": 1.4074959754943848, "learning_rate": 2e-05, "loss": 0.06458472, "step": 13940 }, { "epoch": 27.882, "grad_norm": 1.2896713018417358, "learning_rate": 2e-05, "loss": 0.04736995, "step": 13941 }, { "epoch": 27.884, "grad_norm": 1.1082723140716553, "learning_rate": 2e-05, "loss": 0.0422068, "step": 13942 }, { "epoch": 27.886, "grad_norm": 1.1270164251327515, "learning_rate": 2e-05, "loss": 0.04499193, "step": 13943 }, { "epoch": 27.888, "grad_norm": 1.3460699319839478, "learning_rate": 2e-05, "loss": 0.05188408, "step": 13944 }, { "epoch": 27.89, "grad_norm": 1.223083734512329, "learning_rate": 2e-05, "loss": 0.04043397, "step": 13945 }, { "epoch": 27.892, "grad_norm": 2.040724992752075, "learning_rate": 2e-05, "loss": 0.0713658, "step": 13946 }, { "epoch": 27.894, "grad_norm": 3.084088087081909, "learning_rate": 2e-05, "loss": 0.06303418, "step": 13947 }, { "epoch": 27.896, "grad_norm": 1.3216333389282227, "learning_rate": 2e-05, "loss": 0.05471015, "step": 13948 }, { "epoch": 27.898, "grad_norm": 3.6924383640289307, "learning_rate": 2e-05, "loss": 0.05341459, "step": 13949 }, { "epoch": 27.9, "grad_norm": 1.192460536956787, "learning_rate": 2e-05, "loss": 0.05586625, "step": 13950 }, { "epoch": 27.902, "grad_norm": 1.10248601436615, "learning_rate": 2e-05, "loss": 0.04660258, "step": 13951 }, { "epoch": 27.904, "grad_norm": 1.591509461402893, "learning_rate": 2e-05, "loss": 0.06772734, "step": 13952 }, { "epoch": 27.906, "grad_norm": 1.336363673210144, "learning_rate": 2e-05, "loss": 0.07250066, "step": 13953 }, { "epoch": 27.908, "grad_norm": 1.3499382734298706, "learning_rate": 2e-05, "loss": 0.04728671, "step": 13954 }, { "epoch": 27.91, "grad_norm": 1.3442902565002441, "learning_rate": 2e-05, "loss": 0.06382729, "step": 13955 }, { "epoch": 27.912, "grad_norm": 1.2155158519744873, "learning_rate": 2e-05, "loss": 0.05304793, "step": 13956 }, { "epoch": 27.914, "grad_norm": 1.4830251932144165, "learning_rate": 2e-05, "loss": 0.05614383, "step": 13957 }, { "epoch": 27.916, "grad_norm": 1.0681856870651245, "learning_rate": 2e-05, "loss": 0.05393625, "step": 13958 }, { "epoch": 27.918, "grad_norm": 1.0325919389724731, "learning_rate": 2e-05, "loss": 0.04755799, "step": 13959 }, { "epoch": 27.92, "grad_norm": 1.2546429634094238, "learning_rate": 2e-05, "loss": 0.05811261, "step": 13960 }, { "epoch": 27.922, "grad_norm": 1.0625957250595093, "learning_rate": 2e-05, "loss": 0.05381419, "step": 13961 }, { "epoch": 27.924, "grad_norm": 1.291330337524414, "learning_rate": 2e-05, "loss": 0.03868761, "step": 13962 }, { "epoch": 27.926, "grad_norm": 1.3198494911193848, "learning_rate": 2e-05, "loss": 0.05909553, "step": 13963 }, { "epoch": 27.928, "grad_norm": 1.454842209815979, "learning_rate": 2e-05, "loss": 0.0611601, "step": 13964 }, { "epoch": 27.93, "grad_norm": 1.0169469118118286, "learning_rate": 2e-05, "loss": 0.04105099, "step": 13965 }, { "epoch": 27.932, "grad_norm": 1.1746594905853271, "learning_rate": 2e-05, "loss": 0.05205319, "step": 13966 }, { "epoch": 27.934, "grad_norm": 1.6182756423950195, "learning_rate": 2e-05, "loss": 0.04446393, "step": 13967 }, { "epoch": 27.936, "grad_norm": 1.2762821912765503, "learning_rate": 2e-05, "loss": 0.04054119, "step": 13968 }, { "epoch": 27.938, "grad_norm": 0.9889112114906311, "learning_rate": 2e-05, "loss": 0.04223812, "step": 13969 }, { "epoch": 27.94, "grad_norm": 1.5975980758666992, "learning_rate": 2e-05, "loss": 0.039861, "step": 13970 }, { "epoch": 27.942, "grad_norm": 1.1211752891540527, "learning_rate": 2e-05, "loss": 0.04917958, "step": 13971 }, { "epoch": 27.944, "grad_norm": 1.382899522781372, "learning_rate": 2e-05, "loss": 0.04696003, "step": 13972 }, { "epoch": 27.946, "grad_norm": 1.018843173980713, "learning_rate": 2e-05, "loss": 0.03948437, "step": 13973 }, { "epoch": 27.948, "grad_norm": 0.9556752443313599, "learning_rate": 2e-05, "loss": 0.04078113, "step": 13974 }, { "epoch": 27.95, "grad_norm": 4.137959003448486, "learning_rate": 2e-05, "loss": 0.05063497, "step": 13975 }, { "epoch": 27.951999999999998, "grad_norm": 1.122243046760559, "learning_rate": 2e-05, "loss": 0.03610418, "step": 13976 }, { "epoch": 27.954, "grad_norm": 1.122756838798523, "learning_rate": 2e-05, "loss": 0.04875606, "step": 13977 }, { "epoch": 27.956, "grad_norm": 1.117292881011963, "learning_rate": 2e-05, "loss": 0.05630633, "step": 13978 }, { "epoch": 27.958, "grad_norm": 1.0666700601577759, "learning_rate": 2e-05, "loss": 0.05083863, "step": 13979 }, { "epoch": 27.96, "grad_norm": 1.36178719997406, "learning_rate": 2e-05, "loss": 0.0418558, "step": 13980 }, { "epoch": 27.962, "grad_norm": 1.7230415344238281, "learning_rate": 2e-05, "loss": 0.06059814, "step": 13981 }, { "epoch": 27.964, "grad_norm": 1.1696524620056152, "learning_rate": 2e-05, "loss": 0.0435648, "step": 13982 }, { "epoch": 27.966, "grad_norm": 1.1608043909072876, "learning_rate": 2e-05, "loss": 0.04516421, "step": 13983 }, { "epoch": 27.968, "grad_norm": 1.7001107931137085, "learning_rate": 2e-05, "loss": 0.05355493, "step": 13984 }, { "epoch": 27.97, "grad_norm": 1.5130566358566284, "learning_rate": 2e-05, "loss": 0.04900081, "step": 13985 }, { "epoch": 27.972, "grad_norm": 2.3197219371795654, "learning_rate": 2e-05, "loss": 0.06461625, "step": 13986 }, { "epoch": 27.974, "grad_norm": 1.0300438404083252, "learning_rate": 2e-05, "loss": 0.04541411, "step": 13987 }, { "epoch": 27.976, "grad_norm": 3.203516721725464, "learning_rate": 2e-05, "loss": 0.05565455, "step": 13988 }, { "epoch": 27.978, "grad_norm": 0.9678990840911865, "learning_rate": 2e-05, "loss": 0.04628026, "step": 13989 }, { "epoch": 27.98, "grad_norm": 1.0995022058486938, "learning_rate": 2e-05, "loss": 0.05342911, "step": 13990 }, { "epoch": 27.982, "grad_norm": 1.0251004695892334, "learning_rate": 2e-05, "loss": 0.03990908, "step": 13991 }, { "epoch": 27.984, "grad_norm": 1.0145243406295776, "learning_rate": 2e-05, "loss": 0.04830322, "step": 13992 }, { "epoch": 27.986, "grad_norm": 3.732783555984497, "learning_rate": 2e-05, "loss": 0.05173131, "step": 13993 }, { "epoch": 27.988, "grad_norm": 1.5884466171264648, "learning_rate": 2e-05, "loss": 0.05113035, "step": 13994 }, { "epoch": 27.99, "grad_norm": 1.1071032285690308, "learning_rate": 2e-05, "loss": 0.04745899, "step": 13995 }, { "epoch": 27.992, "grad_norm": 1.3977738618850708, "learning_rate": 2e-05, "loss": 0.05147257, "step": 13996 }, { "epoch": 27.994, "grad_norm": 1.4739811420440674, "learning_rate": 2e-05, "loss": 0.05509406, "step": 13997 }, { "epoch": 27.996, "grad_norm": 2.95452880859375, "learning_rate": 2e-05, "loss": 0.05132447, "step": 13998 }, { "epoch": 27.998, "grad_norm": 1.3393985033035278, "learning_rate": 2e-05, "loss": 0.05062933, "step": 13999 }, { "epoch": 28.0, "grad_norm": 1.0982304811477661, "learning_rate": 2e-05, "loss": 0.04777197, "step": 14000 }, { "epoch": 28.0, "eval_performance": { "AngleClassification_1": 0.996, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9880239520958084, "Equal_1": 0.998, "Equal_2": 0.9800399201596807, "Equal_3": 0.9860279441117764, "LineComparison_1": 1.0, "LineComparison_2": 0.998003992015968, "LineComparison_3": 1.0, "Parallel_1": 0.9899799599198397, "Parallel_2": 0.9959919839679359, "Parallel_3": 0.988, "Perpendicular_1": 0.992, "Perpendicular_2": 0.988, "Perpendicular_3": 0.8917835671342685, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.998, "PointLiesOnCircle_3": 0.9912000000000001, "PointLiesOnLine_1": 0.9919839679358717, "PointLiesOnLine_2": 0.9979959919839679, "PointLiesOnLine_3": 0.9800399201596807 }, "eval_runtime": 225.8755, "eval_samples_per_second": 46.486, "eval_steps_per_second": 0.93, "step": 14000 }, { "epoch": 28.002, "grad_norm": 0.9589885473251343, "learning_rate": 2e-05, "loss": 0.03192472, "step": 14001 }, { "epoch": 28.004, "grad_norm": 1.6763265132904053, "learning_rate": 2e-05, "loss": 0.05118313, "step": 14002 }, { "epoch": 28.006, "grad_norm": 1.182698369026184, "learning_rate": 2e-05, "loss": 0.04611558, "step": 14003 }, { "epoch": 28.008, "grad_norm": 1.308375358581543, "learning_rate": 2e-05, "loss": 0.06471476, "step": 14004 }, { "epoch": 28.01, "grad_norm": 1.9592188596725464, "learning_rate": 2e-05, "loss": 0.06416531, "step": 14005 }, { "epoch": 28.012, "grad_norm": 1.1212422847747803, "learning_rate": 2e-05, "loss": 0.04081588, "step": 14006 }, { "epoch": 28.014, "grad_norm": 1.23434579372406, "learning_rate": 2e-05, "loss": 0.06098711, "step": 14007 }, { "epoch": 28.016, "grad_norm": 1.0909266471862793, "learning_rate": 2e-05, "loss": 0.04146683, "step": 14008 }, { "epoch": 28.018, "grad_norm": 1.221076488494873, "learning_rate": 2e-05, "loss": 0.04725564, "step": 14009 }, { "epoch": 28.02, "grad_norm": 1.2924137115478516, "learning_rate": 2e-05, "loss": 0.04508425, "step": 14010 }, { "epoch": 28.022, "grad_norm": 0.9948585033416748, "learning_rate": 2e-05, "loss": 0.04095497, "step": 14011 }, { "epoch": 28.024, "grad_norm": 1.5098035335540771, "learning_rate": 2e-05, "loss": 0.06174885, "step": 14012 }, { "epoch": 28.026, "grad_norm": 1.592028021812439, "learning_rate": 2e-05, "loss": 0.04074557, "step": 14013 }, { "epoch": 28.028, "grad_norm": 1.2844913005828857, "learning_rate": 2e-05, "loss": 0.06136954, "step": 14014 }, { "epoch": 28.03, "grad_norm": 1.30637788772583, "learning_rate": 2e-05, "loss": 0.05842869, "step": 14015 }, { "epoch": 28.032, "grad_norm": 1.5720715522766113, "learning_rate": 2e-05, "loss": 0.04726084, "step": 14016 }, { "epoch": 28.034, "grad_norm": 1.178145170211792, "learning_rate": 2e-05, "loss": 0.06219021, "step": 14017 }, { "epoch": 28.036, "grad_norm": 1.0121883153915405, "learning_rate": 2e-05, "loss": 0.03924335, "step": 14018 }, { "epoch": 28.038, "grad_norm": 0.9410894513130188, "learning_rate": 2e-05, "loss": 0.04420645, "step": 14019 }, { "epoch": 28.04, "grad_norm": 1.0590211153030396, "learning_rate": 2e-05, "loss": 0.03924129, "step": 14020 }, { "epoch": 28.042, "grad_norm": 1.1825803518295288, "learning_rate": 2e-05, "loss": 0.05216803, "step": 14021 }, { "epoch": 28.044, "grad_norm": 1.5470445156097412, "learning_rate": 2e-05, "loss": 0.04120636, "step": 14022 }, { "epoch": 28.046, "grad_norm": 1.1469334363937378, "learning_rate": 2e-05, "loss": 0.04326928, "step": 14023 }, { "epoch": 28.048, "grad_norm": 1.3864457607269287, "learning_rate": 2e-05, "loss": 0.04464956, "step": 14024 }, { "epoch": 28.05, "grad_norm": 1.2183732986450195, "learning_rate": 2e-05, "loss": 0.05627831, "step": 14025 }, { "epoch": 28.052, "grad_norm": 1.2339484691619873, "learning_rate": 2e-05, "loss": 0.04752214, "step": 14026 }, { "epoch": 28.054, "grad_norm": 1.214246392250061, "learning_rate": 2e-05, "loss": 0.05357888, "step": 14027 }, { "epoch": 28.056, "grad_norm": 1.8142962455749512, "learning_rate": 2e-05, "loss": 0.06305675, "step": 14028 }, { "epoch": 28.058, "grad_norm": 1.0343353748321533, "learning_rate": 2e-05, "loss": 0.04792928, "step": 14029 }, { "epoch": 28.06, "grad_norm": 1.3261992931365967, "learning_rate": 2e-05, "loss": 0.04944962, "step": 14030 }, { "epoch": 28.062, "grad_norm": 1.099822998046875, "learning_rate": 2e-05, "loss": 0.05839844, "step": 14031 }, { "epoch": 28.064, "grad_norm": 1.8612264394760132, "learning_rate": 2e-05, "loss": 0.05120856, "step": 14032 }, { "epoch": 28.066, "grad_norm": 1.4481234550476074, "learning_rate": 2e-05, "loss": 0.05204151, "step": 14033 }, { "epoch": 28.068, "grad_norm": 1.4144114255905151, "learning_rate": 2e-05, "loss": 0.0442274, "step": 14034 }, { "epoch": 28.07, "grad_norm": 1.2719594240188599, "learning_rate": 2e-05, "loss": 0.05410684, "step": 14035 }, { "epoch": 28.072, "grad_norm": 1.1499301195144653, "learning_rate": 2e-05, "loss": 0.05026767, "step": 14036 }, { "epoch": 28.074, "grad_norm": 1.3675947189331055, "learning_rate": 2e-05, "loss": 0.04874235, "step": 14037 }, { "epoch": 28.076, "grad_norm": 0.9079284071922302, "learning_rate": 2e-05, "loss": 0.03628619, "step": 14038 }, { "epoch": 28.078, "grad_norm": 1.1983814239501953, "learning_rate": 2e-05, "loss": 0.05439235, "step": 14039 }, { "epoch": 28.08, "grad_norm": 1.1797288656234741, "learning_rate": 2e-05, "loss": 0.06060669, "step": 14040 }, { "epoch": 28.082, "grad_norm": 1.110556960105896, "learning_rate": 2e-05, "loss": 0.05188231, "step": 14041 }, { "epoch": 28.084, "grad_norm": 1.215267539024353, "learning_rate": 2e-05, "loss": 0.05635985, "step": 14042 }, { "epoch": 28.086, "grad_norm": 2.2255046367645264, "learning_rate": 2e-05, "loss": 0.05363511, "step": 14043 }, { "epoch": 28.088, "grad_norm": 1.0870945453643799, "learning_rate": 2e-05, "loss": 0.05201738, "step": 14044 }, { "epoch": 28.09, "grad_norm": 1.273977518081665, "learning_rate": 2e-05, "loss": 0.04594712, "step": 14045 }, { "epoch": 28.092, "grad_norm": 1.80062735080719, "learning_rate": 2e-05, "loss": 0.0470517, "step": 14046 }, { "epoch": 28.094, "grad_norm": 1.0497883558273315, "learning_rate": 2e-05, "loss": 0.04246815, "step": 14047 }, { "epoch": 28.096, "grad_norm": 1.176626205444336, "learning_rate": 2e-05, "loss": 0.05963624, "step": 14048 }, { "epoch": 28.098, "grad_norm": 1.117715835571289, "learning_rate": 2e-05, "loss": 0.05090708, "step": 14049 }, { "epoch": 28.1, "grad_norm": 1.2431190013885498, "learning_rate": 2e-05, "loss": 0.03869393, "step": 14050 }, { "epoch": 28.102, "grad_norm": 1.4195657968521118, "learning_rate": 2e-05, "loss": 0.05490259, "step": 14051 }, { "epoch": 28.104, "grad_norm": 0.9730716943740845, "learning_rate": 2e-05, "loss": 0.04298246, "step": 14052 }, { "epoch": 28.106, "grad_norm": 0.9492455720901489, "learning_rate": 2e-05, "loss": 0.04059314, "step": 14053 }, { "epoch": 28.108, "grad_norm": 0.9498223066329956, "learning_rate": 2e-05, "loss": 0.04043512, "step": 14054 }, { "epoch": 28.11, "grad_norm": 1.0479615926742554, "learning_rate": 2e-05, "loss": 0.04633528, "step": 14055 }, { "epoch": 28.112, "grad_norm": 2.1273159980773926, "learning_rate": 2e-05, "loss": 0.04907396, "step": 14056 }, { "epoch": 28.114, "grad_norm": 0.9238464832305908, "learning_rate": 2e-05, "loss": 0.03899527, "step": 14057 }, { "epoch": 28.116, "grad_norm": 1.0335479974746704, "learning_rate": 2e-05, "loss": 0.04224519, "step": 14058 }, { "epoch": 28.118, "grad_norm": 1.3347941637039185, "learning_rate": 2e-05, "loss": 0.04622374, "step": 14059 }, { "epoch": 28.12, "grad_norm": 1.5429894924163818, "learning_rate": 2e-05, "loss": 0.05550133, "step": 14060 }, { "epoch": 28.122, "grad_norm": 2.1340670585632324, "learning_rate": 2e-05, "loss": 0.07429726, "step": 14061 }, { "epoch": 28.124, "grad_norm": 1.1149505376815796, "learning_rate": 2e-05, "loss": 0.0526567, "step": 14062 }, { "epoch": 28.126, "grad_norm": 1.4307209253311157, "learning_rate": 2e-05, "loss": 0.05440434, "step": 14063 }, { "epoch": 28.128, "grad_norm": 1.0403140783309937, "learning_rate": 2e-05, "loss": 0.04739083, "step": 14064 }, { "epoch": 28.13, "grad_norm": 1.019863486289978, "learning_rate": 2e-05, "loss": 0.04135643, "step": 14065 }, { "epoch": 28.132, "grad_norm": 1.2269132137298584, "learning_rate": 2e-05, "loss": 0.05765355, "step": 14066 }, { "epoch": 28.134, "grad_norm": 1.0494272708892822, "learning_rate": 2e-05, "loss": 0.04595459, "step": 14067 }, { "epoch": 28.136, "grad_norm": 1.1972683668136597, "learning_rate": 2e-05, "loss": 0.06780625, "step": 14068 }, { "epoch": 28.138, "grad_norm": 1.185584306716919, "learning_rate": 2e-05, "loss": 0.04852325, "step": 14069 }, { "epoch": 28.14, "grad_norm": 2.0557947158813477, "learning_rate": 2e-05, "loss": 0.05814135, "step": 14070 }, { "epoch": 28.142, "grad_norm": 1.3834878206253052, "learning_rate": 2e-05, "loss": 0.06105272, "step": 14071 }, { "epoch": 28.144, "grad_norm": 1.8960943222045898, "learning_rate": 2e-05, "loss": 0.07006301, "step": 14072 }, { "epoch": 28.146, "grad_norm": 3.9685282707214355, "learning_rate": 2e-05, "loss": 0.04802244, "step": 14073 }, { "epoch": 28.148, "grad_norm": 1.3225816488265991, "learning_rate": 2e-05, "loss": 0.03916916, "step": 14074 }, { "epoch": 28.15, "grad_norm": 1.2627843618392944, "learning_rate": 2e-05, "loss": 0.04471166, "step": 14075 }, { "epoch": 28.152, "grad_norm": 1.0370601415634155, "learning_rate": 2e-05, "loss": 0.04555884, "step": 14076 }, { "epoch": 28.154, "grad_norm": 1.2499042749404907, "learning_rate": 2e-05, "loss": 0.05491342, "step": 14077 }, { "epoch": 28.156, "grad_norm": 1.157971978187561, "learning_rate": 2e-05, "loss": 0.05268575, "step": 14078 }, { "epoch": 28.158, "grad_norm": 2.98366641998291, "learning_rate": 2e-05, "loss": 0.04065453, "step": 14079 }, { "epoch": 28.16, "grad_norm": 1.1362018585205078, "learning_rate": 2e-05, "loss": 0.06046869, "step": 14080 }, { "epoch": 28.162, "grad_norm": 1.2248120307922363, "learning_rate": 2e-05, "loss": 0.05201956, "step": 14081 }, { "epoch": 28.164, "grad_norm": 1.0102171897888184, "learning_rate": 2e-05, "loss": 0.04150052, "step": 14082 }, { "epoch": 28.166, "grad_norm": 1.8682960271835327, "learning_rate": 2e-05, "loss": 0.07284208, "step": 14083 }, { "epoch": 28.168, "grad_norm": 1.3426510095596313, "learning_rate": 2e-05, "loss": 0.04266308, "step": 14084 }, { "epoch": 28.17, "grad_norm": 1.146626591682434, "learning_rate": 2e-05, "loss": 0.04671438, "step": 14085 }, { "epoch": 28.172, "grad_norm": 2.4640090465545654, "learning_rate": 2e-05, "loss": 0.05072067, "step": 14086 }, { "epoch": 28.174, "grad_norm": 0.9155437350273132, "learning_rate": 2e-05, "loss": 0.04435801, "step": 14087 }, { "epoch": 28.176, "grad_norm": 1.1291377544403076, "learning_rate": 2e-05, "loss": 0.03823424, "step": 14088 }, { "epoch": 28.178, "grad_norm": 1.2758152484893799, "learning_rate": 2e-05, "loss": 0.07527094, "step": 14089 }, { "epoch": 28.18, "grad_norm": 1.446370005607605, "learning_rate": 2e-05, "loss": 0.04908771, "step": 14090 }, { "epoch": 28.182, "grad_norm": 1.8574540615081787, "learning_rate": 2e-05, "loss": 0.04790211, "step": 14091 }, { "epoch": 28.184, "grad_norm": 1.1243407726287842, "learning_rate": 2e-05, "loss": 0.05384775, "step": 14092 }, { "epoch": 28.186, "grad_norm": 1.7336320877075195, "learning_rate": 2e-05, "loss": 0.06516328, "step": 14093 }, { "epoch": 28.188, "grad_norm": 1.2211413383483887, "learning_rate": 2e-05, "loss": 0.03760356, "step": 14094 }, { "epoch": 28.19, "grad_norm": 1.1788825988769531, "learning_rate": 2e-05, "loss": 0.03312983, "step": 14095 }, { "epoch": 28.192, "grad_norm": 1.2192015647888184, "learning_rate": 2e-05, "loss": 0.05191582, "step": 14096 }, { "epoch": 28.194, "grad_norm": 1.6936148405075073, "learning_rate": 2e-05, "loss": 0.06833375, "step": 14097 }, { "epoch": 28.196, "grad_norm": 1.2406272888183594, "learning_rate": 2e-05, "loss": 0.04745254, "step": 14098 }, { "epoch": 28.198, "grad_norm": 1.489187240600586, "learning_rate": 2e-05, "loss": 0.05994038, "step": 14099 }, { "epoch": 28.2, "grad_norm": 1.1657342910766602, "learning_rate": 2e-05, "loss": 0.04350929, "step": 14100 }, { "epoch": 28.202, "grad_norm": 1.2240591049194336, "learning_rate": 2e-05, "loss": 0.05900217, "step": 14101 }, { "epoch": 28.204, "grad_norm": 2.1244421005249023, "learning_rate": 2e-05, "loss": 0.06863314, "step": 14102 }, { "epoch": 28.206, "grad_norm": 1.1060055494308472, "learning_rate": 2e-05, "loss": 0.05775836, "step": 14103 }, { "epoch": 28.208, "grad_norm": 1.1527104377746582, "learning_rate": 2e-05, "loss": 0.05117126, "step": 14104 }, { "epoch": 28.21, "grad_norm": 1.1074719429016113, "learning_rate": 2e-05, "loss": 0.05639096, "step": 14105 }, { "epoch": 28.212, "grad_norm": 1.3275132179260254, "learning_rate": 2e-05, "loss": 0.05322661, "step": 14106 }, { "epoch": 28.214, "grad_norm": 0.8755719065666199, "learning_rate": 2e-05, "loss": 0.03370619, "step": 14107 }, { "epoch": 28.216, "grad_norm": 1.5260087251663208, "learning_rate": 2e-05, "loss": 0.06016211, "step": 14108 }, { "epoch": 28.218, "grad_norm": 1.3420600891113281, "learning_rate": 2e-05, "loss": 0.06352885, "step": 14109 }, { "epoch": 28.22, "grad_norm": 0.9698576331138611, "learning_rate": 2e-05, "loss": 0.04411756, "step": 14110 }, { "epoch": 28.222, "grad_norm": 1.1220991611480713, "learning_rate": 2e-05, "loss": 0.04810376, "step": 14111 }, { "epoch": 28.224, "grad_norm": 1.1706749200820923, "learning_rate": 2e-05, "loss": 0.05258048, "step": 14112 }, { "epoch": 28.226, "grad_norm": 1.5424922704696655, "learning_rate": 2e-05, "loss": 0.04416787, "step": 14113 }, { "epoch": 28.228, "grad_norm": 1.228280782699585, "learning_rate": 2e-05, "loss": 0.04819671, "step": 14114 }, { "epoch": 28.23, "grad_norm": 1.3177189826965332, "learning_rate": 2e-05, "loss": 0.06610861, "step": 14115 }, { "epoch": 28.232, "grad_norm": 1.211240291595459, "learning_rate": 2e-05, "loss": 0.06382084, "step": 14116 }, { "epoch": 28.234, "grad_norm": 1.3349344730377197, "learning_rate": 2e-05, "loss": 0.04534159, "step": 14117 }, { "epoch": 28.236, "grad_norm": 1.1278730630874634, "learning_rate": 2e-05, "loss": 0.04737261, "step": 14118 }, { "epoch": 28.238, "grad_norm": 1.3732061386108398, "learning_rate": 2e-05, "loss": 0.06195763, "step": 14119 }, { "epoch": 28.24, "grad_norm": 1.067338228225708, "learning_rate": 2e-05, "loss": 0.05194592, "step": 14120 }, { "epoch": 28.242, "grad_norm": 1.0665085315704346, "learning_rate": 2e-05, "loss": 0.05403936, "step": 14121 }, { "epoch": 28.244, "grad_norm": 1.1192151308059692, "learning_rate": 2e-05, "loss": 0.05224983, "step": 14122 }, { "epoch": 28.246, "grad_norm": 2.093144416809082, "learning_rate": 2e-05, "loss": 0.07865152, "step": 14123 }, { "epoch": 28.248, "grad_norm": 1.2102408409118652, "learning_rate": 2e-05, "loss": 0.05297173, "step": 14124 }, { "epoch": 28.25, "grad_norm": 1.447779655456543, "learning_rate": 2e-05, "loss": 0.04425377, "step": 14125 }, { "epoch": 28.252, "grad_norm": 1.0098801851272583, "learning_rate": 2e-05, "loss": 0.04514705, "step": 14126 }, { "epoch": 28.254, "grad_norm": 1.1338164806365967, "learning_rate": 2e-05, "loss": 0.04084329, "step": 14127 }, { "epoch": 28.256, "grad_norm": 1.4635968208312988, "learning_rate": 2e-05, "loss": 0.07033442, "step": 14128 }, { "epoch": 28.258, "grad_norm": 1.5438834428787231, "learning_rate": 2e-05, "loss": 0.04933818, "step": 14129 }, { "epoch": 28.26, "grad_norm": 1.1094868183135986, "learning_rate": 2e-05, "loss": 0.04933073, "step": 14130 }, { "epoch": 28.262, "grad_norm": 1.3814165592193604, "learning_rate": 2e-05, "loss": 0.0379516, "step": 14131 }, { "epoch": 28.264, "grad_norm": 1.6791162490844727, "learning_rate": 2e-05, "loss": 0.06294131, "step": 14132 }, { "epoch": 28.266, "grad_norm": 1.0613534450531006, "learning_rate": 2e-05, "loss": 0.04104646, "step": 14133 }, { "epoch": 28.268, "grad_norm": 1.0788605213165283, "learning_rate": 2e-05, "loss": 0.04123756, "step": 14134 }, { "epoch": 28.27, "grad_norm": 1.6847679615020752, "learning_rate": 2e-05, "loss": 0.04510987, "step": 14135 }, { "epoch": 28.272, "grad_norm": 1.0833982229232788, "learning_rate": 2e-05, "loss": 0.04803877, "step": 14136 }, { "epoch": 28.274, "grad_norm": 1.0500634908676147, "learning_rate": 2e-05, "loss": 0.04524401, "step": 14137 }, { "epoch": 28.276, "grad_norm": 2.0358710289001465, "learning_rate": 2e-05, "loss": 0.05563102, "step": 14138 }, { "epoch": 28.278, "grad_norm": 1.9700868129730225, "learning_rate": 2e-05, "loss": 0.05167124, "step": 14139 }, { "epoch": 28.28, "grad_norm": 1.322805643081665, "learning_rate": 2e-05, "loss": 0.03498475, "step": 14140 }, { "epoch": 28.282, "grad_norm": 1.270660400390625, "learning_rate": 2e-05, "loss": 0.04830954, "step": 14141 }, { "epoch": 28.284, "grad_norm": 2.6301474571228027, "learning_rate": 2e-05, "loss": 0.04769824, "step": 14142 }, { "epoch": 28.286, "grad_norm": 1.0601212978363037, "learning_rate": 2e-05, "loss": 0.0525927, "step": 14143 }, { "epoch": 28.288, "grad_norm": 5.727256774902344, "learning_rate": 2e-05, "loss": 0.05114076, "step": 14144 }, { "epoch": 28.29, "grad_norm": 2.307408094406128, "learning_rate": 2e-05, "loss": 0.07158878, "step": 14145 }, { "epoch": 28.292, "grad_norm": 1.4327751398086548, "learning_rate": 2e-05, "loss": 0.04871903, "step": 14146 }, { "epoch": 28.294, "grad_norm": 2.2921621799468994, "learning_rate": 2e-05, "loss": 0.04797332, "step": 14147 }, { "epoch": 28.296, "grad_norm": 2.8305745124816895, "learning_rate": 2e-05, "loss": 0.07857145, "step": 14148 }, { "epoch": 28.298, "grad_norm": 1.2730368375778198, "learning_rate": 2e-05, "loss": 0.04915085, "step": 14149 }, { "epoch": 28.3, "grad_norm": 1.0279103517532349, "learning_rate": 2e-05, "loss": 0.05039107, "step": 14150 }, { "epoch": 28.302, "grad_norm": 1.1975942850112915, "learning_rate": 2e-05, "loss": 0.05498748, "step": 14151 }, { "epoch": 28.304, "grad_norm": 1.4700589179992676, "learning_rate": 2e-05, "loss": 0.05818937, "step": 14152 }, { "epoch": 28.306, "grad_norm": 0.9843897223472595, "learning_rate": 2e-05, "loss": 0.04163428, "step": 14153 }, { "epoch": 28.308, "grad_norm": 2.482224225997925, "learning_rate": 2e-05, "loss": 0.07201196, "step": 14154 }, { "epoch": 28.31, "grad_norm": 2.8909153938293457, "learning_rate": 2e-05, "loss": 0.03670835, "step": 14155 }, { "epoch": 28.312, "grad_norm": 1.1694040298461914, "learning_rate": 2e-05, "loss": 0.05486508, "step": 14156 }, { "epoch": 28.314, "grad_norm": 1.4956320524215698, "learning_rate": 2e-05, "loss": 0.06367174, "step": 14157 }, { "epoch": 28.316, "grad_norm": 1.5688138008117676, "learning_rate": 2e-05, "loss": 0.05934228, "step": 14158 }, { "epoch": 28.318, "grad_norm": 1.237672209739685, "learning_rate": 2e-05, "loss": 0.03960197, "step": 14159 }, { "epoch": 28.32, "grad_norm": 1.0728367567062378, "learning_rate": 2e-05, "loss": 0.04807479, "step": 14160 }, { "epoch": 28.322, "grad_norm": 1.051055908203125, "learning_rate": 2e-05, "loss": 0.04590215, "step": 14161 }, { "epoch": 28.324, "grad_norm": 3.6231753826141357, "learning_rate": 2e-05, "loss": 0.052162, "step": 14162 }, { "epoch": 28.326, "grad_norm": 1.2580742835998535, "learning_rate": 2e-05, "loss": 0.04747877, "step": 14163 }, { "epoch": 28.328, "grad_norm": 1.758090853691101, "learning_rate": 2e-05, "loss": 0.06075525, "step": 14164 }, { "epoch": 28.33, "grad_norm": 1.0375627279281616, "learning_rate": 2e-05, "loss": 0.03587, "step": 14165 }, { "epoch": 28.332, "grad_norm": 1.4057998657226562, "learning_rate": 2e-05, "loss": 0.06076965, "step": 14166 }, { "epoch": 28.334, "grad_norm": 1.1139633655548096, "learning_rate": 2e-05, "loss": 0.06156767, "step": 14167 }, { "epoch": 28.336, "grad_norm": 1.4080520868301392, "learning_rate": 2e-05, "loss": 0.06401391, "step": 14168 }, { "epoch": 28.338, "grad_norm": 1.055641531944275, "learning_rate": 2e-05, "loss": 0.03654396, "step": 14169 }, { "epoch": 28.34, "grad_norm": 1.0406756401062012, "learning_rate": 2e-05, "loss": 0.04183136, "step": 14170 }, { "epoch": 28.342, "grad_norm": 1.1066478490829468, "learning_rate": 2e-05, "loss": 0.04356313, "step": 14171 }, { "epoch": 28.344, "grad_norm": 1.5559896230697632, "learning_rate": 2e-05, "loss": 0.06464694, "step": 14172 }, { "epoch": 28.346, "grad_norm": 1.2917381525039673, "learning_rate": 2e-05, "loss": 0.05953247, "step": 14173 }, { "epoch": 28.348, "grad_norm": 1.5571998357772827, "learning_rate": 2e-05, "loss": 0.05779471, "step": 14174 }, { "epoch": 28.35, "grad_norm": 1.1026018857955933, "learning_rate": 2e-05, "loss": 0.05282699, "step": 14175 }, { "epoch": 28.352, "grad_norm": 1.053712010383606, "learning_rate": 2e-05, "loss": 0.04301897, "step": 14176 }, { "epoch": 28.354, "grad_norm": 0.964838445186615, "learning_rate": 2e-05, "loss": 0.0378805, "step": 14177 }, { "epoch": 28.356, "grad_norm": 2.2700605392456055, "learning_rate": 2e-05, "loss": 0.05634594, "step": 14178 }, { "epoch": 28.358, "grad_norm": 0.9916933178901672, "learning_rate": 2e-05, "loss": 0.04360142, "step": 14179 }, { "epoch": 28.36, "grad_norm": 1.1369694471359253, "learning_rate": 2e-05, "loss": 0.0552519, "step": 14180 }, { "epoch": 28.362, "grad_norm": 2.1990485191345215, "learning_rate": 2e-05, "loss": 0.05574964, "step": 14181 }, { "epoch": 28.364, "grad_norm": 1.074510931968689, "learning_rate": 2e-05, "loss": 0.04561885, "step": 14182 }, { "epoch": 28.366, "grad_norm": 1.0176482200622559, "learning_rate": 2e-05, "loss": 0.03819156, "step": 14183 }, { "epoch": 28.368, "grad_norm": 0.9057084918022156, "learning_rate": 2e-05, "loss": 0.03722963, "step": 14184 }, { "epoch": 28.37, "grad_norm": 1.1909552812576294, "learning_rate": 2e-05, "loss": 0.05888623, "step": 14185 }, { "epoch": 28.372, "grad_norm": 1.1309235095977783, "learning_rate": 2e-05, "loss": 0.05316223, "step": 14186 }, { "epoch": 28.374, "grad_norm": 1.4506255388259888, "learning_rate": 2e-05, "loss": 0.04478668, "step": 14187 }, { "epoch": 28.376, "grad_norm": 1.254969596862793, "learning_rate": 2e-05, "loss": 0.04819975, "step": 14188 }, { "epoch": 28.378, "grad_norm": 0.9195451736450195, "learning_rate": 2e-05, "loss": 0.03114618, "step": 14189 }, { "epoch": 28.38, "grad_norm": 1.347697377204895, "learning_rate": 2e-05, "loss": 0.04204157, "step": 14190 }, { "epoch": 28.382, "grad_norm": 2.8922486305236816, "learning_rate": 2e-05, "loss": 0.05275394, "step": 14191 }, { "epoch": 28.384, "grad_norm": 1.2228972911834717, "learning_rate": 2e-05, "loss": 0.05039288, "step": 14192 }, { "epoch": 28.386, "grad_norm": 1.0761572122573853, "learning_rate": 2e-05, "loss": 0.0578664, "step": 14193 }, { "epoch": 28.388, "grad_norm": 1.2144663333892822, "learning_rate": 2e-05, "loss": 0.0541693, "step": 14194 }, { "epoch": 28.39, "grad_norm": 1.4729324579238892, "learning_rate": 2e-05, "loss": 0.04443662, "step": 14195 }, { "epoch": 28.392, "grad_norm": 1.1051536798477173, "learning_rate": 2e-05, "loss": 0.05182551, "step": 14196 }, { "epoch": 28.394, "grad_norm": 2.3291468620300293, "learning_rate": 2e-05, "loss": 0.0749258, "step": 14197 }, { "epoch": 28.396, "grad_norm": 1.588195562362671, "learning_rate": 2e-05, "loss": 0.06317502, "step": 14198 }, { "epoch": 28.398, "grad_norm": 1.0108226537704468, "learning_rate": 2e-05, "loss": 0.03754605, "step": 14199 }, { "epoch": 28.4, "grad_norm": 2.075113534927368, "learning_rate": 2e-05, "loss": 0.06992606, "step": 14200 }, { "epoch": 28.402, "grad_norm": 1.5361895561218262, "learning_rate": 2e-05, "loss": 0.06642133, "step": 14201 }, { "epoch": 28.404, "grad_norm": 1.5755606889724731, "learning_rate": 2e-05, "loss": 0.04676132, "step": 14202 }, { "epoch": 28.406, "grad_norm": 1.1816959381103516, "learning_rate": 2e-05, "loss": 0.05332002, "step": 14203 }, { "epoch": 28.408, "grad_norm": 1.5009119510650635, "learning_rate": 2e-05, "loss": 0.0631537, "step": 14204 }, { "epoch": 28.41, "grad_norm": 1.0937538146972656, "learning_rate": 2e-05, "loss": 0.04668565, "step": 14205 }, { "epoch": 28.412, "grad_norm": 1.5544747114181519, "learning_rate": 2e-05, "loss": 0.04987885, "step": 14206 }, { "epoch": 28.414, "grad_norm": 1.459844946861267, "learning_rate": 2e-05, "loss": 0.054866, "step": 14207 }, { "epoch": 28.416, "grad_norm": 1.0544934272766113, "learning_rate": 2e-05, "loss": 0.04791127, "step": 14208 }, { "epoch": 28.418, "grad_norm": 1.1513046026229858, "learning_rate": 2e-05, "loss": 0.04784077, "step": 14209 }, { "epoch": 28.42, "grad_norm": 1.281095266342163, "learning_rate": 2e-05, "loss": 0.0616293, "step": 14210 }, { "epoch": 28.422, "grad_norm": 1.0270214080810547, "learning_rate": 2e-05, "loss": 0.04437097, "step": 14211 }, { "epoch": 28.424, "grad_norm": 1.152099847793579, "learning_rate": 2e-05, "loss": 0.04068635, "step": 14212 }, { "epoch": 28.426, "grad_norm": 2.2201297283172607, "learning_rate": 2e-05, "loss": 0.05039227, "step": 14213 }, { "epoch": 28.428, "grad_norm": 1.164056658744812, "learning_rate": 2e-05, "loss": 0.04014026, "step": 14214 }, { "epoch": 28.43, "grad_norm": 3.6887073516845703, "learning_rate": 2e-05, "loss": 0.04864767, "step": 14215 }, { "epoch": 28.432, "grad_norm": 1.087576150894165, "learning_rate": 2e-05, "loss": 0.04564459, "step": 14216 }, { "epoch": 28.434, "grad_norm": 1.0503522157669067, "learning_rate": 2e-05, "loss": 0.05318118, "step": 14217 }, { "epoch": 28.436, "grad_norm": 2.4197983741760254, "learning_rate": 2e-05, "loss": 0.05320466, "step": 14218 }, { "epoch": 28.438, "grad_norm": 1.1138126850128174, "learning_rate": 2e-05, "loss": 0.05077223, "step": 14219 }, { "epoch": 28.44, "grad_norm": 1.1684259176254272, "learning_rate": 2e-05, "loss": 0.03906719, "step": 14220 }, { "epoch": 28.442, "grad_norm": 1.5182048082351685, "learning_rate": 2e-05, "loss": 0.04108508, "step": 14221 }, { "epoch": 28.444, "grad_norm": 1.810714840888977, "learning_rate": 2e-05, "loss": 0.05943859, "step": 14222 }, { "epoch": 28.446, "grad_norm": 1.8546385765075684, "learning_rate": 2e-05, "loss": 0.05346815, "step": 14223 }, { "epoch": 28.448, "grad_norm": 1.8148488998413086, "learning_rate": 2e-05, "loss": 0.05627569, "step": 14224 }, { "epoch": 28.45, "grad_norm": 1.169124960899353, "learning_rate": 2e-05, "loss": 0.04022298, "step": 14225 }, { "epoch": 28.452, "grad_norm": 0.9906086325645447, "learning_rate": 2e-05, "loss": 0.03913615, "step": 14226 }, { "epoch": 28.454, "grad_norm": 1.4638454914093018, "learning_rate": 2e-05, "loss": 0.05906557, "step": 14227 }, { "epoch": 28.456, "grad_norm": 1.5009502172470093, "learning_rate": 2e-05, "loss": 0.06505533, "step": 14228 }, { "epoch": 28.458, "grad_norm": 1.489866852760315, "learning_rate": 2e-05, "loss": 0.05865881, "step": 14229 }, { "epoch": 28.46, "grad_norm": 0.9519407153129578, "learning_rate": 2e-05, "loss": 0.02765641, "step": 14230 }, { "epoch": 28.462, "grad_norm": 1.4552841186523438, "learning_rate": 2e-05, "loss": 0.05513899, "step": 14231 }, { "epoch": 28.464, "grad_norm": 1.1801583766937256, "learning_rate": 2e-05, "loss": 0.04616518, "step": 14232 }, { "epoch": 28.466, "grad_norm": 1.048248529434204, "learning_rate": 2e-05, "loss": 0.04423217, "step": 14233 }, { "epoch": 28.468, "grad_norm": 0.8251314759254456, "learning_rate": 2e-05, "loss": 0.02158103, "step": 14234 }, { "epoch": 28.47, "grad_norm": 1.4055218696594238, "learning_rate": 2e-05, "loss": 0.04912665, "step": 14235 }, { "epoch": 28.472, "grad_norm": 1.0676335096359253, "learning_rate": 2e-05, "loss": 0.04554426, "step": 14236 }, { "epoch": 28.474, "grad_norm": 2.020879030227661, "learning_rate": 2e-05, "loss": 0.0523311, "step": 14237 }, { "epoch": 28.476, "grad_norm": 1.5445070266723633, "learning_rate": 2e-05, "loss": 0.05826297, "step": 14238 }, { "epoch": 28.478, "grad_norm": 1.0442067384719849, "learning_rate": 2e-05, "loss": 0.0448417, "step": 14239 }, { "epoch": 28.48, "grad_norm": 1.2893093824386597, "learning_rate": 2e-05, "loss": 0.04190796, "step": 14240 }, { "epoch": 28.482, "grad_norm": 1.0473220348358154, "learning_rate": 2e-05, "loss": 0.04087887, "step": 14241 }, { "epoch": 28.484, "grad_norm": 1.1895166635513306, "learning_rate": 2e-05, "loss": 0.04934598, "step": 14242 }, { "epoch": 28.486, "grad_norm": 0.9796755909919739, "learning_rate": 2e-05, "loss": 0.04334146, "step": 14243 }, { "epoch": 28.488, "grad_norm": 2.1016547679901123, "learning_rate": 2e-05, "loss": 0.05654687, "step": 14244 }, { "epoch": 28.49, "grad_norm": 1.394168496131897, "learning_rate": 2e-05, "loss": 0.05647502, "step": 14245 }, { "epoch": 28.492, "grad_norm": 1.1902996301651, "learning_rate": 2e-05, "loss": 0.04661873, "step": 14246 }, { "epoch": 28.494, "grad_norm": 0.9599032998085022, "learning_rate": 2e-05, "loss": 0.03410069, "step": 14247 }, { "epoch": 28.496, "grad_norm": 1.250897765159607, "learning_rate": 2e-05, "loss": 0.05334844, "step": 14248 }, { "epoch": 28.498, "grad_norm": 1.1826105117797852, "learning_rate": 2e-05, "loss": 0.05385578, "step": 14249 }, { "epoch": 28.5, "grad_norm": 1.4346892833709717, "learning_rate": 2e-05, "loss": 0.0562182, "step": 14250 }, { "epoch": 28.502, "grad_norm": 1.2595847845077515, "learning_rate": 2e-05, "loss": 0.03731131, "step": 14251 }, { "epoch": 28.504, "grad_norm": 1.095626950263977, "learning_rate": 2e-05, "loss": 0.03731666, "step": 14252 }, { "epoch": 28.506, "grad_norm": 1.2750048637390137, "learning_rate": 2e-05, "loss": 0.06110781, "step": 14253 }, { "epoch": 28.508, "grad_norm": 1.1414785385131836, "learning_rate": 2e-05, "loss": 0.04365663, "step": 14254 }, { "epoch": 28.51, "grad_norm": 1.2128257751464844, "learning_rate": 2e-05, "loss": 0.05428216, "step": 14255 }, { "epoch": 28.512, "grad_norm": 1.4649673700332642, "learning_rate": 2e-05, "loss": 0.05592339, "step": 14256 }, { "epoch": 28.514, "grad_norm": 1.3559781312942505, "learning_rate": 2e-05, "loss": 0.06326771, "step": 14257 }, { "epoch": 28.516, "grad_norm": 1.2045713663101196, "learning_rate": 2e-05, "loss": 0.0450969, "step": 14258 }, { "epoch": 28.518, "grad_norm": 1.333905816078186, "learning_rate": 2e-05, "loss": 0.05851347, "step": 14259 }, { "epoch": 28.52, "grad_norm": 2.6904025077819824, "learning_rate": 2e-05, "loss": 0.05237071, "step": 14260 }, { "epoch": 28.522, "grad_norm": 1.5729610919952393, "learning_rate": 2e-05, "loss": 0.05397804, "step": 14261 }, { "epoch": 28.524, "grad_norm": 1.3961398601531982, "learning_rate": 2e-05, "loss": 0.05436852, "step": 14262 }, { "epoch": 28.526, "grad_norm": 1.2430596351623535, "learning_rate": 2e-05, "loss": 0.04819159, "step": 14263 }, { "epoch": 28.528, "grad_norm": 1.2030974626541138, "learning_rate": 2e-05, "loss": 0.04276945, "step": 14264 }, { "epoch": 28.53, "grad_norm": 1.314576506614685, "learning_rate": 2e-05, "loss": 0.0392509, "step": 14265 }, { "epoch": 28.532, "grad_norm": 1.2434229850769043, "learning_rate": 2e-05, "loss": 0.04280588, "step": 14266 }, { "epoch": 28.534, "grad_norm": 1.103236198425293, "learning_rate": 2e-05, "loss": 0.04434071, "step": 14267 }, { "epoch": 28.536, "grad_norm": 2.219329357147217, "learning_rate": 2e-05, "loss": 0.04926879, "step": 14268 }, { "epoch": 28.538, "grad_norm": 0.9897520542144775, "learning_rate": 2e-05, "loss": 0.05005113, "step": 14269 }, { "epoch": 28.54, "grad_norm": 4.43552303314209, "learning_rate": 2e-05, "loss": 0.05165307, "step": 14270 }, { "epoch": 28.542, "grad_norm": 1.0372741222381592, "learning_rate": 2e-05, "loss": 0.04444811, "step": 14271 }, { "epoch": 28.544, "grad_norm": 1.1552242040634155, "learning_rate": 2e-05, "loss": 0.05945187, "step": 14272 }, { "epoch": 28.546, "grad_norm": 1.1484119892120361, "learning_rate": 2e-05, "loss": 0.03236457, "step": 14273 }, { "epoch": 28.548000000000002, "grad_norm": 1.337377667427063, "learning_rate": 2e-05, "loss": 0.049745, "step": 14274 }, { "epoch": 28.55, "grad_norm": 1.8503460884094238, "learning_rate": 2e-05, "loss": 0.06147053, "step": 14275 }, { "epoch": 28.552, "grad_norm": 1.2620158195495605, "learning_rate": 2e-05, "loss": 0.03967803, "step": 14276 }, { "epoch": 28.554, "grad_norm": 1.4288681745529175, "learning_rate": 2e-05, "loss": 0.05816342, "step": 14277 }, { "epoch": 28.556, "grad_norm": 1.4467310905456543, "learning_rate": 2e-05, "loss": 0.07627648, "step": 14278 }, { "epoch": 28.558, "grad_norm": 1.1054469347000122, "learning_rate": 2e-05, "loss": 0.03413836, "step": 14279 }, { "epoch": 28.56, "grad_norm": 1.1141843795776367, "learning_rate": 2e-05, "loss": 0.04112323, "step": 14280 }, { "epoch": 28.562, "grad_norm": 1.0520139932632446, "learning_rate": 2e-05, "loss": 0.04258643, "step": 14281 }, { "epoch": 28.564, "grad_norm": 1.0116606950759888, "learning_rate": 2e-05, "loss": 0.03528443, "step": 14282 }, { "epoch": 28.566, "grad_norm": 1.0243785381317139, "learning_rate": 2e-05, "loss": 0.03781324, "step": 14283 }, { "epoch": 28.568, "grad_norm": 1.734710931777954, "learning_rate": 2e-05, "loss": 0.07294004, "step": 14284 }, { "epoch": 28.57, "grad_norm": 1.0915411710739136, "learning_rate": 2e-05, "loss": 0.03967145, "step": 14285 }, { "epoch": 28.572, "grad_norm": 1.0831040143966675, "learning_rate": 2e-05, "loss": 0.04613046, "step": 14286 }, { "epoch": 28.574, "grad_norm": 1.7763166427612305, "learning_rate": 2e-05, "loss": 0.04819128, "step": 14287 }, { "epoch": 28.576, "grad_norm": 1.4782124757766724, "learning_rate": 2e-05, "loss": 0.05433568, "step": 14288 }, { "epoch": 28.578, "grad_norm": 1.1395972967147827, "learning_rate": 2e-05, "loss": 0.05151636, "step": 14289 }, { "epoch": 28.58, "grad_norm": 1.019631266593933, "learning_rate": 2e-05, "loss": 0.04353862, "step": 14290 }, { "epoch": 28.582, "grad_norm": 1.152753472328186, "learning_rate": 2e-05, "loss": 0.04237518, "step": 14291 }, { "epoch": 28.584, "grad_norm": 0.9378215670585632, "learning_rate": 2e-05, "loss": 0.03555574, "step": 14292 }, { "epoch": 28.586, "grad_norm": 1.475366234779358, "learning_rate": 2e-05, "loss": 0.05275155, "step": 14293 }, { "epoch": 28.588, "grad_norm": 1.270757794380188, "learning_rate": 2e-05, "loss": 0.04754128, "step": 14294 }, { "epoch": 28.59, "grad_norm": 1.3979893922805786, "learning_rate": 2e-05, "loss": 0.06471662, "step": 14295 }, { "epoch": 28.592, "grad_norm": 1.1572070121765137, "learning_rate": 2e-05, "loss": 0.05989663, "step": 14296 }, { "epoch": 28.594, "grad_norm": 1.3296196460723877, "learning_rate": 2e-05, "loss": 0.05947623, "step": 14297 }, { "epoch": 28.596, "grad_norm": 1.1352485418319702, "learning_rate": 2e-05, "loss": 0.06240717, "step": 14298 }, { "epoch": 28.598, "grad_norm": 1.4964193105697632, "learning_rate": 2e-05, "loss": 0.06334744, "step": 14299 }, { "epoch": 28.6, "grad_norm": 1.033329963684082, "learning_rate": 2e-05, "loss": 0.03799847, "step": 14300 }, { "epoch": 28.602, "grad_norm": 1.121423363685608, "learning_rate": 2e-05, "loss": 0.04782546, "step": 14301 }, { "epoch": 28.604, "grad_norm": 1.3283140659332275, "learning_rate": 2e-05, "loss": 0.04062272, "step": 14302 }, { "epoch": 28.606, "grad_norm": 1.3512027263641357, "learning_rate": 2e-05, "loss": 0.0430729, "step": 14303 }, { "epoch": 28.608, "grad_norm": 1.166361689567566, "learning_rate": 2e-05, "loss": 0.04468785, "step": 14304 }, { "epoch": 28.61, "grad_norm": 1.7289087772369385, "learning_rate": 2e-05, "loss": 0.05869601, "step": 14305 }, { "epoch": 28.612, "grad_norm": 1.2280229330062866, "learning_rate": 2e-05, "loss": 0.0445801, "step": 14306 }, { "epoch": 28.614, "grad_norm": 1.408436894416809, "learning_rate": 2e-05, "loss": 0.04638, "step": 14307 }, { "epoch": 28.616, "grad_norm": 1.141066074371338, "learning_rate": 2e-05, "loss": 0.04608263, "step": 14308 }, { "epoch": 28.618, "grad_norm": 1.4153738021850586, "learning_rate": 2e-05, "loss": 0.0519913, "step": 14309 }, { "epoch": 28.62, "grad_norm": 1.0140947103500366, "learning_rate": 2e-05, "loss": 0.04462872, "step": 14310 }, { "epoch": 28.622, "grad_norm": 1.0564391613006592, "learning_rate": 2e-05, "loss": 0.04239165, "step": 14311 }, { "epoch": 28.624, "grad_norm": 1.1021504402160645, "learning_rate": 2e-05, "loss": 0.05415932, "step": 14312 }, { "epoch": 28.626, "grad_norm": 1.1620004177093506, "learning_rate": 2e-05, "loss": 0.04092529, "step": 14313 }, { "epoch": 28.628, "grad_norm": 1.3989795446395874, "learning_rate": 2e-05, "loss": 0.04282364, "step": 14314 }, { "epoch": 28.63, "grad_norm": 1.1773464679718018, "learning_rate": 2e-05, "loss": 0.0368087, "step": 14315 }, { "epoch": 28.632, "grad_norm": 1.8446012735366821, "learning_rate": 2e-05, "loss": 0.06626733, "step": 14316 }, { "epoch": 28.634, "grad_norm": 0.9078043103218079, "learning_rate": 2e-05, "loss": 0.03762669, "step": 14317 }, { "epoch": 28.636, "grad_norm": 0.9992712736129761, "learning_rate": 2e-05, "loss": 0.04445425, "step": 14318 }, { "epoch": 28.638, "grad_norm": 1.1327447891235352, "learning_rate": 2e-05, "loss": 0.03851479, "step": 14319 }, { "epoch": 28.64, "grad_norm": 1.0853513479232788, "learning_rate": 2e-05, "loss": 0.0469379, "step": 14320 }, { "epoch": 28.642, "grad_norm": 1.291372299194336, "learning_rate": 2e-05, "loss": 0.05949248, "step": 14321 }, { "epoch": 28.644, "grad_norm": 2.6608824729919434, "learning_rate": 2e-05, "loss": 0.07719251, "step": 14322 }, { "epoch": 28.646, "grad_norm": 1.0239036083221436, "learning_rate": 2e-05, "loss": 0.05067696, "step": 14323 }, { "epoch": 28.648, "grad_norm": 2.6112844944000244, "learning_rate": 2e-05, "loss": 0.05457635, "step": 14324 }, { "epoch": 28.65, "grad_norm": 1.7143548727035522, "learning_rate": 2e-05, "loss": 0.04329626, "step": 14325 }, { "epoch": 28.652, "grad_norm": 1.0481735467910767, "learning_rate": 2e-05, "loss": 0.04226059, "step": 14326 }, { "epoch": 28.654, "grad_norm": 1.7270582914352417, "learning_rate": 2e-05, "loss": 0.06254755, "step": 14327 }, { "epoch": 28.656, "grad_norm": 1.0991578102111816, "learning_rate": 2e-05, "loss": 0.0415943, "step": 14328 }, { "epoch": 28.658, "grad_norm": 1.1321725845336914, "learning_rate": 2e-05, "loss": 0.04647674, "step": 14329 }, { "epoch": 28.66, "grad_norm": 2.168670177459717, "learning_rate": 2e-05, "loss": 0.06582256, "step": 14330 }, { "epoch": 28.662, "grad_norm": 1.0573934316635132, "learning_rate": 2e-05, "loss": 0.0433431, "step": 14331 }, { "epoch": 28.664, "grad_norm": 1.0872145891189575, "learning_rate": 2e-05, "loss": 0.04302694, "step": 14332 }, { "epoch": 28.666, "grad_norm": 1.1373875141143799, "learning_rate": 2e-05, "loss": 0.05047703, "step": 14333 }, { "epoch": 28.668, "grad_norm": 2.043168067932129, "learning_rate": 2e-05, "loss": 0.04606549, "step": 14334 }, { "epoch": 28.67, "grad_norm": 1.1146574020385742, "learning_rate": 2e-05, "loss": 0.05331158, "step": 14335 }, { "epoch": 28.672, "grad_norm": 1.2046722173690796, "learning_rate": 2e-05, "loss": 0.0526657, "step": 14336 }, { "epoch": 28.674, "grad_norm": 1.27951180934906, "learning_rate": 2e-05, "loss": 0.04821653, "step": 14337 }, { "epoch": 28.676, "grad_norm": 1.1907305717468262, "learning_rate": 2e-05, "loss": 0.0418507, "step": 14338 }, { "epoch": 28.678, "grad_norm": 1.7656348943710327, "learning_rate": 2e-05, "loss": 0.06238958, "step": 14339 }, { "epoch": 28.68, "grad_norm": 1.123564600944519, "learning_rate": 2e-05, "loss": 0.05293564, "step": 14340 }, { "epoch": 28.682, "grad_norm": 1.4332327842712402, "learning_rate": 2e-05, "loss": 0.05154917, "step": 14341 }, { "epoch": 28.684, "grad_norm": 1.3120306730270386, "learning_rate": 2e-05, "loss": 0.05033649, "step": 14342 }, { "epoch": 28.686, "grad_norm": 2.110966444015503, "learning_rate": 2e-05, "loss": 0.0683824, "step": 14343 }, { "epoch": 28.688, "grad_norm": 1.1649389266967773, "learning_rate": 2e-05, "loss": 0.04631986, "step": 14344 }, { "epoch": 28.69, "grad_norm": 1.221285343170166, "learning_rate": 2e-05, "loss": 0.05183788, "step": 14345 }, { "epoch": 28.692, "grad_norm": 0.9967836141586304, "learning_rate": 2e-05, "loss": 0.03994695, "step": 14346 }, { "epoch": 28.694, "grad_norm": 0.9123416543006897, "learning_rate": 2e-05, "loss": 0.04623649, "step": 14347 }, { "epoch": 28.696, "grad_norm": 2.066378116607666, "learning_rate": 2e-05, "loss": 0.06183371, "step": 14348 }, { "epoch": 28.698, "grad_norm": 1.1055715084075928, "learning_rate": 2e-05, "loss": 0.03918929, "step": 14349 }, { "epoch": 28.7, "grad_norm": 0.9038406610488892, "learning_rate": 2e-05, "loss": 0.03372643, "step": 14350 }, { "epoch": 28.701999999999998, "grad_norm": 1.5504449605941772, "learning_rate": 2e-05, "loss": 0.06298279, "step": 14351 }, { "epoch": 28.704, "grad_norm": 1.1415177583694458, "learning_rate": 2e-05, "loss": 0.05686108, "step": 14352 }, { "epoch": 28.706, "grad_norm": 1.2425048351287842, "learning_rate": 2e-05, "loss": 0.04860462, "step": 14353 }, { "epoch": 28.708, "grad_norm": 1.4368317127227783, "learning_rate": 2e-05, "loss": 0.06575515, "step": 14354 }, { "epoch": 28.71, "grad_norm": 1.2236424684524536, "learning_rate": 2e-05, "loss": 0.04923696, "step": 14355 }, { "epoch": 28.712, "grad_norm": 1.038508415222168, "learning_rate": 2e-05, "loss": 0.04665044, "step": 14356 }, { "epoch": 28.714, "grad_norm": 1.472262978553772, "learning_rate": 2e-05, "loss": 0.05734815, "step": 14357 }, { "epoch": 28.716, "grad_norm": 1.0540971755981445, "learning_rate": 2e-05, "loss": 0.04578978, "step": 14358 }, { "epoch": 28.718, "grad_norm": 1.6080116033554077, "learning_rate": 2e-05, "loss": 0.04620618, "step": 14359 }, { "epoch": 28.72, "grad_norm": 1.9651175737380981, "learning_rate": 2e-05, "loss": 0.0554799, "step": 14360 }, { "epoch": 28.722, "grad_norm": 1.394176959991455, "learning_rate": 2e-05, "loss": 0.06383412, "step": 14361 }, { "epoch": 28.724, "grad_norm": 1.1381794214248657, "learning_rate": 2e-05, "loss": 0.05216906, "step": 14362 }, { "epoch": 28.726, "grad_norm": 0.9953507781028748, "learning_rate": 2e-05, "loss": 0.04028524, "step": 14363 }, { "epoch": 28.728, "grad_norm": 1.2503029108047485, "learning_rate": 2e-05, "loss": 0.05787265, "step": 14364 }, { "epoch": 28.73, "grad_norm": 1.2147308588027954, "learning_rate": 2e-05, "loss": 0.06940012, "step": 14365 }, { "epoch": 28.732, "grad_norm": 0.9646700620651245, "learning_rate": 2e-05, "loss": 0.051755, "step": 14366 }, { "epoch": 28.734, "grad_norm": 1.1635664701461792, "learning_rate": 2e-05, "loss": 0.04023926, "step": 14367 }, { "epoch": 28.736, "grad_norm": 0.8383843302726746, "learning_rate": 2e-05, "loss": 0.03957512, "step": 14368 }, { "epoch": 28.738, "grad_norm": 2.2686729431152344, "learning_rate": 2e-05, "loss": 0.05979198, "step": 14369 }, { "epoch": 28.74, "grad_norm": 1.1249829530715942, "learning_rate": 2e-05, "loss": 0.04337114, "step": 14370 }, { "epoch": 28.742, "grad_norm": 1.9169130325317383, "learning_rate": 2e-05, "loss": 0.06050321, "step": 14371 }, { "epoch": 28.744, "grad_norm": 1.0821468830108643, "learning_rate": 2e-05, "loss": 0.04710738, "step": 14372 }, { "epoch": 28.746, "grad_norm": 2.381197214126587, "learning_rate": 2e-05, "loss": 0.04772149, "step": 14373 }, { "epoch": 28.748, "grad_norm": 0.8552329540252686, "learning_rate": 2e-05, "loss": 0.0367963, "step": 14374 }, { "epoch": 28.75, "grad_norm": 1.0977646112442017, "learning_rate": 2e-05, "loss": 0.05155388, "step": 14375 }, { "epoch": 28.752, "grad_norm": 1.5170761346817017, "learning_rate": 2e-05, "loss": 0.07114626, "step": 14376 }, { "epoch": 28.754, "grad_norm": 2.400148391723633, "learning_rate": 2e-05, "loss": 0.05175724, "step": 14377 }, { "epoch": 28.756, "grad_norm": 1.0229874849319458, "learning_rate": 2e-05, "loss": 0.04067989, "step": 14378 }, { "epoch": 28.758, "grad_norm": 1.2624506950378418, "learning_rate": 2e-05, "loss": 0.05834974, "step": 14379 }, { "epoch": 28.76, "grad_norm": 1.1263177394866943, "learning_rate": 2e-05, "loss": 0.04429045, "step": 14380 }, { "epoch": 28.762, "grad_norm": 1.7576185464859009, "learning_rate": 2e-05, "loss": 0.06904487, "step": 14381 }, { "epoch": 28.764, "grad_norm": 1.0562002658843994, "learning_rate": 2e-05, "loss": 0.04999235, "step": 14382 }, { "epoch": 28.766, "grad_norm": 1.446751594543457, "learning_rate": 2e-05, "loss": 0.0664718, "step": 14383 }, { "epoch": 28.768, "grad_norm": 1.4562450647354126, "learning_rate": 2e-05, "loss": 0.03974837, "step": 14384 }, { "epoch": 28.77, "grad_norm": 1.2023104429244995, "learning_rate": 2e-05, "loss": 0.05896343, "step": 14385 }, { "epoch": 28.772, "grad_norm": 1.383141040802002, "learning_rate": 2e-05, "loss": 0.05919487, "step": 14386 }, { "epoch": 28.774, "grad_norm": 1.3116073608398438, "learning_rate": 2e-05, "loss": 0.06527594, "step": 14387 }, { "epoch": 28.776, "grad_norm": 1.3542439937591553, "learning_rate": 2e-05, "loss": 0.06707092, "step": 14388 }, { "epoch": 28.778, "grad_norm": 1.1312203407287598, "learning_rate": 2e-05, "loss": 0.06443762, "step": 14389 }, { "epoch": 28.78, "grad_norm": 1.3685983419418335, "learning_rate": 2e-05, "loss": 0.06890296, "step": 14390 }, { "epoch": 28.782, "grad_norm": 1.105054259300232, "learning_rate": 2e-05, "loss": 0.05117954, "step": 14391 }, { "epoch": 28.784, "grad_norm": 1.0170961618423462, "learning_rate": 2e-05, "loss": 0.04466943, "step": 14392 }, { "epoch": 28.786, "grad_norm": 2.2528724670410156, "learning_rate": 2e-05, "loss": 0.05923943, "step": 14393 }, { "epoch": 28.788, "grad_norm": 1.7997747659683228, "learning_rate": 2e-05, "loss": 0.05873613, "step": 14394 }, { "epoch": 28.79, "grad_norm": 1.1069847345352173, "learning_rate": 2e-05, "loss": 0.05168425, "step": 14395 }, { "epoch": 28.792, "grad_norm": 1.8673585653305054, "learning_rate": 2e-05, "loss": 0.04908233, "step": 14396 }, { "epoch": 28.794, "grad_norm": 0.9609074592590332, "learning_rate": 2e-05, "loss": 0.03519578, "step": 14397 }, { "epoch": 28.796, "grad_norm": 1.1476932764053345, "learning_rate": 2e-05, "loss": 0.04545148, "step": 14398 }, { "epoch": 28.798000000000002, "grad_norm": 1.051865816116333, "learning_rate": 2e-05, "loss": 0.04343429, "step": 14399 }, { "epoch": 28.8, "grad_norm": 1.3605501651763916, "learning_rate": 2e-05, "loss": 0.06554951, "step": 14400 }, { "epoch": 28.802, "grad_norm": 1.3182153701782227, "learning_rate": 2e-05, "loss": 0.07407647, "step": 14401 }, { "epoch": 28.804, "grad_norm": 1.3018434047698975, "learning_rate": 2e-05, "loss": 0.06070177, "step": 14402 }, { "epoch": 28.806, "grad_norm": 1.0453804731369019, "learning_rate": 2e-05, "loss": 0.04236388, "step": 14403 }, { "epoch": 28.808, "grad_norm": 1.6198655366897583, "learning_rate": 2e-05, "loss": 0.05726817, "step": 14404 }, { "epoch": 28.81, "grad_norm": 1.0208441019058228, "learning_rate": 2e-05, "loss": 0.04299459, "step": 14405 }, { "epoch": 28.812, "grad_norm": 0.9605217576026917, "learning_rate": 2e-05, "loss": 0.03797925, "step": 14406 }, { "epoch": 28.814, "grad_norm": 1.129928708076477, "learning_rate": 2e-05, "loss": 0.04700078, "step": 14407 }, { "epoch": 28.816, "grad_norm": 2.547285318374634, "learning_rate": 2e-05, "loss": 0.05196394, "step": 14408 }, { "epoch": 28.818, "grad_norm": 0.9673292636871338, "learning_rate": 2e-05, "loss": 0.04520608, "step": 14409 }, { "epoch": 28.82, "grad_norm": 1.1572656631469727, "learning_rate": 2e-05, "loss": 0.0491136, "step": 14410 }, { "epoch": 28.822, "grad_norm": 1.167949914932251, "learning_rate": 2e-05, "loss": 0.0398555, "step": 14411 }, { "epoch": 28.824, "grad_norm": 1.358520746231079, "learning_rate": 2e-05, "loss": 0.06121388, "step": 14412 }, { "epoch": 28.826, "grad_norm": 1.1100794076919556, "learning_rate": 2e-05, "loss": 0.04527421, "step": 14413 }, { "epoch": 28.828, "grad_norm": 0.9927181005477905, "learning_rate": 2e-05, "loss": 0.03984749, "step": 14414 }, { "epoch": 28.83, "grad_norm": 1.1248859167099, "learning_rate": 2e-05, "loss": 0.0589344, "step": 14415 }, { "epoch": 28.832, "grad_norm": 1.2787030935287476, "learning_rate": 2e-05, "loss": 0.03260321, "step": 14416 }, { "epoch": 28.834, "grad_norm": 1.2994235754013062, "learning_rate": 2e-05, "loss": 0.05486408, "step": 14417 }, { "epoch": 28.836, "grad_norm": 1.1073411703109741, "learning_rate": 2e-05, "loss": 0.04296948, "step": 14418 }, { "epoch": 28.838, "grad_norm": 6.582603454589844, "learning_rate": 2e-05, "loss": 0.05329283, "step": 14419 }, { "epoch": 28.84, "grad_norm": 2.0256950855255127, "learning_rate": 2e-05, "loss": 0.06914046, "step": 14420 }, { "epoch": 28.842, "grad_norm": 1.162785291671753, "learning_rate": 2e-05, "loss": 0.05226928, "step": 14421 }, { "epoch": 28.844, "grad_norm": 1.5783532857894897, "learning_rate": 2e-05, "loss": 0.05091108, "step": 14422 }, { "epoch": 28.846, "grad_norm": 1.1526124477386475, "learning_rate": 2e-05, "loss": 0.04739578, "step": 14423 }, { "epoch": 28.848, "grad_norm": 0.9885454773902893, "learning_rate": 2e-05, "loss": 0.04144773, "step": 14424 }, { "epoch": 28.85, "grad_norm": 2.1791322231292725, "learning_rate": 2e-05, "loss": 0.03768017, "step": 14425 }, { "epoch": 28.852, "grad_norm": 1.2882776260375977, "learning_rate": 2e-05, "loss": 0.05305962, "step": 14426 }, { "epoch": 28.854, "grad_norm": 1.0187267065048218, "learning_rate": 2e-05, "loss": 0.04496597, "step": 14427 }, { "epoch": 28.856, "grad_norm": 0.9752914905548096, "learning_rate": 2e-05, "loss": 0.0357612, "step": 14428 }, { "epoch": 28.858, "grad_norm": 1.1194292306900024, "learning_rate": 2e-05, "loss": 0.05062325, "step": 14429 }, { "epoch": 28.86, "grad_norm": 1.2130147218704224, "learning_rate": 2e-05, "loss": 0.05091491, "step": 14430 }, { "epoch": 28.862, "grad_norm": 1.0704991817474365, "learning_rate": 2e-05, "loss": 0.04797363, "step": 14431 }, { "epoch": 28.864, "grad_norm": 1.070533037185669, "learning_rate": 2e-05, "loss": 0.04777229, "step": 14432 }, { "epoch": 28.866, "grad_norm": 1.4517334699630737, "learning_rate": 2e-05, "loss": 0.04032, "step": 14433 }, { "epoch": 28.868, "grad_norm": 1.1186455488204956, "learning_rate": 2e-05, "loss": 0.05210125, "step": 14434 }, { "epoch": 28.87, "grad_norm": 2.7450759410858154, "learning_rate": 2e-05, "loss": 0.06522125, "step": 14435 }, { "epoch": 28.872, "grad_norm": 1.2842767238616943, "learning_rate": 2e-05, "loss": 0.06017283, "step": 14436 }, { "epoch": 28.874, "grad_norm": 1.143119215965271, "learning_rate": 2e-05, "loss": 0.05266198, "step": 14437 }, { "epoch": 28.876, "grad_norm": 1.080701231956482, "learning_rate": 2e-05, "loss": 0.04235354, "step": 14438 }, { "epoch": 28.878, "grad_norm": 2.0926880836486816, "learning_rate": 2e-05, "loss": 0.05445161, "step": 14439 }, { "epoch": 28.88, "grad_norm": 2.0756032466888428, "learning_rate": 2e-05, "loss": 0.0516451, "step": 14440 }, { "epoch": 28.882, "grad_norm": 2.163771390914917, "learning_rate": 2e-05, "loss": 0.07149144, "step": 14441 }, { "epoch": 28.884, "grad_norm": 0.7748874425888062, "learning_rate": 2e-05, "loss": 0.02597756, "step": 14442 }, { "epoch": 28.886, "grad_norm": 1.1359424591064453, "learning_rate": 2e-05, "loss": 0.0504017, "step": 14443 }, { "epoch": 28.888, "grad_norm": 1.0414564609527588, "learning_rate": 2e-05, "loss": 0.03726147, "step": 14444 }, { "epoch": 28.89, "grad_norm": 1.5606720447540283, "learning_rate": 2e-05, "loss": 0.0590294, "step": 14445 }, { "epoch": 28.892, "grad_norm": 0.9526653289794922, "learning_rate": 2e-05, "loss": 0.04286746, "step": 14446 }, { "epoch": 28.894, "grad_norm": 1.368575096130371, "learning_rate": 2e-05, "loss": 0.05439974, "step": 14447 }, { "epoch": 28.896, "grad_norm": 1.4041539430618286, "learning_rate": 2e-05, "loss": 0.05664683, "step": 14448 }, { "epoch": 28.898, "grad_norm": 1.407791256904602, "learning_rate": 2e-05, "loss": 0.05042948, "step": 14449 }, { "epoch": 28.9, "grad_norm": 1.0279254913330078, "learning_rate": 2e-05, "loss": 0.04005009, "step": 14450 }, { "epoch": 28.902, "grad_norm": 1.4114142656326294, "learning_rate": 2e-05, "loss": 0.04705514, "step": 14451 }, { "epoch": 28.904, "grad_norm": 1.53988516330719, "learning_rate": 2e-05, "loss": 0.06968574, "step": 14452 }, { "epoch": 28.906, "grad_norm": 1.947677493095398, "learning_rate": 2e-05, "loss": 0.05959962, "step": 14453 }, { "epoch": 28.908, "grad_norm": 1.4355823993682861, "learning_rate": 2e-05, "loss": 0.06722293, "step": 14454 }, { "epoch": 28.91, "grad_norm": 1.0307726860046387, "learning_rate": 2e-05, "loss": 0.04445631, "step": 14455 }, { "epoch": 28.912, "grad_norm": 0.980252742767334, "learning_rate": 2e-05, "loss": 0.04236844, "step": 14456 }, { "epoch": 28.914, "grad_norm": 1.4885029792785645, "learning_rate": 2e-05, "loss": 0.05278682, "step": 14457 }, { "epoch": 28.916, "grad_norm": 1.1298633813858032, "learning_rate": 2e-05, "loss": 0.05039386, "step": 14458 }, { "epoch": 28.918, "grad_norm": 1.348698616027832, "learning_rate": 2e-05, "loss": 0.04628156, "step": 14459 }, { "epoch": 28.92, "grad_norm": 1.261788249015808, "learning_rate": 2e-05, "loss": 0.06371572, "step": 14460 }, { "epoch": 28.922, "grad_norm": 1.0611457824707031, "learning_rate": 2e-05, "loss": 0.04659911, "step": 14461 }, { "epoch": 28.924, "grad_norm": 1.2571258544921875, "learning_rate": 2e-05, "loss": 0.05672054, "step": 14462 }, { "epoch": 28.926, "grad_norm": 1.739106297492981, "learning_rate": 2e-05, "loss": 0.0530343, "step": 14463 }, { "epoch": 28.928, "grad_norm": 1.0750688314437866, "learning_rate": 2e-05, "loss": 0.04423099, "step": 14464 }, { "epoch": 28.93, "grad_norm": 1.0634437799453735, "learning_rate": 2e-05, "loss": 0.05261937, "step": 14465 }, { "epoch": 28.932, "grad_norm": 1.2713947296142578, "learning_rate": 2e-05, "loss": 0.04110869, "step": 14466 }, { "epoch": 28.934, "grad_norm": 1.677098274230957, "learning_rate": 2e-05, "loss": 0.0479702, "step": 14467 }, { "epoch": 28.936, "grad_norm": 1.2165437936782837, "learning_rate": 2e-05, "loss": 0.04104428, "step": 14468 }, { "epoch": 28.938, "grad_norm": 0.9581998586654663, "learning_rate": 2e-05, "loss": 0.03263333, "step": 14469 }, { "epoch": 28.94, "grad_norm": 1.067044973373413, "learning_rate": 2e-05, "loss": 0.04231325, "step": 14470 }, { "epoch": 28.942, "grad_norm": 1.5720524787902832, "learning_rate": 2e-05, "loss": 0.05206659, "step": 14471 }, { "epoch": 28.944, "grad_norm": 1.424851894378662, "learning_rate": 2e-05, "loss": 0.07767393, "step": 14472 }, { "epoch": 28.946, "grad_norm": 1.2293959856033325, "learning_rate": 2e-05, "loss": 0.05438047, "step": 14473 }, { "epoch": 28.948, "grad_norm": 1.1583430767059326, "learning_rate": 2e-05, "loss": 0.06535377, "step": 14474 }, { "epoch": 28.95, "grad_norm": 1.7023050785064697, "learning_rate": 2e-05, "loss": 0.06043659, "step": 14475 }, { "epoch": 28.951999999999998, "grad_norm": 1.386309027671814, "learning_rate": 2e-05, "loss": 0.04968517, "step": 14476 }, { "epoch": 28.954, "grad_norm": 1.5965526103973389, "learning_rate": 2e-05, "loss": 0.05485709, "step": 14477 }, { "epoch": 28.956, "grad_norm": 1.2020167112350464, "learning_rate": 2e-05, "loss": 0.04717738, "step": 14478 }, { "epoch": 28.958, "grad_norm": 1.4906593561172485, "learning_rate": 2e-05, "loss": 0.07477991, "step": 14479 }, { "epoch": 28.96, "grad_norm": 1.6102551221847534, "learning_rate": 2e-05, "loss": 0.05819091, "step": 14480 }, { "epoch": 28.962, "grad_norm": 1.1162158250808716, "learning_rate": 2e-05, "loss": 0.05569955, "step": 14481 }, { "epoch": 28.964, "grad_norm": 1.1727420091629028, "learning_rate": 2e-05, "loss": 0.04410589, "step": 14482 }, { "epoch": 28.966, "grad_norm": 1.1126596927642822, "learning_rate": 2e-05, "loss": 0.050468, "step": 14483 }, { "epoch": 28.968, "grad_norm": 1.4743733406066895, "learning_rate": 2e-05, "loss": 0.05779273, "step": 14484 }, { "epoch": 28.97, "grad_norm": 1.7813376188278198, "learning_rate": 2e-05, "loss": 0.06212969, "step": 14485 }, { "epoch": 28.972, "grad_norm": 1.1595388650894165, "learning_rate": 2e-05, "loss": 0.05398744, "step": 14486 }, { "epoch": 28.974, "grad_norm": 2.424596071243286, "learning_rate": 2e-05, "loss": 0.07104319, "step": 14487 }, { "epoch": 28.976, "grad_norm": 1.1801377534866333, "learning_rate": 2e-05, "loss": 0.04399161, "step": 14488 }, { "epoch": 28.978, "grad_norm": 1.4197784662246704, "learning_rate": 2e-05, "loss": 0.04760091, "step": 14489 }, { "epoch": 28.98, "grad_norm": 1.5055569410324097, "learning_rate": 2e-05, "loss": 0.05355086, "step": 14490 }, { "epoch": 28.982, "grad_norm": 1.965364933013916, "learning_rate": 2e-05, "loss": 0.0732315, "step": 14491 }, { "epoch": 28.984, "grad_norm": 1.301583170890808, "learning_rate": 2e-05, "loss": 0.05943606, "step": 14492 }, { "epoch": 28.986, "grad_norm": 1.713722825050354, "learning_rate": 2e-05, "loss": 0.05918575, "step": 14493 }, { "epoch": 28.988, "grad_norm": 1.4282037019729614, "learning_rate": 2e-05, "loss": 0.0530496, "step": 14494 }, { "epoch": 28.99, "grad_norm": 2.1234238147735596, "learning_rate": 2e-05, "loss": 0.04770797, "step": 14495 }, { "epoch": 28.992, "grad_norm": 1.765354871749878, "learning_rate": 2e-05, "loss": 0.04876926, "step": 14496 }, { "epoch": 28.994, "grad_norm": 0.9834134578704834, "learning_rate": 2e-05, "loss": 0.0446346, "step": 14497 }, { "epoch": 28.996, "grad_norm": 1.0510073900222778, "learning_rate": 2e-05, "loss": 0.05039446, "step": 14498 }, { "epoch": 28.998, "grad_norm": 1.1189841032028198, "learning_rate": 2e-05, "loss": 0.06130198, "step": 14499 }, { "epoch": 29.0, "grad_norm": 1.3341281414031982, "learning_rate": 2e-05, "loss": 0.04612795, "step": 14500 }, { "epoch": 29.0, "eval_performance": { "AngleClassification_1": 1.0, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9840319361277445, "Equal_1": 0.992, "Equal_2": 0.9800399201596807, "Equal_3": 0.9900199600798403, "LineComparison_1": 0.998, "LineComparison_2": 1.0, "LineComparison_3": 0.9960079840319361, "Parallel_1": 0.9859719438877755, "Parallel_2": 0.9959919839679359, "Parallel_3": 0.988, "Perpendicular_1": 0.994, "Perpendicular_2": 0.992, "Perpendicular_3": 0.8827655310621243, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 1.0, "PointLiesOnCircle_3": 0.9892000000000001, "PointLiesOnLine_1": 0.9899799599198397, "PointLiesOnLine_2": 0.9939879759519038, "PointLiesOnLine_3": 0.9840319361277445 }, "eval_runtime": 224.78, "eval_samples_per_second": 46.712, "eval_steps_per_second": 0.934, "step": 14500 }, { "epoch": 29.002, "grad_norm": 1.0085065364837646, "learning_rate": 2e-05, "loss": 0.05397366, "step": 14501 }, { "epoch": 29.004, "grad_norm": 1.593377709388733, "learning_rate": 2e-05, "loss": 0.05960596, "step": 14502 }, { "epoch": 29.006, "grad_norm": 1.202800989151001, "learning_rate": 2e-05, "loss": 0.05681243, "step": 14503 }, { "epoch": 29.008, "grad_norm": 1.1617650985717773, "learning_rate": 2e-05, "loss": 0.04067148, "step": 14504 }, { "epoch": 29.01, "grad_norm": 1.0620235204696655, "learning_rate": 2e-05, "loss": 0.04951955, "step": 14505 }, { "epoch": 29.012, "grad_norm": 1.0893293619155884, "learning_rate": 2e-05, "loss": 0.04509952, "step": 14506 }, { "epoch": 29.014, "grad_norm": 1.3881046772003174, "learning_rate": 2e-05, "loss": 0.03806736, "step": 14507 }, { "epoch": 29.016, "grad_norm": 0.9388900399208069, "learning_rate": 2e-05, "loss": 0.032476, "step": 14508 }, { "epoch": 29.018, "grad_norm": 1.1282795667648315, "learning_rate": 2e-05, "loss": 0.04318281, "step": 14509 }, { "epoch": 29.02, "grad_norm": 1.435393214225769, "learning_rate": 2e-05, "loss": 0.03717603, "step": 14510 }, { "epoch": 29.022, "grad_norm": 1.0801018476486206, "learning_rate": 2e-05, "loss": 0.04077236, "step": 14511 }, { "epoch": 29.024, "grad_norm": 1.2452207803726196, "learning_rate": 2e-05, "loss": 0.05042613, "step": 14512 }, { "epoch": 29.026, "grad_norm": 1.4742083549499512, "learning_rate": 2e-05, "loss": 0.04571113, "step": 14513 }, { "epoch": 29.028, "grad_norm": 1.6864469051361084, "learning_rate": 2e-05, "loss": 0.04096045, "step": 14514 }, { "epoch": 29.03, "grad_norm": 1.0924264192581177, "learning_rate": 2e-05, "loss": 0.0513719, "step": 14515 }, { "epoch": 29.032, "grad_norm": 0.9132217168807983, "learning_rate": 2e-05, "loss": 0.03668626, "step": 14516 }, { "epoch": 29.034, "grad_norm": 1.4167120456695557, "learning_rate": 2e-05, "loss": 0.0656193, "step": 14517 }, { "epoch": 29.036, "grad_norm": 2.0873804092407227, "learning_rate": 2e-05, "loss": 0.04734476, "step": 14518 }, { "epoch": 29.038, "grad_norm": 1.0035337209701538, "learning_rate": 2e-05, "loss": 0.03882658, "step": 14519 }, { "epoch": 29.04, "grad_norm": 2.175960063934326, "learning_rate": 2e-05, "loss": 0.06199027, "step": 14520 }, { "epoch": 29.042, "grad_norm": 1.2706019878387451, "learning_rate": 2e-05, "loss": 0.04938548, "step": 14521 }, { "epoch": 29.044, "grad_norm": 1.9009894132614136, "learning_rate": 2e-05, "loss": 0.06406468, "step": 14522 }, { "epoch": 29.046, "grad_norm": 7.850635051727295, "learning_rate": 2e-05, "loss": 0.05797679, "step": 14523 }, { "epoch": 29.048, "grad_norm": 1.0417027473449707, "learning_rate": 2e-05, "loss": 0.04766767, "step": 14524 }, { "epoch": 29.05, "grad_norm": 1.328752875328064, "learning_rate": 2e-05, "loss": 0.05423351, "step": 14525 }, { "epoch": 29.052, "grad_norm": 1.4133834838867188, "learning_rate": 2e-05, "loss": 0.04859059, "step": 14526 }, { "epoch": 29.054, "grad_norm": 2.0497994422912598, "learning_rate": 2e-05, "loss": 0.05579399, "step": 14527 }, { "epoch": 29.056, "grad_norm": 1.1131045818328857, "learning_rate": 2e-05, "loss": 0.04574338, "step": 14528 }, { "epoch": 29.058, "grad_norm": 1.221169114112854, "learning_rate": 2e-05, "loss": 0.04415936, "step": 14529 }, { "epoch": 29.06, "grad_norm": 2.2448322772979736, "learning_rate": 2e-05, "loss": 0.05031776, "step": 14530 }, { "epoch": 29.062, "grad_norm": 1.0660725831985474, "learning_rate": 2e-05, "loss": 0.04940791, "step": 14531 }, { "epoch": 29.064, "grad_norm": 0.8484916090965271, "learning_rate": 2e-05, "loss": 0.0465458, "step": 14532 }, { "epoch": 29.066, "grad_norm": 1.137338399887085, "learning_rate": 2e-05, "loss": 0.04930963, "step": 14533 }, { "epoch": 29.068, "grad_norm": 1.9656972885131836, "learning_rate": 2e-05, "loss": 0.04448313, "step": 14534 }, { "epoch": 29.07, "grad_norm": 2.019756555557251, "learning_rate": 2e-05, "loss": 0.04721318, "step": 14535 }, { "epoch": 29.072, "grad_norm": 1.4459341764450073, "learning_rate": 2e-05, "loss": 0.0446334, "step": 14536 }, { "epoch": 29.074, "grad_norm": 0.9447890520095825, "learning_rate": 2e-05, "loss": 0.03326405, "step": 14537 }, { "epoch": 29.076, "grad_norm": 1.1000421047210693, "learning_rate": 2e-05, "loss": 0.04740841, "step": 14538 }, { "epoch": 29.078, "grad_norm": 1.1910496950149536, "learning_rate": 2e-05, "loss": 0.05364776, "step": 14539 }, { "epoch": 29.08, "grad_norm": 0.9440063834190369, "learning_rate": 2e-05, "loss": 0.0363551, "step": 14540 }, { "epoch": 29.082, "grad_norm": 1.4920638799667358, "learning_rate": 2e-05, "loss": 0.04641712, "step": 14541 }, { "epoch": 29.084, "grad_norm": 1.1498253345489502, "learning_rate": 2e-05, "loss": 0.04408485, "step": 14542 }, { "epoch": 29.086, "grad_norm": 3.643134117126465, "learning_rate": 2e-05, "loss": 0.04884078, "step": 14543 }, { "epoch": 29.088, "grad_norm": 1.0444751977920532, "learning_rate": 2e-05, "loss": 0.04668932, "step": 14544 }, { "epoch": 29.09, "grad_norm": 1.3283710479736328, "learning_rate": 2e-05, "loss": 0.06005841, "step": 14545 }, { "epoch": 29.092, "grad_norm": 1.8041726350784302, "learning_rate": 2e-05, "loss": 0.05193277, "step": 14546 }, { "epoch": 29.094, "grad_norm": 0.9695797562599182, "learning_rate": 2e-05, "loss": 0.0382336, "step": 14547 }, { "epoch": 29.096, "grad_norm": 1.4497522115707397, "learning_rate": 2e-05, "loss": 0.07104689, "step": 14548 }, { "epoch": 29.098, "grad_norm": 1.1780309677124023, "learning_rate": 2e-05, "loss": 0.05272069, "step": 14549 }, { "epoch": 29.1, "grad_norm": 1.2211986780166626, "learning_rate": 2e-05, "loss": 0.04249704, "step": 14550 }, { "epoch": 29.102, "grad_norm": 1.1250183582305908, "learning_rate": 2e-05, "loss": 0.04716767, "step": 14551 }, { "epoch": 29.104, "grad_norm": 1.6281379461288452, "learning_rate": 2e-05, "loss": 0.05063011, "step": 14552 }, { "epoch": 29.106, "grad_norm": 1.5526717901229858, "learning_rate": 2e-05, "loss": 0.03385402, "step": 14553 }, { "epoch": 29.108, "grad_norm": 2.133297920227051, "learning_rate": 2e-05, "loss": 0.06163947, "step": 14554 }, { "epoch": 29.11, "grad_norm": 1.1487115621566772, "learning_rate": 2e-05, "loss": 0.03963308, "step": 14555 }, { "epoch": 29.112, "grad_norm": 0.8902839422225952, "learning_rate": 2e-05, "loss": 0.03324031, "step": 14556 }, { "epoch": 29.114, "grad_norm": 1.126344084739685, "learning_rate": 2e-05, "loss": 0.05435327, "step": 14557 }, { "epoch": 29.116, "grad_norm": 1.0338397026062012, "learning_rate": 2e-05, "loss": 0.05142828, "step": 14558 }, { "epoch": 29.118, "grad_norm": 2.41413950920105, "learning_rate": 2e-05, "loss": 0.05265431, "step": 14559 }, { "epoch": 29.12, "grad_norm": 2.076429843902588, "learning_rate": 2e-05, "loss": 0.0580758, "step": 14560 }, { "epoch": 29.122, "grad_norm": 1.3068650960922241, "learning_rate": 2e-05, "loss": 0.05418529, "step": 14561 }, { "epoch": 29.124, "grad_norm": 1.8168143033981323, "learning_rate": 2e-05, "loss": 0.05910784, "step": 14562 }, { "epoch": 29.126, "grad_norm": 1.2636431455612183, "learning_rate": 2e-05, "loss": 0.06078397, "step": 14563 }, { "epoch": 29.128, "grad_norm": 1.0113919973373413, "learning_rate": 2e-05, "loss": 0.03703035, "step": 14564 }, { "epoch": 29.13, "grad_norm": 1.1755342483520508, "learning_rate": 2e-05, "loss": 0.05936196, "step": 14565 }, { "epoch": 29.132, "grad_norm": 1.105843186378479, "learning_rate": 2e-05, "loss": 0.0556055, "step": 14566 }, { "epoch": 29.134, "grad_norm": 1.2856106758117676, "learning_rate": 2e-05, "loss": 0.04061612, "step": 14567 }, { "epoch": 29.136, "grad_norm": 1.5191739797592163, "learning_rate": 2e-05, "loss": 0.04828111, "step": 14568 }, { "epoch": 29.138, "grad_norm": 3.3277747631073, "learning_rate": 2e-05, "loss": 0.06681181, "step": 14569 }, { "epoch": 29.14, "grad_norm": 1.1025376319885254, "learning_rate": 2e-05, "loss": 0.04264204, "step": 14570 }, { "epoch": 29.142, "grad_norm": 2.1045796871185303, "learning_rate": 2e-05, "loss": 0.04907132, "step": 14571 }, { "epoch": 29.144, "grad_norm": 1.0413649082183838, "learning_rate": 2e-05, "loss": 0.04416391, "step": 14572 }, { "epoch": 29.146, "grad_norm": 1.8710259199142456, "learning_rate": 2e-05, "loss": 0.0451693, "step": 14573 }, { "epoch": 29.148, "grad_norm": 1.1522353887557983, "learning_rate": 2e-05, "loss": 0.05415502, "step": 14574 }, { "epoch": 29.15, "grad_norm": 1.1865557432174683, "learning_rate": 2e-05, "loss": 0.04653024, "step": 14575 }, { "epoch": 29.152, "grad_norm": 2.7270586490631104, "learning_rate": 2e-05, "loss": 0.06831232, "step": 14576 }, { "epoch": 29.154, "grad_norm": 1.2172502279281616, "learning_rate": 2e-05, "loss": 0.04930144, "step": 14577 }, { "epoch": 29.156, "grad_norm": 1.106791615486145, "learning_rate": 2e-05, "loss": 0.04709557, "step": 14578 }, { "epoch": 29.158, "grad_norm": 1.3381776809692383, "learning_rate": 2e-05, "loss": 0.04779219, "step": 14579 }, { "epoch": 29.16, "grad_norm": 1.4563618898391724, "learning_rate": 2e-05, "loss": 0.06446205, "step": 14580 }, { "epoch": 29.162, "grad_norm": 1.3524459600448608, "learning_rate": 2e-05, "loss": 0.05245928, "step": 14581 }, { "epoch": 29.164, "grad_norm": 1.1118983030319214, "learning_rate": 2e-05, "loss": 0.04176072, "step": 14582 }, { "epoch": 29.166, "grad_norm": 0.9758725166320801, "learning_rate": 2e-05, "loss": 0.03566039, "step": 14583 }, { "epoch": 29.168, "grad_norm": 1.7110552787780762, "learning_rate": 2e-05, "loss": 0.07291068, "step": 14584 }, { "epoch": 29.17, "grad_norm": 1.6812546253204346, "learning_rate": 2e-05, "loss": 0.06456259, "step": 14585 }, { "epoch": 29.172, "grad_norm": 0.9834657311439514, "learning_rate": 2e-05, "loss": 0.04451413, "step": 14586 }, { "epoch": 29.174, "grad_norm": 1.0182805061340332, "learning_rate": 2e-05, "loss": 0.04587712, "step": 14587 }, { "epoch": 29.176, "grad_norm": 1.8229188919067383, "learning_rate": 2e-05, "loss": 0.07322234, "step": 14588 }, { "epoch": 29.178, "grad_norm": 1.4152324199676514, "learning_rate": 2e-05, "loss": 0.08451134, "step": 14589 }, { "epoch": 29.18, "grad_norm": 1.163651466369629, "learning_rate": 2e-05, "loss": 0.04929867, "step": 14590 }, { "epoch": 29.182, "grad_norm": 1.1932106018066406, "learning_rate": 2e-05, "loss": 0.05791388, "step": 14591 }, { "epoch": 29.184, "grad_norm": 0.8568279147148132, "learning_rate": 2e-05, "loss": 0.04664369, "step": 14592 }, { "epoch": 29.186, "grad_norm": 1.1795885562896729, "learning_rate": 2e-05, "loss": 0.04691701, "step": 14593 }, { "epoch": 29.188, "grad_norm": 1.2816110849380493, "learning_rate": 2e-05, "loss": 0.04789906, "step": 14594 }, { "epoch": 29.19, "grad_norm": 1.4575846195220947, "learning_rate": 2e-05, "loss": 0.0626414, "step": 14595 }, { "epoch": 29.192, "grad_norm": 1.0870484113693237, "learning_rate": 2e-05, "loss": 0.04723601, "step": 14596 }, { "epoch": 29.194, "grad_norm": 1.0973563194274902, "learning_rate": 2e-05, "loss": 0.04625481, "step": 14597 }, { "epoch": 29.196, "grad_norm": 1.091604232788086, "learning_rate": 2e-05, "loss": 0.04888172, "step": 14598 }, { "epoch": 29.198, "grad_norm": 1.1205462217330933, "learning_rate": 2e-05, "loss": 0.04172144, "step": 14599 }, { "epoch": 29.2, "grad_norm": 1.2591100931167603, "learning_rate": 2e-05, "loss": 0.0415084, "step": 14600 }, { "epoch": 29.202, "grad_norm": 0.998203456401825, "learning_rate": 2e-05, "loss": 0.04000388, "step": 14601 }, { "epoch": 29.204, "grad_norm": 1.1349284648895264, "learning_rate": 2e-05, "loss": 0.04993595, "step": 14602 }, { "epoch": 29.206, "grad_norm": 0.9733943939208984, "learning_rate": 2e-05, "loss": 0.04053618, "step": 14603 }, { "epoch": 29.208, "grad_norm": 8.016902923583984, "learning_rate": 2e-05, "loss": 0.06539488, "step": 14604 }, { "epoch": 29.21, "grad_norm": 1.027214765548706, "learning_rate": 2e-05, "loss": 0.05071543, "step": 14605 }, { "epoch": 29.212, "grad_norm": 0.9002553820610046, "learning_rate": 2e-05, "loss": 0.03549718, "step": 14606 }, { "epoch": 29.214, "grad_norm": 1.0845834016799927, "learning_rate": 2e-05, "loss": 0.05510483, "step": 14607 }, { "epoch": 29.216, "grad_norm": 1.5042401552200317, "learning_rate": 2e-05, "loss": 0.05468114, "step": 14608 }, { "epoch": 29.218, "grad_norm": 1.4699920415878296, "learning_rate": 2e-05, "loss": 0.0506186, "step": 14609 }, { "epoch": 29.22, "grad_norm": 1.1316293478012085, "learning_rate": 2e-05, "loss": 0.04086535, "step": 14610 }, { "epoch": 29.222, "grad_norm": 1.0498464107513428, "learning_rate": 2e-05, "loss": 0.05270526, "step": 14611 }, { "epoch": 29.224, "grad_norm": 1.7726497650146484, "learning_rate": 2e-05, "loss": 0.04032504, "step": 14612 }, { "epoch": 29.226, "grad_norm": 1.7962431907653809, "learning_rate": 2e-05, "loss": 0.05134235, "step": 14613 }, { "epoch": 29.228, "grad_norm": 1.3098227977752686, "learning_rate": 2e-05, "loss": 0.04228522, "step": 14614 }, { "epoch": 29.23, "grad_norm": 1.3459023237228394, "learning_rate": 2e-05, "loss": 0.05617429, "step": 14615 }, { "epoch": 29.232, "grad_norm": 1.0614209175109863, "learning_rate": 2e-05, "loss": 0.05180053, "step": 14616 }, { "epoch": 29.234, "grad_norm": 1.2984974384307861, "learning_rate": 2e-05, "loss": 0.05279472, "step": 14617 }, { "epoch": 29.236, "grad_norm": 1.8154972791671753, "learning_rate": 2e-05, "loss": 0.07154293, "step": 14618 }, { "epoch": 29.238, "grad_norm": 0.8350774049758911, "learning_rate": 2e-05, "loss": 0.03350465, "step": 14619 }, { "epoch": 29.24, "grad_norm": 0.9848772883415222, "learning_rate": 2e-05, "loss": 0.0417083, "step": 14620 }, { "epoch": 29.242, "grad_norm": 0.9930858016014099, "learning_rate": 2e-05, "loss": 0.04003877, "step": 14621 }, { "epoch": 29.244, "grad_norm": 1.0138460397720337, "learning_rate": 2e-05, "loss": 0.04619605, "step": 14622 }, { "epoch": 29.246, "grad_norm": 1.220471739768982, "learning_rate": 2e-05, "loss": 0.04914767, "step": 14623 }, { "epoch": 29.248, "grad_norm": 0.9649516940116882, "learning_rate": 2e-05, "loss": 0.04303811, "step": 14624 }, { "epoch": 29.25, "grad_norm": 2.1167871952056885, "learning_rate": 2e-05, "loss": 0.0574264, "step": 14625 }, { "epoch": 29.252, "grad_norm": 1.4682259559631348, "learning_rate": 2e-05, "loss": 0.06257634, "step": 14626 }, { "epoch": 29.254, "grad_norm": 1.2159279584884644, "learning_rate": 2e-05, "loss": 0.05224491, "step": 14627 }, { "epoch": 29.256, "grad_norm": 1.7574195861816406, "learning_rate": 2e-05, "loss": 0.05536553, "step": 14628 }, { "epoch": 29.258, "grad_norm": 1.2718312740325928, "learning_rate": 2e-05, "loss": 0.03687281, "step": 14629 }, { "epoch": 29.26, "grad_norm": 1.5182291269302368, "learning_rate": 2e-05, "loss": 0.05737795, "step": 14630 }, { "epoch": 29.262, "grad_norm": 1.1544889211654663, "learning_rate": 2e-05, "loss": 0.04734516, "step": 14631 }, { "epoch": 29.264, "grad_norm": 1.0665253400802612, "learning_rate": 2e-05, "loss": 0.03659611, "step": 14632 }, { "epoch": 29.266, "grad_norm": 1.1214375495910645, "learning_rate": 2e-05, "loss": 0.05597034, "step": 14633 }, { "epoch": 29.268, "grad_norm": 1.5191038846969604, "learning_rate": 2e-05, "loss": 0.06425489, "step": 14634 }, { "epoch": 29.27, "grad_norm": 1.0266889333724976, "learning_rate": 2e-05, "loss": 0.04625285, "step": 14635 }, { "epoch": 29.272, "grad_norm": 1.3538984060287476, "learning_rate": 2e-05, "loss": 0.05958322, "step": 14636 }, { "epoch": 29.274, "grad_norm": 2.1723945140838623, "learning_rate": 2e-05, "loss": 0.05104686, "step": 14637 }, { "epoch": 29.276, "grad_norm": 1.1509290933609009, "learning_rate": 2e-05, "loss": 0.05878568, "step": 14638 }, { "epoch": 29.278, "grad_norm": 1.212289810180664, "learning_rate": 2e-05, "loss": 0.05765344, "step": 14639 }, { "epoch": 29.28, "grad_norm": 2.35158371925354, "learning_rate": 2e-05, "loss": 0.0659305, "step": 14640 }, { "epoch": 29.282, "grad_norm": 1.233964443206787, "learning_rate": 2e-05, "loss": 0.05544458, "step": 14641 }, { "epoch": 29.284, "grad_norm": 1.507964849472046, "learning_rate": 2e-05, "loss": 0.04394262, "step": 14642 }, { "epoch": 29.286, "grad_norm": 1.1330156326293945, "learning_rate": 2e-05, "loss": 0.04477824, "step": 14643 }, { "epoch": 29.288, "grad_norm": 1.0223817825317383, "learning_rate": 2e-05, "loss": 0.05345184, "step": 14644 }, { "epoch": 29.29, "grad_norm": 1.1746834516525269, "learning_rate": 2e-05, "loss": 0.0401567, "step": 14645 }, { "epoch": 29.292, "grad_norm": 1.0182688236236572, "learning_rate": 2e-05, "loss": 0.05066855, "step": 14646 }, { "epoch": 29.294, "grad_norm": 1.886186957359314, "learning_rate": 2e-05, "loss": 0.0531477, "step": 14647 }, { "epoch": 29.296, "grad_norm": 1.3804967403411865, "learning_rate": 2e-05, "loss": 0.05080878, "step": 14648 }, { "epoch": 29.298, "grad_norm": 1.341496467590332, "learning_rate": 2e-05, "loss": 0.06283604, "step": 14649 }, { "epoch": 29.3, "grad_norm": 1.6476836204528809, "learning_rate": 2e-05, "loss": 0.06179643, "step": 14650 }, { "epoch": 29.302, "grad_norm": 1.2837002277374268, "learning_rate": 2e-05, "loss": 0.05211896, "step": 14651 }, { "epoch": 29.304, "grad_norm": 1.4578500986099243, "learning_rate": 2e-05, "loss": 0.05436377, "step": 14652 }, { "epoch": 29.306, "grad_norm": 1.2561371326446533, "learning_rate": 2e-05, "loss": 0.07172128, "step": 14653 }, { "epoch": 29.308, "grad_norm": 1.1390291452407837, "learning_rate": 2e-05, "loss": 0.04171488, "step": 14654 }, { "epoch": 29.31, "grad_norm": 1.2046786546707153, "learning_rate": 2e-05, "loss": 0.05017195, "step": 14655 }, { "epoch": 29.312, "grad_norm": 1.0084682703018188, "learning_rate": 2e-05, "loss": 0.0405324, "step": 14656 }, { "epoch": 29.314, "grad_norm": 1.2062684297561646, "learning_rate": 2e-05, "loss": 0.04681739, "step": 14657 }, { "epoch": 29.316, "grad_norm": 1.0599915981292725, "learning_rate": 2e-05, "loss": 0.03856201, "step": 14658 }, { "epoch": 29.318, "grad_norm": 2.047563076019287, "learning_rate": 2e-05, "loss": 0.0419065, "step": 14659 }, { "epoch": 29.32, "grad_norm": 1.115689992904663, "learning_rate": 2e-05, "loss": 0.04153556, "step": 14660 }, { "epoch": 29.322, "grad_norm": 1.1592249870300293, "learning_rate": 2e-05, "loss": 0.05683518, "step": 14661 }, { "epoch": 29.324, "grad_norm": 1.538271188735962, "learning_rate": 2e-05, "loss": 0.06834175, "step": 14662 }, { "epoch": 29.326, "grad_norm": 2.234126567840576, "learning_rate": 2e-05, "loss": 0.05951777, "step": 14663 }, { "epoch": 29.328, "grad_norm": 1.0337904691696167, "learning_rate": 2e-05, "loss": 0.03293772, "step": 14664 }, { "epoch": 29.33, "grad_norm": 1.669822096824646, "learning_rate": 2e-05, "loss": 0.04071087, "step": 14665 }, { "epoch": 29.332, "grad_norm": 1.262593388557434, "learning_rate": 2e-05, "loss": 0.05802722, "step": 14666 }, { "epoch": 29.334, "grad_norm": 0.9023078680038452, "learning_rate": 2e-05, "loss": 0.03628269, "step": 14667 }, { "epoch": 29.336, "grad_norm": 1.4954606294631958, "learning_rate": 2e-05, "loss": 0.06284186, "step": 14668 }, { "epoch": 29.338, "grad_norm": 0.9653249979019165, "learning_rate": 2e-05, "loss": 0.04765797, "step": 14669 }, { "epoch": 29.34, "grad_norm": 1.1193827390670776, "learning_rate": 2e-05, "loss": 0.03923205, "step": 14670 }, { "epoch": 29.342, "grad_norm": 1.1053235530853271, "learning_rate": 2e-05, "loss": 0.04074538, "step": 14671 }, { "epoch": 29.344, "grad_norm": 1.1633018255233765, "learning_rate": 2e-05, "loss": 0.05128383, "step": 14672 }, { "epoch": 29.346, "grad_norm": 1.112653374671936, "learning_rate": 2e-05, "loss": 0.05133664, "step": 14673 }, { "epoch": 29.348, "grad_norm": 1.0234519243240356, "learning_rate": 2e-05, "loss": 0.05231892, "step": 14674 }, { "epoch": 29.35, "grad_norm": 0.9808361530303955, "learning_rate": 2e-05, "loss": 0.04154306, "step": 14675 }, { "epoch": 29.352, "grad_norm": 1.1122957468032837, "learning_rate": 2e-05, "loss": 0.03155535, "step": 14676 }, { "epoch": 29.354, "grad_norm": 1.0903563499450684, "learning_rate": 2e-05, "loss": 0.05068847, "step": 14677 }, { "epoch": 29.356, "grad_norm": 2.858180284500122, "learning_rate": 2e-05, "loss": 0.06310552, "step": 14678 }, { "epoch": 29.358, "grad_norm": 2.068528890609741, "learning_rate": 2e-05, "loss": 0.07013312, "step": 14679 }, { "epoch": 29.36, "grad_norm": 1.0990180969238281, "learning_rate": 2e-05, "loss": 0.0548052, "step": 14680 }, { "epoch": 29.362, "grad_norm": 1.1553679704666138, "learning_rate": 2e-05, "loss": 0.05085988, "step": 14681 }, { "epoch": 29.364, "grad_norm": 1.3248648643493652, "learning_rate": 2e-05, "loss": 0.06186166, "step": 14682 }, { "epoch": 29.366, "grad_norm": 1.2674643993377686, "learning_rate": 2e-05, "loss": 0.05766401, "step": 14683 }, { "epoch": 29.368, "grad_norm": 1.351503610610962, "learning_rate": 2e-05, "loss": 0.05115391, "step": 14684 }, { "epoch": 29.37, "grad_norm": 1.177047610282898, "learning_rate": 2e-05, "loss": 0.04473732, "step": 14685 }, { "epoch": 29.372, "grad_norm": 1.3594081401824951, "learning_rate": 2e-05, "loss": 0.05119801, "step": 14686 }, { "epoch": 29.374, "grad_norm": 1.470008134841919, "learning_rate": 2e-05, "loss": 0.04877947, "step": 14687 }, { "epoch": 29.376, "grad_norm": 1.3576009273529053, "learning_rate": 2e-05, "loss": 0.05592244, "step": 14688 }, { "epoch": 29.378, "grad_norm": 0.9810523390769958, "learning_rate": 2e-05, "loss": 0.03189497, "step": 14689 }, { "epoch": 29.38, "grad_norm": 1.2334705591201782, "learning_rate": 2e-05, "loss": 0.05534721, "step": 14690 }, { "epoch": 29.382, "grad_norm": 1.1734570264816284, "learning_rate": 2e-05, "loss": 0.05387602, "step": 14691 }, { "epoch": 29.384, "grad_norm": 1.1036509275436401, "learning_rate": 2e-05, "loss": 0.04090476, "step": 14692 }, { "epoch": 29.386, "grad_norm": 1.2603700160980225, "learning_rate": 2e-05, "loss": 0.04242028, "step": 14693 }, { "epoch": 29.388, "grad_norm": 0.9914978742599487, "learning_rate": 2e-05, "loss": 0.03154882, "step": 14694 }, { "epoch": 29.39, "grad_norm": 1.9569027423858643, "learning_rate": 2e-05, "loss": 0.05464642, "step": 14695 }, { "epoch": 29.392, "grad_norm": 1.12490975856781, "learning_rate": 2e-05, "loss": 0.03909998, "step": 14696 }, { "epoch": 29.394, "grad_norm": 1.1228140592575073, "learning_rate": 2e-05, "loss": 0.05075973, "step": 14697 }, { "epoch": 29.396, "grad_norm": 1.1393128633499146, "learning_rate": 2e-05, "loss": 0.05256918, "step": 14698 }, { "epoch": 29.398, "grad_norm": 1.0477137565612793, "learning_rate": 2e-05, "loss": 0.03958245, "step": 14699 }, { "epoch": 29.4, "grad_norm": 0.745050847530365, "learning_rate": 2e-05, "loss": 0.0253874, "step": 14700 }, { "epoch": 29.402, "grad_norm": 1.7465698719024658, "learning_rate": 2e-05, "loss": 0.04919294, "step": 14701 }, { "epoch": 29.404, "grad_norm": 1.6304645538330078, "learning_rate": 2e-05, "loss": 0.06263178, "step": 14702 }, { "epoch": 29.406, "grad_norm": 0.8022410869598389, "learning_rate": 2e-05, "loss": 0.02770796, "step": 14703 }, { "epoch": 29.408, "grad_norm": 1.234095811843872, "learning_rate": 2e-05, "loss": 0.06191814, "step": 14704 }, { "epoch": 29.41, "grad_norm": 1.2869131565093994, "learning_rate": 2e-05, "loss": 0.05533356, "step": 14705 }, { "epoch": 29.412, "grad_norm": 1.1138930320739746, "learning_rate": 2e-05, "loss": 0.0466451, "step": 14706 }, { "epoch": 29.414, "grad_norm": 0.997545599937439, "learning_rate": 2e-05, "loss": 0.03257848, "step": 14707 }, { "epoch": 29.416, "grad_norm": 1.0545340776443481, "learning_rate": 2e-05, "loss": 0.04645609, "step": 14708 }, { "epoch": 29.418, "grad_norm": 1.6135858297348022, "learning_rate": 2e-05, "loss": 0.06839493, "step": 14709 }, { "epoch": 29.42, "grad_norm": 1.3204196691513062, "learning_rate": 2e-05, "loss": 0.03796653, "step": 14710 }, { "epoch": 29.422, "grad_norm": 1.045873761177063, "learning_rate": 2e-05, "loss": 0.04442728, "step": 14711 }, { "epoch": 29.424, "grad_norm": 1.4340336322784424, "learning_rate": 2e-05, "loss": 0.04773493, "step": 14712 }, { "epoch": 29.426, "grad_norm": 7.48968505859375, "learning_rate": 2e-05, "loss": 0.07283084, "step": 14713 }, { "epoch": 29.428, "grad_norm": 0.9398936033248901, "learning_rate": 2e-05, "loss": 0.0321989, "step": 14714 }, { "epoch": 29.43, "grad_norm": 0.9431772232055664, "learning_rate": 2e-05, "loss": 0.03805528, "step": 14715 }, { "epoch": 29.432, "grad_norm": 1.104968786239624, "learning_rate": 2e-05, "loss": 0.04553916, "step": 14716 }, { "epoch": 29.434, "grad_norm": 1.162147045135498, "learning_rate": 2e-05, "loss": 0.04931792, "step": 14717 }, { "epoch": 29.436, "grad_norm": 1.1698349714279175, "learning_rate": 2e-05, "loss": 0.04472835, "step": 14718 }, { "epoch": 29.438, "grad_norm": 1.2540584802627563, "learning_rate": 2e-05, "loss": 0.05026079, "step": 14719 }, { "epoch": 29.44, "grad_norm": 1.3444980382919312, "learning_rate": 2e-05, "loss": 0.0614675, "step": 14720 }, { "epoch": 29.442, "grad_norm": 1.004560112953186, "learning_rate": 2e-05, "loss": 0.0429499, "step": 14721 }, { "epoch": 29.444, "grad_norm": 1.2670981884002686, "learning_rate": 2e-05, "loss": 0.04842153, "step": 14722 }, { "epoch": 29.446, "grad_norm": 0.970145046710968, "learning_rate": 2e-05, "loss": 0.04278766, "step": 14723 }, { "epoch": 29.448, "grad_norm": 1.356581211090088, "learning_rate": 2e-05, "loss": 0.04835079, "step": 14724 }, { "epoch": 29.45, "grad_norm": 4.347435474395752, "learning_rate": 2e-05, "loss": 0.06692852, "step": 14725 }, { "epoch": 29.452, "grad_norm": 1.187310814857483, "learning_rate": 2e-05, "loss": 0.04165148, "step": 14726 }, { "epoch": 29.454, "grad_norm": 1.1439809799194336, "learning_rate": 2e-05, "loss": 0.03919187, "step": 14727 }, { "epoch": 29.456, "grad_norm": 1.323128342628479, "learning_rate": 2e-05, "loss": 0.05739671, "step": 14728 }, { "epoch": 29.458, "grad_norm": 3.8563902378082275, "learning_rate": 2e-05, "loss": 0.06268008, "step": 14729 }, { "epoch": 29.46, "grad_norm": 2.2178585529327393, "learning_rate": 2e-05, "loss": 0.07160832, "step": 14730 }, { "epoch": 29.462, "grad_norm": 1.3105313777923584, "learning_rate": 2e-05, "loss": 0.05782294, "step": 14731 }, { "epoch": 29.464, "grad_norm": 1.876355767250061, "learning_rate": 2e-05, "loss": 0.04291119, "step": 14732 }, { "epoch": 29.466, "grad_norm": 1.0919402837753296, "learning_rate": 2e-05, "loss": 0.04580719, "step": 14733 }, { "epoch": 29.468, "grad_norm": 1.0317652225494385, "learning_rate": 2e-05, "loss": 0.04855602, "step": 14734 }, { "epoch": 29.47, "grad_norm": 1.6238116025924683, "learning_rate": 2e-05, "loss": 0.06447852, "step": 14735 }, { "epoch": 29.472, "grad_norm": 1.7688415050506592, "learning_rate": 2e-05, "loss": 0.05636932, "step": 14736 }, { "epoch": 29.474, "grad_norm": 1.2287774085998535, "learning_rate": 2e-05, "loss": 0.04248101, "step": 14737 }, { "epoch": 29.476, "grad_norm": 1.4026364088058472, "learning_rate": 2e-05, "loss": 0.06828094, "step": 14738 }, { "epoch": 29.478, "grad_norm": 1.189780354499817, "learning_rate": 2e-05, "loss": 0.04756305, "step": 14739 }, { "epoch": 29.48, "grad_norm": 1.253515601158142, "learning_rate": 2e-05, "loss": 0.04946483, "step": 14740 }, { "epoch": 29.482, "grad_norm": 1.6326699256896973, "learning_rate": 2e-05, "loss": 0.0575811, "step": 14741 }, { "epoch": 29.484, "grad_norm": 1.0680763721466064, "learning_rate": 2e-05, "loss": 0.04759519, "step": 14742 }, { "epoch": 29.486, "grad_norm": 1.4175890684127808, "learning_rate": 2e-05, "loss": 0.05485499, "step": 14743 }, { "epoch": 29.488, "grad_norm": 1.4893817901611328, "learning_rate": 2e-05, "loss": 0.05062412, "step": 14744 }, { "epoch": 29.49, "grad_norm": 1.16343092918396, "learning_rate": 2e-05, "loss": 0.05281184, "step": 14745 }, { "epoch": 29.492, "grad_norm": 1.57941734790802, "learning_rate": 2e-05, "loss": 0.04814292, "step": 14746 }, { "epoch": 29.494, "grad_norm": 1.2099472284317017, "learning_rate": 2e-05, "loss": 0.04860268, "step": 14747 }, { "epoch": 29.496, "grad_norm": 1.0145338773727417, "learning_rate": 2e-05, "loss": 0.03661798, "step": 14748 }, { "epoch": 29.498, "grad_norm": 2.7268731594085693, "learning_rate": 2e-05, "loss": 0.0633743, "step": 14749 }, { "epoch": 29.5, "grad_norm": 3.6396660804748535, "learning_rate": 2e-05, "loss": 0.06496432, "step": 14750 }, { "epoch": 29.502, "grad_norm": 1.26171875, "learning_rate": 2e-05, "loss": 0.05443643, "step": 14751 }, { "epoch": 29.504, "grad_norm": 4.943302154541016, "learning_rate": 2e-05, "loss": 0.06193203, "step": 14752 }, { "epoch": 29.506, "grad_norm": 1.6623215675354004, "learning_rate": 2e-05, "loss": 0.06158051, "step": 14753 }, { "epoch": 29.508, "grad_norm": 1.1255650520324707, "learning_rate": 2e-05, "loss": 0.04763262, "step": 14754 }, { "epoch": 29.51, "grad_norm": 1.3383978605270386, "learning_rate": 2e-05, "loss": 0.06160851, "step": 14755 }, { "epoch": 29.512, "grad_norm": 1.3095492124557495, "learning_rate": 2e-05, "loss": 0.05859499, "step": 14756 }, { "epoch": 29.514, "grad_norm": 1.1468077898025513, "learning_rate": 2e-05, "loss": 0.05325584, "step": 14757 }, { "epoch": 29.516, "grad_norm": 2.0913736820220947, "learning_rate": 2e-05, "loss": 0.07040726, "step": 14758 }, { "epoch": 29.518, "grad_norm": 1.1455532312393188, "learning_rate": 2e-05, "loss": 0.0425918, "step": 14759 }, { "epoch": 29.52, "grad_norm": 4.703222751617432, "learning_rate": 2e-05, "loss": 0.08113731, "step": 14760 }, { "epoch": 29.522, "grad_norm": 1.113279104232788, "learning_rate": 2e-05, "loss": 0.04876316, "step": 14761 }, { "epoch": 29.524, "grad_norm": 1.191052794456482, "learning_rate": 2e-05, "loss": 0.0491624, "step": 14762 }, { "epoch": 29.526, "grad_norm": 1.096157193183899, "learning_rate": 2e-05, "loss": 0.04373828, "step": 14763 }, { "epoch": 29.528, "grad_norm": 1.0604071617126465, "learning_rate": 2e-05, "loss": 0.04561218, "step": 14764 }, { "epoch": 29.53, "grad_norm": 1.2262035608291626, "learning_rate": 2e-05, "loss": 0.05315754, "step": 14765 }, { "epoch": 29.532, "grad_norm": 1.401898741722107, "learning_rate": 2e-05, "loss": 0.05337591, "step": 14766 }, { "epoch": 29.534, "grad_norm": 0.9796097874641418, "learning_rate": 2e-05, "loss": 0.04725763, "step": 14767 }, { "epoch": 29.536, "grad_norm": 1.9813481569290161, "learning_rate": 2e-05, "loss": 0.04495794, "step": 14768 }, { "epoch": 29.538, "grad_norm": 4.7819318771362305, "learning_rate": 2e-05, "loss": 0.06000122, "step": 14769 }, { "epoch": 29.54, "grad_norm": 1.1621376276016235, "learning_rate": 2e-05, "loss": 0.04303102, "step": 14770 }, { "epoch": 29.542, "grad_norm": 1.1797144412994385, "learning_rate": 2e-05, "loss": 0.06192151, "step": 14771 }, { "epoch": 29.544, "grad_norm": 1.1721657514572144, "learning_rate": 2e-05, "loss": 0.04527795, "step": 14772 }, { "epoch": 29.546, "grad_norm": 1.203142762184143, "learning_rate": 2e-05, "loss": 0.03561005, "step": 14773 }, { "epoch": 29.548000000000002, "grad_norm": 1.5321545600891113, "learning_rate": 2e-05, "loss": 0.07178615, "step": 14774 }, { "epoch": 29.55, "grad_norm": 1.1873880624771118, "learning_rate": 2e-05, "loss": 0.04872207, "step": 14775 }, { "epoch": 29.552, "grad_norm": 1.130964994430542, "learning_rate": 2e-05, "loss": 0.04060852, "step": 14776 }, { "epoch": 29.554, "grad_norm": 0.8286651968955994, "learning_rate": 2e-05, "loss": 0.03562941, "step": 14777 }, { "epoch": 29.556, "grad_norm": 1.058500051498413, "learning_rate": 2e-05, "loss": 0.04390832, "step": 14778 }, { "epoch": 29.558, "grad_norm": 1.5254136323928833, "learning_rate": 2e-05, "loss": 0.04121511, "step": 14779 }, { "epoch": 29.56, "grad_norm": 1.357078194618225, "learning_rate": 2e-05, "loss": 0.05858416, "step": 14780 }, { "epoch": 29.562, "grad_norm": 1.043289065361023, "learning_rate": 2e-05, "loss": 0.05653848, "step": 14781 }, { "epoch": 29.564, "grad_norm": 1.0629271268844604, "learning_rate": 2e-05, "loss": 0.0446586, "step": 14782 }, { "epoch": 29.566, "grad_norm": 6.8624267578125, "learning_rate": 2e-05, "loss": 0.06401547, "step": 14783 }, { "epoch": 29.568, "grad_norm": 1.1978102922439575, "learning_rate": 2e-05, "loss": 0.06649347, "step": 14784 }, { "epoch": 29.57, "grad_norm": 1.7234982252120972, "learning_rate": 2e-05, "loss": 0.05263923, "step": 14785 }, { "epoch": 29.572, "grad_norm": 1.04872727394104, "learning_rate": 2e-05, "loss": 0.04219922, "step": 14786 }, { "epoch": 29.574, "grad_norm": 1.1600784063339233, "learning_rate": 2e-05, "loss": 0.05723117, "step": 14787 }, { "epoch": 29.576, "grad_norm": 2.0114188194274902, "learning_rate": 2e-05, "loss": 0.05000266, "step": 14788 }, { "epoch": 29.578, "grad_norm": 1.0577340126037598, "learning_rate": 2e-05, "loss": 0.04500862, "step": 14789 }, { "epoch": 29.58, "grad_norm": 1.812585711479187, "learning_rate": 2e-05, "loss": 0.05072708, "step": 14790 }, { "epoch": 29.582, "grad_norm": 1.4282678365707397, "learning_rate": 2e-05, "loss": 0.0474305, "step": 14791 }, { "epoch": 29.584, "grad_norm": 1.0584975481033325, "learning_rate": 2e-05, "loss": 0.0468592, "step": 14792 }, { "epoch": 29.586, "grad_norm": 1.0384596586227417, "learning_rate": 2e-05, "loss": 0.04158233, "step": 14793 }, { "epoch": 29.588, "grad_norm": 1.3110705614089966, "learning_rate": 2e-05, "loss": 0.06195113, "step": 14794 }, { "epoch": 29.59, "grad_norm": 1.9236606359481812, "learning_rate": 2e-05, "loss": 0.03995068, "step": 14795 }, { "epoch": 29.592, "grad_norm": 1.3040701150894165, "learning_rate": 2e-05, "loss": 0.06249913, "step": 14796 }, { "epoch": 29.594, "grad_norm": 1.0245954990386963, "learning_rate": 2e-05, "loss": 0.03910267, "step": 14797 }, { "epoch": 29.596, "grad_norm": 1.5275949239730835, "learning_rate": 2e-05, "loss": 0.07406852, "step": 14798 }, { "epoch": 29.598, "grad_norm": 1.137412190437317, "learning_rate": 2e-05, "loss": 0.0501687, "step": 14799 }, { "epoch": 29.6, "grad_norm": 1.2978339195251465, "learning_rate": 2e-05, "loss": 0.05286542, "step": 14800 }, { "epoch": 29.602, "grad_norm": 1.3246186971664429, "learning_rate": 2e-05, "loss": 0.05345874, "step": 14801 }, { "epoch": 29.604, "grad_norm": 1.1409353017807007, "learning_rate": 2e-05, "loss": 0.04747654, "step": 14802 }, { "epoch": 29.606, "grad_norm": 1.9008004665374756, "learning_rate": 2e-05, "loss": 0.05351958, "step": 14803 }, { "epoch": 29.608, "grad_norm": 1.1407300233840942, "learning_rate": 2e-05, "loss": 0.04470138, "step": 14804 }, { "epoch": 29.61, "grad_norm": 0.9720564484596252, "learning_rate": 2e-05, "loss": 0.03879238, "step": 14805 }, { "epoch": 29.612, "grad_norm": 1.5560641288757324, "learning_rate": 2e-05, "loss": 0.0610476, "step": 14806 }, { "epoch": 29.614, "grad_norm": 1.1591254472732544, "learning_rate": 2e-05, "loss": 0.05107819, "step": 14807 }, { "epoch": 29.616, "grad_norm": 1.3751274347305298, "learning_rate": 2e-05, "loss": 0.05541167, "step": 14808 }, { "epoch": 29.618, "grad_norm": 1.117872953414917, "learning_rate": 2e-05, "loss": 0.04720469, "step": 14809 }, { "epoch": 29.62, "grad_norm": 1.0727217197418213, "learning_rate": 2e-05, "loss": 0.04105214, "step": 14810 }, { "epoch": 29.622, "grad_norm": 1.2031030654907227, "learning_rate": 2e-05, "loss": 0.04398348, "step": 14811 }, { "epoch": 29.624, "grad_norm": 1.1488031148910522, "learning_rate": 2e-05, "loss": 0.05041964, "step": 14812 }, { "epoch": 29.626, "grad_norm": 1.4068735837936401, "learning_rate": 2e-05, "loss": 0.06478822, "step": 14813 }, { "epoch": 29.628, "grad_norm": 0.9568789601325989, "learning_rate": 2e-05, "loss": 0.04893179, "step": 14814 }, { "epoch": 29.63, "grad_norm": 1.148837685585022, "learning_rate": 2e-05, "loss": 0.0451222, "step": 14815 }, { "epoch": 29.632, "grad_norm": 1.1132186651229858, "learning_rate": 2e-05, "loss": 0.05249838, "step": 14816 }, { "epoch": 29.634, "grad_norm": 1.2183659076690674, "learning_rate": 2e-05, "loss": 0.0456233, "step": 14817 }, { "epoch": 29.636, "grad_norm": 0.9975796341896057, "learning_rate": 2e-05, "loss": 0.04649666, "step": 14818 }, { "epoch": 29.638, "grad_norm": 1.1829040050506592, "learning_rate": 2e-05, "loss": 0.05162161, "step": 14819 }, { "epoch": 29.64, "grad_norm": 1.1890875101089478, "learning_rate": 2e-05, "loss": 0.0378981, "step": 14820 }, { "epoch": 29.642, "grad_norm": 1.4028831720352173, "learning_rate": 2e-05, "loss": 0.04929274, "step": 14821 }, { "epoch": 29.644, "grad_norm": 1.1585980653762817, "learning_rate": 2e-05, "loss": 0.04186504, "step": 14822 }, { "epoch": 29.646, "grad_norm": 1.1054970026016235, "learning_rate": 2e-05, "loss": 0.04781925, "step": 14823 }, { "epoch": 29.648, "grad_norm": 1.1879850625991821, "learning_rate": 2e-05, "loss": 0.05460038, "step": 14824 }, { "epoch": 29.65, "grad_norm": 0.9752247333526611, "learning_rate": 2e-05, "loss": 0.03517404, "step": 14825 }, { "epoch": 29.652, "grad_norm": 1.1695125102996826, "learning_rate": 2e-05, "loss": 0.04307378, "step": 14826 }, { "epoch": 29.654, "grad_norm": 1.2330334186553955, "learning_rate": 2e-05, "loss": 0.06242103, "step": 14827 }, { "epoch": 29.656, "grad_norm": 1.0733681917190552, "learning_rate": 2e-05, "loss": 0.06260588, "step": 14828 }, { "epoch": 29.658, "grad_norm": 1.0659830570220947, "learning_rate": 2e-05, "loss": 0.04535402, "step": 14829 }, { "epoch": 29.66, "grad_norm": 1.204399824142456, "learning_rate": 2e-05, "loss": 0.04552479, "step": 14830 }, { "epoch": 29.662, "grad_norm": 0.9124770760536194, "learning_rate": 2e-05, "loss": 0.03290033, "step": 14831 }, { "epoch": 29.664, "grad_norm": 1.3945351839065552, "learning_rate": 2e-05, "loss": 0.05478886, "step": 14832 }, { "epoch": 29.666, "grad_norm": 1.0971988439559937, "learning_rate": 2e-05, "loss": 0.04489202, "step": 14833 }, { "epoch": 29.668, "grad_norm": 1.150039792060852, "learning_rate": 2e-05, "loss": 0.05290781, "step": 14834 }, { "epoch": 29.67, "grad_norm": 1.0547707080841064, "learning_rate": 2e-05, "loss": 0.05794405, "step": 14835 }, { "epoch": 29.672, "grad_norm": 1.0473607778549194, "learning_rate": 2e-05, "loss": 0.0333562, "step": 14836 }, { "epoch": 29.674, "grad_norm": 1.2762689590454102, "learning_rate": 2e-05, "loss": 0.04594278, "step": 14837 }, { "epoch": 29.676, "grad_norm": 1.1537432670593262, "learning_rate": 2e-05, "loss": 0.0366273, "step": 14838 }, { "epoch": 29.678, "grad_norm": 0.9332769513130188, "learning_rate": 2e-05, "loss": 0.03573775, "step": 14839 }, { "epoch": 29.68, "grad_norm": 1.0766422748565674, "learning_rate": 2e-05, "loss": 0.05421454, "step": 14840 }, { "epoch": 29.682, "grad_norm": 1.0760794878005981, "learning_rate": 2e-05, "loss": 0.05117251, "step": 14841 }, { "epoch": 29.684, "grad_norm": 1.238853096961975, "learning_rate": 2e-05, "loss": 0.05250534, "step": 14842 }, { "epoch": 29.686, "grad_norm": 1.1302331686019897, "learning_rate": 2e-05, "loss": 0.05476222, "step": 14843 }, { "epoch": 29.688, "grad_norm": 0.921413779258728, "learning_rate": 2e-05, "loss": 0.03363632, "step": 14844 }, { "epoch": 29.69, "grad_norm": 1.2104946374893188, "learning_rate": 2e-05, "loss": 0.05507277, "step": 14845 }, { "epoch": 29.692, "grad_norm": 1.4000589847564697, "learning_rate": 2e-05, "loss": 0.05411785, "step": 14846 }, { "epoch": 29.694, "grad_norm": 1.2247709035873413, "learning_rate": 2e-05, "loss": 0.05535247, "step": 14847 }, { "epoch": 29.696, "grad_norm": 1.0729318857192993, "learning_rate": 2e-05, "loss": 0.04050991, "step": 14848 }, { "epoch": 29.698, "grad_norm": 1.0868898630142212, "learning_rate": 2e-05, "loss": 0.04438304, "step": 14849 }, { "epoch": 29.7, "grad_norm": 1.248227596282959, "learning_rate": 2e-05, "loss": 0.04770774, "step": 14850 }, { "epoch": 29.701999999999998, "grad_norm": 1.1971789598464966, "learning_rate": 2e-05, "loss": 0.0600816, "step": 14851 }, { "epoch": 29.704, "grad_norm": 0.9641276597976685, "learning_rate": 2e-05, "loss": 0.03218057, "step": 14852 }, { "epoch": 29.706, "grad_norm": 1.0007423162460327, "learning_rate": 2e-05, "loss": 0.04572677, "step": 14853 }, { "epoch": 29.708, "grad_norm": 1.3295812606811523, "learning_rate": 2e-05, "loss": 0.04448068, "step": 14854 }, { "epoch": 29.71, "grad_norm": 1.0147836208343506, "learning_rate": 2e-05, "loss": 0.04693812, "step": 14855 }, { "epoch": 29.712, "grad_norm": 1.2700620889663696, "learning_rate": 2e-05, "loss": 0.04547036, "step": 14856 }, { "epoch": 29.714, "grad_norm": 1.7169036865234375, "learning_rate": 2e-05, "loss": 0.04902712, "step": 14857 }, { "epoch": 29.716, "grad_norm": 1.2115470170974731, "learning_rate": 2e-05, "loss": 0.05006787, "step": 14858 }, { "epoch": 29.718, "grad_norm": 1.225529432296753, "learning_rate": 2e-05, "loss": 0.05947848, "step": 14859 }, { "epoch": 29.72, "grad_norm": 1.2329349517822266, "learning_rate": 2e-05, "loss": 0.04362201, "step": 14860 }, { "epoch": 29.722, "grad_norm": 1.0946847200393677, "learning_rate": 2e-05, "loss": 0.04252224, "step": 14861 }, { "epoch": 29.724, "grad_norm": 3.731079578399658, "learning_rate": 2e-05, "loss": 0.07354262, "step": 14862 }, { "epoch": 29.726, "grad_norm": 1.1276593208312988, "learning_rate": 2e-05, "loss": 0.04381121, "step": 14863 }, { "epoch": 29.728, "grad_norm": 1.4757577180862427, "learning_rate": 2e-05, "loss": 0.0452536, "step": 14864 }, { "epoch": 29.73, "grad_norm": 1.2154606580734253, "learning_rate": 2e-05, "loss": 0.05195742, "step": 14865 }, { "epoch": 29.732, "grad_norm": 1.4101283550262451, "learning_rate": 2e-05, "loss": 0.06208076, "step": 14866 }, { "epoch": 29.734, "grad_norm": 1.1251825094223022, "learning_rate": 2e-05, "loss": 0.04266191, "step": 14867 }, { "epoch": 29.736, "grad_norm": 1.1992874145507812, "learning_rate": 2e-05, "loss": 0.03651543, "step": 14868 }, { "epoch": 29.738, "grad_norm": 1.082031011581421, "learning_rate": 2e-05, "loss": 0.04923041, "step": 14869 }, { "epoch": 29.74, "grad_norm": 1.0069330930709839, "learning_rate": 2e-05, "loss": 0.03937025, "step": 14870 }, { "epoch": 29.742, "grad_norm": 1.1578210592269897, "learning_rate": 2e-05, "loss": 0.04370643, "step": 14871 }, { "epoch": 29.744, "grad_norm": 1.2974954843521118, "learning_rate": 2e-05, "loss": 0.05195537, "step": 14872 }, { "epoch": 29.746, "grad_norm": 1.2555142641067505, "learning_rate": 2e-05, "loss": 0.05459837, "step": 14873 }, { "epoch": 29.748, "grad_norm": 1.4200224876403809, "learning_rate": 2e-05, "loss": 0.04176768, "step": 14874 }, { "epoch": 29.75, "grad_norm": 1.2939263582229614, "learning_rate": 2e-05, "loss": 0.039629, "step": 14875 }, { "epoch": 29.752, "grad_norm": 1.302347183227539, "learning_rate": 2e-05, "loss": 0.05838158, "step": 14876 }, { "epoch": 29.754, "grad_norm": 1.2799094915390015, "learning_rate": 2e-05, "loss": 0.0594008, "step": 14877 }, { "epoch": 29.756, "grad_norm": 1.4350764751434326, "learning_rate": 2e-05, "loss": 0.06127277, "step": 14878 }, { "epoch": 29.758, "grad_norm": 1.3211568593978882, "learning_rate": 2e-05, "loss": 0.04212873, "step": 14879 }, { "epoch": 29.76, "grad_norm": 1.213484764099121, "learning_rate": 2e-05, "loss": 0.0448489, "step": 14880 }, { "epoch": 29.762, "grad_norm": 1.332980751991272, "learning_rate": 2e-05, "loss": 0.05546209, "step": 14881 }, { "epoch": 29.764, "grad_norm": 1.5014338493347168, "learning_rate": 2e-05, "loss": 0.04124428, "step": 14882 }, { "epoch": 29.766, "grad_norm": 1.371868371963501, "learning_rate": 2e-05, "loss": 0.0514607, "step": 14883 }, { "epoch": 29.768, "grad_norm": 1.320263385772705, "learning_rate": 2e-05, "loss": 0.04685482, "step": 14884 }, { "epoch": 29.77, "grad_norm": 1.2865400314331055, "learning_rate": 2e-05, "loss": 0.05437213, "step": 14885 }, { "epoch": 29.772, "grad_norm": 1.1816749572753906, "learning_rate": 2e-05, "loss": 0.04191442, "step": 14886 }, { "epoch": 29.774, "grad_norm": 1.0500683784484863, "learning_rate": 2e-05, "loss": 0.03623563, "step": 14887 }, { "epoch": 29.776, "grad_norm": 1.2670851945877075, "learning_rate": 2e-05, "loss": 0.04914286, "step": 14888 }, { "epoch": 29.778, "grad_norm": 1.1446475982666016, "learning_rate": 2e-05, "loss": 0.05931064, "step": 14889 }, { "epoch": 29.78, "grad_norm": 1.1619149446487427, "learning_rate": 2e-05, "loss": 0.04825908, "step": 14890 }, { "epoch": 29.782, "grad_norm": 2.349759340286255, "learning_rate": 2e-05, "loss": 0.06487898, "step": 14891 }, { "epoch": 29.784, "grad_norm": 1.06183660030365, "learning_rate": 2e-05, "loss": 0.03985113, "step": 14892 }, { "epoch": 29.786, "grad_norm": 1.1945765018463135, "learning_rate": 2e-05, "loss": 0.04605842, "step": 14893 }, { "epoch": 29.788, "grad_norm": 1.471940040588379, "learning_rate": 2e-05, "loss": 0.05377054, "step": 14894 }, { "epoch": 29.79, "grad_norm": 1.2246601581573486, "learning_rate": 2e-05, "loss": 0.04887883, "step": 14895 }, { "epoch": 29.792, "grad_norm": 1.171105146408081, "learning_rate": 2e-05, "loss": 0.03287166, "step": 14896 }, { "epoch": 29.794, "grad_norm": 1.358728051185608, "learning_rate": 2e-05, "loss": 0.04211596, "step": 14897 }, { "epoch": 29.796, "grad_norm": 1.7300292253494263, "learning_rate": 2e-05, "loss": 0.05907832, "step": 14898 }, { "epoch": 29.798000000000002, "grad_norm": 1.7070904970169067, "learning_rate": 2e-05, "loss": 0.05987692, "step": 14899 }, { "epoch": 29.8, "grad_norm": 1.139028549194336, "learning_rate": 2e-05, "loss": 0.05240878, "step": 14900 }, { "epoch": 29.802, "grad_norm": 1.0453729629516602, "learning_rate": 2e-05, "loss": 0.03811006, "step": 14901 }, { "epoch": 29.804, "grad_norm": 1.1976146697998047, "learning_rate": 2e-05, "loss": 0.05647476, "step": 14902 }, { "epoch": 29.806, "grad_norm": 1.8413280248641968, "learning_rate": 2e-05, "loss": 0.05128473, "step": 14903 }, { "epoch": 29.808, "grad_norm": 1.0741360187530518, "learning_rate": 2e-05, "loss": 0.04304665, "step": 14904 }, { "epoch": 29.81, "grad_norm": 1.1625189781188965, "learning_rate": 2e-05, "loss": 0.0548712, "step": 14905 }, { "epoch": 29.812, "grad_norm": 1.0264350175857544, "learning_rate": 2e-05, "loss": 0.04953387, "step": 14906 }, { "epoch": 29.814, "grad_norm": 3.2086169719696045, "learning_rate": 2e-05, "loss": 0.09361836, "step": 14907 }, { "epoch": 29.816, "grad_norm": 1.6340194940567017, "learning_rate": 2e-05, "loss": 0.05941055, "step": 14908 }, { "epoch": 29.818, "grad_norm": 1.2986159324645996, "learning_rate": 2e-05, "loss": 0.04422218, "step": 14909 }, { "epoch": 29.82, "grad_norm": 0.9447638392448425, "learning_rate": 2e-05, "loss": 0.03515967, "step": 14910 }, { "epoch": 29.822, "grad_norm": 1.5027248859405518, "learning_rate": 2e-05, "loss": 0.04593032, "step": 14911 }, { "epoch": 29.824, "grad_norm": 1.5176584720611572, "learning_rate": 2e-05, "loss": 0.04994397, "step": 14912 }, { "epoch": 29.826, "grad_norm": 0.9996933937072754, "learning_rate": 2e-05, "loss": 0.04050322, "step": 14913 }, { "epoch": 29.828, "grad_norm": 1.0325028896331787, "learning_rate": 2e-05, "loss": 0.04202798, "step": 14914 }, { "epoch": 29.83, "grad_norm": 1.1517587900161743, "learning_rate": 2e-05, "loss": 0.04093495, "step": 14915 }, { "epoch": 29.832, "grad_norm": 1.1439907550811768, "learning_rate": 2e-05, "loss": 0.0430674, "step": 14916 }, { "epoch": 29.834, "grad_norm": 1.1873035430908203, "learning_rate": 2e-05, "loss": 0.05577445, "step": 14917 }, { "epoch": 29.836, "grad_norm": 1.5294311046600342, "learning_rate": 2e-05, "loss": 0.06119245, "step": 14918 }, { "epoch": 29.838, "grad_norm": 1.2165940999984741, "learning_rate": 2e-05, "loss": 0.05391408, "step": 14919 }, { "epoch": 29.84, "grad_norm": 1.5106019973754883, "learning_rate": 2e-05, "loss": 0.06814585, "step": 14920 }, { "epoch": 29.842, "grad_norm": 1.1251808404922485, "learning_rate": 2e-05, "loss": 0.04673365, "step": 14921 }, { "epoch": 29.844, "grad_norm": 1.818031668663025, "learning_rate": 2e-05, "loss": 0.05861222, "step": 14922 }, { "epoch": 29.846, "grad_norm": 1.2617063522338867, "learning_rate": 2e-05, "loss": 0.05890946, "step": 14923 }, { "epoch": 29.848, "grad_norm": 1.022739052772522, "learning_rate": 2e-05, "loss": 0.03975084, "step": 14924 }, { "epoch": 29.85, "grad_norm": 1.0058151483535767, "learning_rate": 2e-05, "loss": 0.04728372, "step": 14925 }, { "epoch": 29.852, "grad_norm": 1.6942484378814697, "learning_rate": 2e-05, "loss": 0.04738712, "step": 14926 }, { "epoch": 29.854, "grad_norm": 1.322330355644226, "learning_rate": 2e-05, "loss": 0.04833616, "step": 14927 }, { "epoch": 29.856, "grad_norm": 1.1332767009735107, "learning_rate": 2e-05, "loss": 0.0543071, "step": 14928 }, { "epoch": 29.858, "grad_norm": 1.4775837659835815, "learning_rate": 2e-05, "loss": 0.05499481, "step": 14929 }, { "epoch": 29.86, "grad_norm": 1.1733126640319824, "learning_rate": 2e-05, "loss": 0.0483956, "step": 14930 }, { "epoch": 29.862, "grad_norm": 1.377938985824585, "learning_rate": 2e-05, "loss": 0.05603983, "step": 14931 }, { "epoch": 29.864, "grad_norm": 1.5178488492965698, "learning_rate": 2e-05, "loss": 0.05634063, "step": 14932 }, { "epoch": 29.866, "grad_norm": 1.3337221145629883, "learning_rate": 2e-05, "loss": 0.05200032, "step": 14933 }, { "epoch": 29.868, "grad_norm": 0.8406437039375305, "learning_rate": 2e-05, "loss": 0.03009769, "step": 14934 }, { "epoch": 29.87, "grad_norm": 0.9604916572570801, "learning_rate": 2e-05, "loss": 0.04166096, "step": 14935 }, { "epoch": 29.872, "grad_norm": 1.1787796020507812, "learning_rate": 2e-05, "loss": 0.05383525, "step": 14936 }, { "epoch": 29.874, "grad_norm": 1.342874526977539, "learning_rate": 2e-05, "loss": 0.04904489, "step": 14937 }, { "epoch": 29.876, "grad_norm": 1.8389099836349487, "learning_rate": 2e-05, "loss": 0.06452634, "step": 14938 }, { "epoch": 29.878, "grad_norm": 1.2440541982650757, "learning_rate": 2e-05, "loss": 0.04723218, "step": 14939 }, { "epoch": 29.88, "grad_norm": 1.8893139362335205, "learning_rate": 2e-05, "loss": 0.04528648, "step": 14940 }, { "epoch": 29.882, "grad_norm": 1.13479483127594, "learning_rate": 2e-05, "loss": 0.05116771, "step": 14941 }, { "epoch": 29.884, "grad_norm": 1.0043359994888306, "learning_rate": 2e-05, "loss": 0.03884661, "step": 14942 }, { "epoch": 29.886, "grad_norm": 1.2674411535263062, "learning_rate": 2e-05, "loss": 0.04154016, "step": 14943 }, { "epoch": 29.888, "grad_norm": 1.0028821229934692, "learning_rate": 2e-05, "loss": 0.04834623, "step": 14944 }, { "epoch": 29.89, "grad_norm": 0.998628556728363, "learning_rate": 2e-05, "loss": 0.04867072, "step": 14945 }, { "epoch": 29.892, "grad_norm": 1.2666600942611694, "learning_rate": 2e-05, "loss": 0.05226351, "step": 14946 }, { "epoch": 29.894, "grad_norm": 3.247112989425659, "learning_rate": 2e-05, "loss": 0.04728385, "step": 14947 }, { "epoch": 29.896, "grad_norm": 1.1648375988006592, "learning_rate": 2e-05, "loss": 0.05346557, "step": 14948 }, { "epoch": 29.898, "grad_norm": 1.1274863481521606, "learning_rate": 2e-05, "loss": 0.05811203, "step": 14949 }, { "epoch": 29.9, "grad_norm": 4.038968563079834, "learning_rate": 2e-05, "loss": 0.06926522, "step": 14950 }, { "epoch": 29.902, "grad_norm": 1.0713688135147095, "learning_rate": 2e-05, "loss": 0.06317385, "step": 14951 }, { "epoch": 29.904, "grad_norm": 1.3020460605621338, "learning_rate": 2e-05, "loss": 0.04809433, "step": 14952 }, { "epoch": 29.906, "grad_norm": 2.099637508392334, "learning_rate": 2e-05, "loss": 0.06051829, "step": 14953 }, { "epoch": 29.908, "grad_norm": 1.169245958328247, "learning_rate": 2e-05, "loss": 0.05262773, "step": 14954 }, { "epoch": 29.91, "grad_norm": 1.1757488250732422, "learning_rate": 2e-05, "loss": 0.06025054, "step": 14955 }, { "epoch": 29.912, "grad_norm": 1.3784074783325195, "learning_rate": 2e-05, "loss": 0.05603523, "step": 14956 }, { "epoch": 29.914, "grad_norm": 1.1648166179656982, "learning_rate": 2e-05, "loss": 0.05005089, "step": 14957 }, { "epoch": 29.916, "grad_norm": 1.864701747894287, "learning_rate": 2e-05, "loss": 0.05350699, "step": 14958 }, { "epoch": 29.918, "grad_norm": 1.7502491474151611, "learning_rate": 2e-05, "loss": 0.06494678, "step": 14959 }, { "epoch": 29.92, "grad_norm": 1.1196271181106567, "learning_rate": 2e-05, "loss": 0.04757488, "step": 14960 }, { "epoch": 29.922, "grad_norm": 0.9794387817382812, "learning_rate": 2e-05, "loss": 0.04284685, "step": 14961 }, { "epoch": 29.924, "grad_norm": 1.136120319366455, "learning_rate": 2e-05, "loss": 0.05015803, "step": 14962 }, { "epoch": 29.926, "grad_norm": 1.297243595123291, "learning_rate": 2e-05, "loss": 0.05139553, "step": 14963 }, { "epoch": 29.928, "grad_norm": 1.02262282371521, "learning_rate": 2e-05, "loss": 0.04685052, "step": 14964 }, { "epoch": 29.93, "grad_norm": 0.9629802703857422, "learning_rate": 2e-05, "loss": 0.04599029, "step": 14965 }, { "epoch": 29.932, "grad_norm": 1.0964592695236206, "learning_rate": 2e-05, "loss": 0.04954575, "step": 14966 }, { "epoch": 29.934, "grad_norm": 1.3189095258712769, "learning_rate": 2e-05, "loss": 0.05248156, "step": 14967 }, { "epoch": 29.936, "grad_norm": 1.1367837190628052, "learning_rate": 2e-05, "loss": 0.04873399, "step": 14968 }, { "epoch": 29.938, "grad_norm": 1.3206732273101807, "learning_rate": 2e-05, "loss": 0.04607543, "step": 14969 }, { "epoch": 29.94, "grad_norm": 1.0260512828826904, "learning_rate": 2e-05, "loss": 0.04920332, "step": 14970 }, { "epoch": 29.942, "grad_norm": 0.9997471570968628, "learning_rate": 2e-05, "loss": 0.04855381, "step": 14971 }, { "epoch": 29.944, "grad_norm": 1.0396299362182617, "learning_rate": 2e-05, "loss": 0.05089846, "step": 14972 }, { "epoch": 29.946, "grad_norm": 1.0523890256881714, "learning_rate": 2e-05, "loss": 0.04708329, "step": 14973 }, { "epoch": 29.948, "grad_norm": 1.9589401483535767, "learning_rate": 2e-05, "loss": 0.04800857, "step": 14974 }, { "epoch": 29.95, "grad_norm": 1.0033341646194458, "learning_rate": 2e-05, "loss": 0.04122148, "step": 14975 }, { "epoch": 29.951999999999998, "grad_norm": 1.1320830583572388, "learning_rate": 2e-05, "loss": 0.05243385, "step": 14976 }, { "epoch": 29.954, "grad_norm": 1.0339202880859375, "learning_rate": 2e-05, "loss": 0.05909042, "step": 14977 }, { "epoch": 29.956, "grad_norm": 0.9543154835700989, "learning_rate": 2e-05, "loss": 0.04723663, "step": 14978 }, { "epoch": 29.958, "grad_norm": 1.1777095794677734, "learning_rate": 2e-05, "loss": 0.04744379, "step": 14979 }, { "epoch": 29.96, "grad_norm": 1.223532795906067, "learning_rate": 2e-05, "loss": 0.05452037, "step": 14980 }, { "epoch": 29.962, "grad_norm": 2.040379762649536, "learning_rate": 2e-05, "loss": 0.04021743, "step": 14981 }, { "epoch": 29.964, "grad_norm": 1.0985792875289917, "learning_rate": 2e-05, "loss": 0.04580819, "step": 14982 }, { "epoch": 29.966, "grad_norm": 1.161241888999939, "learning_rate": 2e-05, "loss": 0.04669328, "step": 14983 }, { "epoch": 29.968, "grad_norm": 0.9213882684707642, "learning_rate": 2e-05, "loss": 0.04037134, "step": 14984 }, { "epoch": 29.97, "grad_norm": 1.2197481393814087, "learning_rate": 2e-05, "loss": 0.0502822, "step": 14985 }, { "epoch": 29.972, "grad_norm": 1.1168701648712158, "learning_rate": 2e-05, "loss": 0.04768528, "step": 14986 }, { "epoch": 29.974, "grad_norm": 0.9757674336433411, "learning_rate": 2e-05, "loss": 0.03474696, "step": 14987 }, { "epoch": 29.976, "grad_norm": 1.1759353876113892, "learning_rate": 2e-05, "loss": 0.06116701, "step": 14988 }, { "epoch": 29.978, "grad_norm": 2.7346103191375732, "learning_rate": 2e-05, "loss": 0.05853321, "step": 14989 }, { "epoch": 29.98, "grad_norm": 1.1065059900283813, "learning_rate": 2e-05, "loss": 0.0592829, "step": 14990 }, { "epoch": 29.982, "grad_norm": 1.2685704231262207, "learning_rate": 2e-05, "loss": 0.05609816, "step": 14991 }, { "epoch": 29.984, "grad_norm": 1.1528477668762207, "learning_rate": 2e-05, "loss": 0.05313877, "step": 14992 }, { "epoch": 29.986, "grad_norm": 0.9213481545448303, "learning_rate": 2e-05, "loss": 0.03648861, "step": 14993 }, { "epoch": 29.988, "grad_norm": 1.4056084156036377, "learning_rate": 2e-05, "loss": 0.04710983, "step": 14994 }, { "epoch": 29.99, "grad_norm": 1.281116247177124, "learning_rate": 2e-05, "loss": 0.04585171, "step": 14995 }, { "epoch": 29.992, "grad_norm": 1.3291391134262085, "learning_rate": 2e-05, "loss": 0.03957698, "step": 14996 }, { "epoch": 29.994, "grad_norm": 1.1018824577331543, "learning_rate": 2e-05, "loss": 0.04200132, "step": 14997 }, { "epoch": 29.996, "grad_norm": 1.3506656885147095, "learning_rate": 2e-05, "loss": 0.06009495, "step": 14998 }, { "epoch": 29.998, "grad_norm": 1.2235674858093262, "learning_rate": 2e-05, "loss": 0.05062375, "step": 14999 }, { "epoch": 30.0, "grad_norm": 1.305391550064087, "learning_rate": 2e-05, "loss": 0.05013356, "step": 15000 }, { "epoch": 30.0, "eval_performance": { "AngleClassification_1": 1.0, "AngleClassification_2": 0.996, "AngleClassification_3": 0.9780439121756487, "Equal_1": 0.996, "Equal_2": 0.9780439121756487, "Equal_3": 0.9860279441117764, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.9940119760479041, "Parallel_1": 0.9879759519038076, "Parallel_2": 0.9979959919839679, "Parallel_3": 0.992, "Perpendicular_1": 0.992, "Perpendicular_2": 0.992, "Perpendicular_3": 0.8777555110220441, "PointLiesOnCircle_1": 1.0, "PointLiesOnCircle_2": 0.998, "PointLiesOnCircle_3": 0.9812000000000001, "PointLiesOnLine_1": 0.9939879759519038, "PointLiesOnLine_2": 0.9959919839679359, "PointLiesOnLine_3": 0.9780439121756487 }, "eval_runtime": 224.7843, "eval_samples_per_second": 46.711, "eval_steps_per_second": 0.934, "step": 15000 }, { "epoch": 30.002, "grad_norm": 1.0308068990707397, "learning_rate": 2e-05, "loss": 0.03184136, "step": 15001 }, { "epoch": 30.004, "grad_norm": 2.7533833980560303, "learning_rate": 2e-05, "loss": 0.05521324, "step": 15002 }, { "epoch": 30.006, "grad_norm": 1.695167064666748, "learning_rate": 2e-05, "loss": 0.04085717, "step": 15003 }, { "epoch": 30.008, "grad_norm": 0.7422577142715454, "learning_rate": 2e-05, "loss": 0.02605377, "step": 15004 }, { "epoch": 30.01, "grad_norm": 2.396179437637329, "learning_rate": 2e-05, "loss": 0.05817553, "step": 15005 }, { "epoch": 30.012, "grad_norm": 1.0059340000152588, "learning_rate": 2e-05, "loss": 0.03151233, "step": 15006 }, { "epoch": 30.014, "grad_norm": 0.9222607612609863, "learning_rate": 2e-05, "loss": 0.0398244, "step": 15007 }, { "epoch": 30.016, "grad_norm": 1.120142936706543, "learning_rate": 2e-05, "loss": 0.04367281, "step": 15008 }, { "epoch": 30.018, "grad_norm": 1.2301863431930542, "learning_rate": 2e-05, "loss": 0.04112256, "step": 15009 }, { "epoch": 30.02, "grad_norm": 1.5547850131988525, "learning_rate": 2e-05, "loss": 0.03911123, "step": 15010 }, { "epoch": 30.022, "grad_norm": 1.4882464408874512, "learning_rate": 2e-05, "loss": 0.05300993, "step": 15011 }, { "epoch": 30.024, "grad_norm": 1.5543612241744995, "learning_rate": 2e-05, "loss": 0.05405387, "step": 15012 }, { "epoch": 30.026, "grad_norm": 1.6829205751419067, "learning_rate": 2e-05, "loss": 0.04546053, "step": 15013 }, { "epoch": 30.028, "grad_norm": 1.033799648284912, "learning_rate": 2e-05, "loss": 0.04306719, "step": 15014 }, { "epoch": 30.03, "grad_norm": 1.373805046081543, "learning_rate": 2e-05, "loss": 0.04412587, "step": 15015 }, { "epoch": 30.032, "grad_norm": 1.0974349975585938, "learning_rate": 2e-05, "loss": 0.05059412, "step": 15016 }, { "epoch": 30.034, "grad_norm": 2.554478406906128, "learning_rate": 2e-05, "loss": 0.04547254, "step": 15017 }, { "epoch": 30.036, "grad_norm": 1.4960787296295166, "learning_rate": 2e-05, "loss": 0.0430349, "step": 15018 }, { "epoch": 30.038, "grad_norm": 2.1566174030303955, "learning_rate": 2e-05, "loss": 0.0581177, "step": 15019 }, { "epoch": 30.04, "grad_norm": 1.18798828125, "learning_rate": 2e-05, "loss": 0.05051094, "step": 15020 }, { "epoch": 30.042, "grad_norm": 0.962690532207489, "learning_rate": 2e-05, "loss": 0.03692138, "step": 15021 }, { "epoch": 30.044, "grad_norm": 2.5550730228424072, "learning_rate": 2e-05, "loss": 0.06318019, "step": 15022 }, { "epoch": 30.046, "grad_norm": 0.9547829031944275, "learning_rate": 2e-05, "loss": 0.02648566, "step": 15023 }, { "epoch": 30.048, "grad_norm": 1.489747166633606, "learning_rate": 2e-05, "loss": 0.03818091, "step": 15024 }, { "epoch": 30.05, "grad_norm": 1.0894618034362793, "learning_rate": 2e-05, "loss": 0.03644903, "step": 15025 }, { "epoch": 30.052, "grad_norm": 1.1374642848968506, "learning_rate": 2e-05, "loss": 0.0539354, "step": 15026 }, { "epoch": 30.054, "grad_norm": 1.0166363716125488, "learning_rate": 2e-05, "loss": 0.05391256, "step": 15027 }, { "epoch": 30.056, "grad_norm": 1.260919213294983, "learning_rate": 2e-05, "loss": 0.03132448, "step": 15028 }, { "epoch": 30.058, "grad_norm": 2.626197338104248, "learning_rate": 2e-05, "loss": 0.06494924, "step": 15029 }, { "epoch": 30.06, "grad_norm": 1.8384228944778442, "learning_rate": 2e-05, "loss": 0.04709835, "step": 15030 }, { "epoch": 30.062, "grad_norm": 1.6264188289642334, "learning_rate": 2e-05, "loss": 0.04376096, "step": 15031 }, { "epoch": 30.064, "grad_norm": 3.299708843231201, "learning_rate": 2e-05, "loss": 0.04652292, "step": 15032 }, { "epoch": 30.066, "grad_norm": 0.9948878884315491, "learning_rate": 2e-05, "loss": 0.04383293, "step": 15033 }, { "epoch": 30.068, "grad_norm": 0.9251421093940735, "learning_rate": 2e-05, "loss": 0.03158117, "step": 15034 }, { "epoch": 30.07, "grad_norm": 0.9317488074302673, "learning_rate": 2e-05, "loss": 0.03238297, "step": 15035 }, { "epoch": 30.072, "grad_norm": 1.221340537071228, "learning_rate": 2e-05, "loss": 0.0462931, "step": 15036 }, { "epoch": 30.074, "grad_norm": 2.551642894744873, "learning_rate": 2e-05, "loss": 0.03541016, "step": 15037 }, { "epoch": 30.076, "grad_norm": 2.177013874053955, "learning_rate": 2e-05, "loss": 0.06491027, "step": 15038 }, { "epoch": 30.078, "grad_norm": 1.1649829149246216, "learning_rate": 2e-05, "loss": 0.05196427, "step": 15039 }, { "epoch": 30.08, "grad_norm": 1.1445386409759521, "learning_rate": 2e-05, "loss": 0.03740873, "step": 15040 }, { "epoch": 30.082, "grad_norm": 1.055728554725647, "learning_rate": 2e-05, "loss": 0.03764991, "step": 15041 }, { "epoch": 30.084, "grad_norm": 0.9928691387176514, "learning_rate": 2e-05, "loss": 0.04613523, "step": 15042 }, { "epoch": 30.086, "grad_norm": 4.624756336212158, "learning_rate": 2e-05, "loss": 0.05510693, "step": 15043 }, { "epoch": 30.088, "grad_norm": 1.0693583488464355, "learning_rate": 2e-05, "loss": 0.03797982, "step": 15044 }, { "epoch": 30.09, "grad_norm": 1.0771105289459229, "learning_rate": 2e-05, "loss": 0.03877475, "step": 15045 }, { "epoch": 30.092, "grad_norm": 0.9826010465621948, "learning_rate": 2e-05, "loss": 0.03939668, "step": 15046 }, { "epoch": 30.094, "grad_norm": 3.2796990871429443, "learning_rate": 2e-05, "loss": 0.04810968, "step": 15047 }, { "epoch": 30.096, "grad_norm": 1.338488221168518, "learning_rate": 2e-05, "loss": 0.04373957, "step": 15048 }, { "epoch": 30.098, "grad_norm": 1.2567124366760254, "learning_rate": 2e-05, "loss": 0.0524064, "step": 15049 }, { "epoch": 30.1, "grad_norm": 1.1686525344848633, "learning_rate": 2e-05, "loss": 0.04910736, "step": 15050 }, { "epoch": 30.102, "grad_norm": 1.4824111461639404, "learning_rate": 2e-05, "loss": 0.03334043, "step": 15051 }, { "epoch": 30.104, "grad_norm": 1.4314874410629272, "learning_rate": 2e-05, "loss": 0.05820961, "step": 15052 }, { "epoch": 30.106, "grad_norm": 1.1740633249282837, "learning_rate": 2e-05, "loss": 0.04364073, "step": 15053 }, { "epoch": 30.108, "grad_norm": 1.3219835758209229, "learning_rate": 2e-05, "loss": 0.04333458, "step": 15054 }, { "epoch": 30.11, "grad_norm": 1.673492193222046, "learning_rate": 2e-05, "loss": 0.03851339, "step": 15055 }, { "epoch": 30.112, "grad_norm": 2.1841211318969727, "learning_rate": 2e-05, "loss": 0.04942738, "step": 15056 }, { "epoch": 30.114, "grad_norm": 1.006935715675354, "learning_rate": 2e-05, "loss": 0.03826583, "step": 15057 }, { "epoch": 30.116, "grad_norm": 1.7263792753219604, "learning_rate": 2e-05, "loss": 0.03882686, "step": 15058 }, { "epoch": 30.118, "grad_norm": 1.3632997274398804, "learning_rate": 2e-05, "loss": 0.05122161, "step": 15059 }, { "epoch": 30.12, "grad_norm": 0.9897202849388123, "learning_rate": 2e-05, "loss": 0.03478212, "step": 15060 }, { "epoch": 30.122, "grad_norm": 1.1950058937072754, "learning_rate": 2e-05, "loss": 0.03848312, "step": 15061 }, { "epoch": 30.124, "grad_norm": 0.8965030312538147, "learning_rate": 2e-05, "loss": 0.03769651, "step": 15062 }, { "epoch": 30.126, "grad_norm": 0.9321705102920532, "learning_rate": 2e-05, "loss": 0.03947328, "step": 15063 }, { "epoch": 30.128, "grad_norm": 1.2751407623291016, "learning_rate": 2e-05, "loss": 0.02556699, "step": 15064 }, { "epoch": 30.13, "grad_norm": 1.2257126569747925, "learning_rate": 2e-05, "loss": 0.04372185, "step": 15065 }, { "epoch": 30.132, "grad_norm": 1.3027081489562988, "learning_rate": 2e-05, "loss": 0.04107498, "step": 15066 }, { "epoch": 30.134, "grad_norm": 4.331851959228516, "learning_rate": 2e-05, "loss": 0.04309349, "step": 15067 }, { "epoch": 30.136, "grad_norm": 1.0140440464019775, "learning_rate": 2e-05, "loss": 0.0461864, "step": 15068 }, { "epoch": 30.138, "grad_norm": 1.174332857131958, "learning_rate": 2e-05, "loss": 0.04036353, "step": 15069 }, { "epoch": 30.14, "grad_norm": 1.081958532333374, "learning_rate": 2e-05, "loss": 0.02861772, "step": 15070 }, { "epoch": 30.142, "grad_norm": 1.0919504165649414, "learning_rate": 2e-05, "loss": 0.04842685, "step": 15071 }, { "epoch": 30.144, "grad_norm": 1.223402500152588, "learning_rate": 2e-05, "loss": 0.03308058, "step": 15072 }, { "epoch": 30.146, "grad_norm": 1.9380789995193481, "learning_rate": 2e-05, "loss": 0.05454859, "step": 15073 }, { "epoch": 30.148, "grad_norm": 1.0059641599655151, "learning_rate": 2e-05, "loss": 0.03418054, "step": 15074 }, { "epoch": 30.15, "grad_norm": 0.9891126751899719, "learning_rate": 2e-05, "loss": 0.03045425, "step": 15075 }, { "epoch": 30.152, "grad_norm": 1.3871240615844727, "learning_rate": 2e-05, "loss": 0.04953337, "step": 15076 }, { "epoch": 30.154, "grad_norm": 1.1540429592132568, "learning_rate": 2e-05, "loss": 0.05309816, "step": 15077 }, { "epoch": 30.156, "grad_norm": 0.9253750443458557, "learning_rate": 2e-05, "loss": 0.0323764, "step": 15078 }, { "epoch": 30.158, "grad_norm": 1.028652310371399, "learning_rate": 2e-05, "loss": 0.03621462, "step": 15079 }, { "epoch": 30.16, "grad_norm": 1.3620299100875854, "learning_rate": 2e-05, "loss": 0.06867465, "step": 15080 }, { "epoch": 30.162, "grad_norm": 2.5810792446136475, "learning_rate": 2e-05, "loss": 0.04975689, "step": 15081 }, { "epoch": 30.164, "grad_norm": 1.5757436752319336, "learning_rate": 2e-05, "loss": 0.06180906, "step": 15082 }, { "epoch": 30.166, "grad_norm": 1.153294324874878, "learning_rate": 2e-05, "loss": 0.04477157, "step": 15083 }, { "epoch": 30.168, "grad_norm": 1.4107401371002197, "learning_rate": 2e-05, "loss": 0.03887236, "step": 15084 }, { "epoch": 30.17, "grad_norm": 1.0855722427368164, "learning_rate": 2e-05, "loss": 0.04560476, "step": 15085 }, { "epoch": 30.172, "grad_norm": 1.2820496559143066, "learning_rate": 2e-05, "loss": 0.05504338, "step": 15086 }, { "epoch": 30.174, "grad_norm": 1.2846413850784302, "learning_rate": 2e-05, "loss": 0.05321379, "step": 15087 }, { "epoch": 30.176, "grad_norm": 1.250867486000061, "learning_rate": 2e-05, "loss": 0.03819541, "step": 15088 }, { "epoch": 30.178, "grad_norm": 1.02994704246521, "learning_rate": 2e-05, "loss": 0.04428207, "step": 15089 }, { "epoch": 30.18, "grad_norm": 1.1102049350738525, "learning_rate": 2e-05, "loss": 0.03963427, "step": 15090 }, { "epoch": 30.182, "grad_norm": 1.6807299852371216, "learning_rate": 2e-05, "loss": 0.05213862, "step": 15091 }, { "epoch": 30.184, "grad_norm": 1.124149203300476, "learning_rate": 2e-05, "loss": 0.03568637, "step": 15092 }, { "epoch": 30.186, "grad_norm": 1.1228359937667847, "learning_rate": 2e-05, "loss": 0.05287477, "step": 15093 }, { "epoch": 30.188, "grad_norm": 1.2726771831512451, "learning_rate": 2e-05, "loss": 0.06411633, "step": 15094 }, { "epoch": 30.19, "grad_norm": 1.3177509307861328, "learning_rate": 2e-05, "loss": 0.0598552, "step": 15095 }, { "epoch": 30.192, "grad_norm": 1.6233930587768555, "learning_rate": 2e-05, "loss": 0.04752813, "step": 15096 }, { "epoch": 30.194, "grad_norm": 1.1665074825286865, "learning_rate": 2e-05, "loss": 0.05199505, "step": 15097 }, { "epoch": 30.196, "grad_norm": 1.0650789737701416, "learning_rate": 2e-05, "loss": 0.03830632, "step": 15098 }, { "epoch": 30.198, "grad_norm": 1.7656784057617188, "learning_rate": 2e-05, "loss": 0.0690074, "step": 15099 }, { "epoch": 30.2, "grad_norm": 1.0086908340454102, "learning_rate": 2e-05, "loss": 0.04296269, "step": 15100 }, { "epoch": 30.202, "grad_norm": 1.0333184003829956, "learning_rate": 2e-05, "loss": 0.03289052, "step": 15101 }, { "epoch": 30.204, "grad_norm": 1.0218408107757568, "learning_rate": 2e-05, "loss": 0.03519783, "step": 15102 }, { "epoch": 30.206, "grad_norm": 1.8180515766143799, "learning_rate": 2e-05, "loss": 0.06663071, "step": 15103 }, { "epoch": 30.208, "grad_norm": 1.272222638130188, "learning_rate": 2e-05, "loss": 0.0570414, "step": 15104 }, { "epoch": 30.21, "grad_norm": 1.7831804752349854, "learning_rate": 2e-05, "loss": 0.04471539, "step": 15105 }, { "epoch": 30.212, "grad_norm": 0.9384122490882874, "learning_rate": 2e-05, "loss": 0.03888484, "step": 15106 }, { "epoch": 30.214, "grad_norm": 1.2506427764892578, "learning_rate": 2e-05, "loss": 0.0423446, "step": 15107 }, { "epoch": 30.216, "grad_norm": 1.0899723768234253, "learning_rate": 2e-05, "loss": 0.04235667, "step": 15108 }, { "epoch": 30.218, "grad_norm": 1.1475201845169067, "learning_rate": 2e-05, "loss": 0.04124061, "step": 15109 }, { "epoch": 30.22, "grad_norm": 1.2647457122802734, "learning_rate": 2e-05, "loss": 0.05824676, "step": 15110 }, { "epoch": 30.222, "grad_norm": 0.973704993724823, "learning_rate": 2e-05, "loss": 0.04031935, "step": 15111 }, { "epoch": 30.224, "grad_norm": 1.8009536266326904, "learning_rate": 2e-05, "loss": 0.04997278, "step": 15112 }, { "epoch": 30.226, "grad_norm": 1.1472795009613037, "learning_rate": 2e-05, "loss": 0.04423388, "step": 15113 }, { "epoch": 30.228, "grad_norm": 1.0896915197372437, "learning_rate": 2e-05, "loss": 0.04200044, "step": 15114 }, { "epoch": 30.23, "grad_norm": 1.6685937643051147, "learning_rate": 2e-05, "loss": 0.04130819, "step": 15115 }, { "epoch": 30.232, "grad_norm": 0.9746212363243103, "learning_rate": 2e-05, "loss": 0.03880321, "step": 15116 }, { "epoch": 30.234, "grad_norm": 1.5170867443084717, "learning_rate": 2e-05, "loss": 0.0418476, "step": 15117 }, { "epoch": 30.236, "grad_norm": 1.8708497285842896, "learning_rate": 2e-05, "loss": 0.04597779, "step": 15118 }, { "epoch": 30.238, "grad_norm": 1.1558741331100464, "learning_rate": 2e-05, "loss": 0.04758434, "step": 15119 }, { "epoch": 30.24, "grad_norm": 0.9833442568778992, "learning_rate": 2e-05, "loss": 0.03922521, "step": 15120 }, { "epoch": 30.242, "grad_norm": 1.3472473621368408, "learning_rate": 2e-05, "loss": 0.06021844, "step": 15121 }, { "epoch": 30.244, "grad_norm": 1.1350276470184326, "learning_rate": 2e-05, "loss": 0.04171648, "step": 15122 }, { "epoch": 30.246, "grad_norm": 1.0089110136032104, "learning_rate": 2e-05, "loss": 0.04921135, "step": 15123 }, { "epoch": 30.248, "grad_norm": 0.9299418926239014, "learning_rate": 2e-05, "loss": 0.04035712, "step": 15124 }, { "epoch": 30.25, "grad_norm": 1.6644203662872314, "learning_rate": 2e-05, "loss": 0.05267403, "step": 15125 }, { "epoch": 30.252, "grad_norm": 1.0587055683135986, "learning_rate": 2e-05, "loss": 0.02877274, "step": 15126 }, { "epoch": 30.254, "grad_norm": 1.139699935913086, "learning_rate": 2e-05, "loss": 0.03074811, "step": 15127 }, { "epoch": 30.256, "grad_norm": 1.0326001644134521, "learning_rate": 2e-05, "loss": 0.03744929, "step": 15128 }, { "epoch": 30.258, "grad_norm": 1.127711534500122, "learning_rate": 2e-05, "loss": 0.0434154, "step": 15129 }, { "epoch": 30.26, "grad_norm": 1.6909953355789185, "learning_rate": 2e-05, "loss": 0.05737639, "step": 15130 }, { "epoch": 30.262, "grad_norm": 1.1252226829528809, "learning_rate": 2e-05, "loss": 0.05764807, "step": 15131 }, { "epoch": 30.264, "grad_norm": 1.1084445714950562, "learning_rate": 2e-05, "loss": 0.04913251, "step": 15132 }, { "epoch": 30.266, "grad_norm": 1.1074551343917847, "learning_rate": 2e-05, "loss": 0.03837232, "step": 15133 }, { "epoch": 30.268, "grad_norm": 1.1671526432037354, "learning_rate": 2e-05, "loss": 0.06363723, "step": 15134 }, { "epoch": 30.27, "grad_norm": 0.9258093237876892, "learning_rate": 2e-05, "loss": 0.03241454, "step": 15135 }, { "epoch": 30.272, "grad_norm": 2.2595388889312744, "learning_rate": 2e-05, "loss": 0.04844834, "step": 15136 }, { "epoch": 30.274, "grad_norm": 0.9475272297859192, "learning_rate": 2e-05, "loss": 0.026248, "step": 15137 }, { "epoch": 30.276, "grad_norm": 1.4427014589309692, "learning_rate": 2e-05, "loss": 0.03985434, "step": 15138 }, { "epoch": 30.278, "grad_norm": 1.1263253688812256, "learning_rate": 2e-05, "loss": 0.05336409, "step": 15139 }, { "epoch": 30.28, "grad_norm": 1.1342394351959229, "learning_rate": 2e-05, "loss": 0.03960358, "step": 15140 }, { "epoch": 30.282, "grad_norm": 1.018078088760376, "learning_rate": 2e-05, "loss": 0.04591689, "step": 15141 }, { "epoch": 30.284, "grad_norm": 1.3794876337051392, "learning_rate": 2e-05, "loss": 0.06597047, "step": 15142 }, { "epoch": 30.286, "grad_norm": 1.314192295074463, "learning_rate": 2e-05, "loss": 0.06166491, "step": 15143 }, { "epoch": 30.288, "grad_norm": 1.2351943254470825, "learning_rate": 2e-05, "loss": 0.06171503, "step": 15144 }, { "epoch": 30.29, "grad_norm": 1.7845873832702637, "learning_rate": 2e-05, "loss": 0.03849946, "step": 15145 }, { "epoch": 30.292, "grad_norm": 1.7038358449935913, "learning_rate": 2e-05, "loss": 0.06577089, "step": 15146 }, { "epoch": 30.294, "grad_norm": 1.0476207733154297, "learning_rate": 2e-05, "loss": 0.04647258, "step": 15147 }, { "epoch": 30.296, "grad_norm": 2.8477466106414795, "learning_rate": 2e-05, "loss": 0.04359397, "step": 15148 }, { "epoch": 30.298, "grad_norm": 0.9909746050834656, "learning_rate": 2e-05, "loss": 0.03791937, "step": 15149 }, { "epoch": 30.3, "grad_norm": 0.896663248538971, "learning_rate": 2e-05, "loss": 0.02858303, "step": 15150 }, { "epoch": 30.302, "grad_norm": 1.3414433002471924, "learning_rate": 2e-05, "loss": 0.04678523, "step": 15151 }, { "epoch": 30.304, "grad_norm": 2.5232951641082764, "learning_rate": 2e-05, "loss": 0.03920921, "step": 15152 }, { "epoch": 30.306, "grad_norm": 0.871887743473053, "learning_rate": 2e-05, "loss": 0.03851134, "step": 15153 }, { "epoch": 30.308, "grad_norm": 2.1046650409698486, "learning_rate": 2e-05, "loss": 0.05090259, "step": 15154 }, { "epoch": 30.31, "grad_norm": 1.050541639328003, "learning_rate": 2e-05, "loss": 0.04804298, "step": 15155 }, { "epoch": 30.312, "grad_norm": 0.9703270196914673, "learning_rate": 2e-05, "loss": 0.03884853, "step": 15156 }, { "epoch": 30.314, "grad_norm": 1.2022302150726318, "learning_rate": 2e-05, "loss": 0.05424745, "step": 15157 }, { "epoch": 30.316, "grad_norm": 2.3596882820129395, "learning_rate": 2e-05, "loss": 0.06427781, "step": 15158 }, { "epoch": 30.318, "grad_norm": 0.735532283782959, "learning_rate": 2e-05, "loss": 0.02098755, "step": 15159 }, { "epoch": 30.32, "grad_norm": 2.271441698074341, "learning_rate": 2e-05, "loss": 0.04860394, "step": 15160 }, { "epoch": 30.322, "grad_norm": 0.9820964932441711, "learning_rate": 2e-05, "loss": 0.03813133, "step": 15161 }, { "epoch": 30.324, "grad_norm": 3.6161766052246094, "learning_rate": 2e-05, "loss": 0.04177638, "step": 15162 }, { "epoch": 30.326, "grad_norm": 1.2038625478744507, "learning_rate": 2e-05, "loss": 0.04413422, "step": 15163 }, { "epoch": 30.328, "grad_norm": 1.0804635286331177, "learning_rate": 2e-05, "loss": 0.0475056, "step": 15164 }, { "epoch": 30.33, "grad_norm": 1.2777682542800903, "learning_rate": 2e-05, "loss": 0.04483355, "step": 15165 }, { "epoch": 30.332, "grad_norm": 0.9220979809761047, "learning_rate": 2e-05, "loss": 0.03874589, "step": 15166 }, { "epoch": 30.334, "grad_norm": 1.231019377708435, "learning_rate": 2e-05, "loss": 0.05257763, "step": 15167 }, { "epoch": 30.336, "grad_norm": 0.9976868629455566, "learning_rate": 2e-05, "loss": 0.03522278, "step": 15168 }, { "epoch": 30.338, "grad_norm": 1.0758280754089355, "learning_rate": 2e-05, "loss": 0.05232194, "step": 15169 }, { "epoch": 30.34, "grad_norm": 1.0240188837051392, "learning_rate": 2e-05, "loss": 0.05847413, "step": 15170 }, { "epoch": 30.342, "grad_norm": 0.8838725686073303, "learning_rate": 2e-05, "loss": 0.03419738, "step": 15171 }, { "epoch": 30.344, "grad_norm": 1.43282151222229, "learning_rate": 2e-05, "loss": 0.05383113, "step": 15172 }, { "epoch": 30.346, "grad_norm": 1.1276417970657349, "learning_rate": 2e-05, "loss": 0.03834279, "step": 15173 }, { "epoch": 30.348, "grad_norm": 1.4770399332046509, "learning_rate": 2e-05, "loss": 0.05782711, "step": 15174 }, { "epoch": 30.35, "grad_norm": 1.0348106622695923, "learning_rate": 2e-05, "loss": 0.04162346, "step": 15175 }, { "epoch": 30.352, "grad_norm": 1.8618354797363281, "learning_rate": 2e-05, "loss": 0.07344772, "step": 15176 }, { "epoch": 30.354, "grad_norm": 0.92513507604599, "learning_rate": 2e-05, "loss": 0.02897989, "step": 15177 }, { "epoch": 30.356, "grad_norm": 1.3217964172363281, "learning_rate": 2e-05, "loss": 0.0468182, "step": 15178 }, { "epoch": 30.358, "grad_norm": 1.3669573068618774, "learning_rate": 2e-05, "loss": 0.04254651, "step": 15179 }, { "epoch": 30.36, "grad_norm": 0.8975329995155334, "learning_rate": 2e-05, "loss": 0.03618569, "step": 15180 }, { "epoch": 30.362, "grad_norm": 0.8632411956787109, "learning_rate": 2e-05, "loss": 0.02988159, "step": 15181 }, { "epoch": 30.364, "grad_norm": 1.7000848054885864, "learning_rate": 2e-05, "loss": 0.050744, "step": 15182 }, { "epoch": 30.366, "grad_norm": 1.1393963098526, "learning_rate": 2e-05, "loss": 0.04316861, "step": 15183 }, { "epoch": 30.368, "grad_norm": 1.8605047464370728, "learning_rate": 2e-05, "loss": 0.05693445, "step": 15184 }, { "epoch": 30.37, "grad_norm": 1.0264809131622314, "learning_rate": 2e-05, "loss": 0.04444408, "step": 15185 }, { "epoch": 30.372, "grad_norm": 0.9575855135917664, "learning_rate": 2e-05, "loss": 0.04074533, "step": 15186 }, { "epoch": 30.374, "grad_norm": 0.7749377489089966, "learning_rate": 2e-05, "loss": 0.02775382, "step": 15187 }, { "epoch": 30.376, "grad_norm": 1.1088873147964478, "learning_rate": 2e-05, "loss": 0.05287268, "step": 15188 }, { "epoch": 30.378, "grad_norm": 0.8975803852081299, "learning_rate": 2e-05, "loss": 0.03264301, "step": 15189 }, { "epoch": 30.38, "grad_norm": 1.1287401914596558, "learning_rate": 2e-05, "loss": 0.04806273, "step": 15190 }, { "epoch": 30.382, "grad_norm": 1.093601942062378, "learning_rate": 2e-05, "loss": 0.03556317, "step": 15191 }, { "epoch": 30.384, "grad_norm": 1.0091261863708496, "learning_rate": 2e-05, "loss": 0.03495965, "step": 15192 }, { "epoch": 30.386, "grad_norm": 1.2238987684249878, "learning_rate": 2e-05, "loss": 0.04697929, "step": 15193 }, { "epoch": 30.388, "grad_norm": 0.8605625033378601, "learning_rate": 2e-05, "loss": 0.0456198, "step": 15194 }, { "epoch": 30.39, "grad_norm": 1.0559747219085693, "learning_rate": 2e-05, "loss": 0.03712171, "step": 15195 }, { "epoch": 30.392, "grad_norm": 1.2347922325134277, "learning_rate": 2e-05, "loss": 0.05268703, "step": 15196 }, { "epoch": 30.394, "grad_norm": 1.0191525220870972, "learning_rate": 2e-05, "loss": 0.03989586, "step": 15197 }, { "epoch": 30.396, "grad_norm": 1.2155154943466187, "learning_rate": 2e-05, "loss": 0.05873741, "step": 15198 }, { "epoch": 30.398, "grad_norm": 1.190245509147644, "learning_rate": 2e-05, "loss": 0.04354099, "step": 15199 }, { "epoch": 30.4, "grad_norm": 1.0246021747589111, "learning_rate": 2e-05, "loss": 0.04077869, "step": 15200 }, { "epoch": 30.402, "grad_norm": 1.8616139888763428, "learning_rate": 2e-05, "loss": 0.05622261, "step": 15201 }, { "epoch": 30.404, "grad_norm": 0.9131290912628174, "learning_rate": 2e-05, "loss": 0.03340092, "step": 15202 }, { "epoch": 30.406, "grad_norm": 1.0527607202529907, "learning_rate": 2e-05, "loss": 0.05655628, "step": 15203 }, { "epoch": 30.408, "grad_norm": 0.9244288802146912, "learning_rate": 2e-05, "loss": 0.03568232, "step": 15204 }, { "epoch": 30.41, "grad_norm": 1.2113935947418213, "learning_rate": 2e-05, "loss": 0.0390442, "step": 15205 }, { "epoch": 30.412, "grad_norm": 1.1086965799331665, "learning_rate": 2e-05, "loss": 0.04737919, "step": 15206 }, { "epoch": 30.414, "grad_norm": 1.2952810525894165, "learning_rate": 2e-05, "loss": 0.02725228, "step": 15207 }, { "epoch": 30.416, "grad_norm": 0.9821729063987732, "learning_rate": 2e-05, "loss": 0.03546926, "step": 15208 }, { "epoch": 30.418, "grad_norm": 1.2644320726394653, "learning_rate": 2e-05, "loss": 0.04677264, "step": 15209 }, { "epoch": 30.42, "grad_norm": 1.0288288593292236, "learning_rate": 2e-05, "loss": 0.04035509, "step": 15210 }, { "epoch": 30.422, "grad_norm": 1.3232293128967285, "learning_rate": 2e-05, "loss": 0.04139759, "step": 15211 }, { "epoch": 30.424, "grad_norm": 1.1329476833343506, "learning_rate": 2e-05, "loss": 0.04369304, "step": 15212 }, { "epoch": 30.426, "grad_norm": 2.308251142501831, "learning_rate": 2e-05, "loss": 0.04851383, "step": 15213 }, { "epoch": 30.428, "grad_norm": 1.1261333227157593, "learning_rate": 2e-05, "loss": 0.04626966, "step": 15214 }, { "epoch": 30.43, "grad_norm": 1.2353731393814087, "learning_rate": 2e-05, "loss": 0.04220863, "step": 15215 }, { "epoch": 30.432, "grad_norm": 0.9733811020851135, "learning_rate": 2e-05, "loss": 0.04108451, "step": 15216 }, { "epoch": 30.434, "grad_norm": 1.3542823791503906, "learning_rate": 2e-05, "loss": 0.05243605, "step": 15217 }, { "epoch": 30.436, "grad_norm": 1.1251178979873657, "learning_rate": 2e-05, "loss": 0.04760038, "step": 15218 }, { "epoch": 30.438, "grad_norm": 1.2049202919006348, "learning_rate": 2e-05, "loss": 0.04998695, "step": 15219 }, { "epoch": 30.44, "grad_norm": 1.3496381044387817, "learning_rate": 2e-05, "loss": 0.03914636, "step": 15220 }, { "epoch": 30.442, "grad_norm": 1.1060059070587158, "learning_rate": 2e-05, "loss": 0.04727049, "step": 15221 }, { "epoch": 30.444, "grad_norm": 1.1711000204086304, "learning_rate": 2e-05, "loss": 0.04908316, "step": 15222 }, { "epoch": 30.446, "grad_norm": 1.0700660943984985, "learning_rate": 2e-05, "loss": 0.04927491, "step": 15223 }, { "epoch": 30.448, "grad_norm": 2.662594795227051, "learning_rate": 2e-05, "loss": 0.0320991, "step": 15224 }, { "epoch": 30.45, "grad_norm": 1.0689165592193604, "learning_rate": 2e-05, "loss": 0.04688723, "step": 15225 }, { "epoch": 30.452, "grad_norm": 0.8211218118667603, "learning_rate": 2e-05, "loss": 0.02013472, "step": 15226 }, { "epoch": 30.454, "grad_norm": 1.1198627948760986, "learning_rate": 2e-05, "loss": 0.04327358, "step": 15227 }, { "epoch": 30.456, "grad_norm": 0.9729362726211548, "learning_rate": 2e-05, "loss": 0.03877353, "step": 15228 }, { "epoch": 30.458, "grad_norm": 1.2558162212371826, "learning_rate": 2e-05, "loss": 0.0420769, "step": 15229 }, { "epoch": 30.46, "grad_norm": 1.4423881769180298, "learning_rate": 2e-05, "loss": 0.04620108, "step": 15230 }, { "epoch": 30.462, "grad_norm": 1.3570334911346436, "learning_rate": 2e-05, "loss": 0.04956406, "step": 15231 }, { "epoch": 30.464, "grad_norm": 1.1386452913284302, "learning_rate": 2e-05, "loss": 0.04483388, "step": 15232 }, { "epoch": 30.466, "grad_norm": 1.8052864074707031, "learning_rate": 2e-05, "loss": 0.04640273, "step": 15233 }, { "epoch": 30.468, "grad_norm": 1.1856060028076172, "learning_rate": 2e-05, "loss": 0.03461586, "step": 15234 }, { "epoch": 30.47, "grad_norm": 0.9085116386413574, "learning_rate": 2e-05, "loss": 0.02411113, "step": 15235 }, { "epoch": 30.472, "grad_norm": 1.417917013168335, "learning_rate": 2e-05, "loss": 0.0498498, "step": 15236 }, { "epoch": 30.474, "grad_norm": 1.23486328125, "learning_rate": 2e-05, "loss": 0.04182458, "step": 15237 }, { "epoch": 30.476, "grad_norm": 1.044952630996704, "learning_rate": 2e-05, "loss": 0.03346809, "step": 15238 }, { "epoch": 30.478, "grad_norm": 1.5184699296951294, "learning_rate": 2e-05, "loss": 0.04909082, "step": 15239 }, { "epoch": 30.48, "grad_norm": 1.0278830528259277, "learning_rate": 2e-05, "loss": 0.04546173, "step": 15240 }, { "epoch": 30.482, "grad_norm": 1.1241501569747925, "learning_rate": 2e-05, "loss": 0.04271089, "step": 15241 }, { "epoch": 30.484, "grad_norm": 1.1277130842208862, "learning_rate": 2e-05, "loss": 0.02877668, "step": 15242 }, { "epoch": 30.486, "grad_norm": 1.1124341487884521, "learning_rate": 2e-05, "loss": 0.05010645, "step": 15243 }, { "epoch": 30.488, "grad_norm": 1.4422926902770996, "learning_rate": 2e-05, "loss": 0.05669481, "step": 15244 }, { "epoch": 30.49, "grad_norm": 1.21055269241333, "learning_rate": 2e-05, "loss": 0.04853384, "step": 15245 }, { "epoch": 30.492, "grad_norm": 1.236591100692749, "learning_rate": 2e-05, "loss": 0.05216939, "step": 15246 }, { "epoch": 30.494, "grad_norm": 1.1986603736877441, "learning_rate": 2e-05, "loss": 0.05986943, "step": 15247 }, { "epoch": 30.496, "grad_norm": 0.9812233448028564, "learning_rate": 2e-05, "loss": 0.04504086, "step": 15248 }, { "epoch": 30.498, "grad_norm": 1.398612380027771, "learning_rate": 2e-05, "loss": 0.04422119, "step": 15249 }, { "epoch": 30.5, "grad_norm": 1.1871391534805298, "learning_rate": 2e-05, "loss": 0.04657135, "step": 15250 }, { "epoch": 30.502, "grad_norm": 3.210038661956787, "learning_rate": 2e-05, "loss": 0.05082079, "step": 15251 }, { "epoch": 30.504, "grad_norm": 1.1243926286697388, "learning_rate": 2e-05, "loss": 0.04826551, "step": 15252 }, { "epoch": 30.506, "grad_norm": 1.9084488153457642, "learning_rate": 2e-05, "loss": 0.06315817, "step": 15253 }, { "epoch": 30.508, "grad_norm": 1.2041815519332886, "learning_rate": 2e-05, "loss": 0.04246318, "step": 15254 }, { "epoch": 30.51, "grad_norm": 1.5733197927474976, "learning_rate": 2e-05, "loss": 0.04519498, "step": 15255 }, { "epoch": 30.512, "grad_norm": 1.0910706520080566, "learning_rate": 2e-05, "loss": 0.05395636, "step": 15256 }, { "epoch": 30.514, "grad_norm": 1.0341370105743408, "learning_rate": 2e-05, "loss": 0.02606761, "step": 15257 }, { "epoch": 30.516, "grad_norm": 1.146902084350586, "learning_rate": 2e-05, "loss": 0.05106888, "step": 15258 }, { "epoch": 30.518, "grad_norm": 1.0341204404830933, "learning_rate": 2e-05, "loss": 0.03892763, "step": 15259 }, { "epoch": 30.52, "grad_norm": 1.2079020738601685, "learning_rate": 2e-05, "loss": 0.04901616, "step": 15260 }, { "epoch": 30.522, "grad_norm": 0.8636009693145752, "learning_rate": 2e-05, "loss": 0.02556059, "step": 15261 }, { "epoch": 30.524, "grad_norm": 0.926358163356781, "learning_rate": 2e-05, "loss": 0.03236681, "step": 15262 }, { "epoch": 30.526, "grad_norm": 1.842421054840088, "learning_rate": 2e-05, "loss": 0.05670229, "step": 15263 }, { "epoch": 30.528, "grad_norm": 0.9659653902053833, "learning_rate": 2e-05, "loss": 0.0464028, "step": 15264 }, { "epoch": 30.53, "grad_norm": 1.1663445234298706, "learning_rate": 2e-05, "loss": 0.0340641, "step": 15265 }, { "epoch": 30.532, "grad_norm": 0.98187255859375, "learning_rate": 2e-05, "loss": 0.04276213, "step": 15266 }, { "epoch": 30.534, "grad_norm": 0.8584548234939575, "learning_rate": 2e-05, "loss": 0.02650402, "step": 15267 }, { "epoch": 30.536, "grad_norm": 1.0658044815063477, "learning_rate": 2e-05, "loss": 0.04412679, "step": 15268 }, { "epoch": 30.538, "grad_norm": 3.178711175918579, "learning_rate": 2e-05, "loss": 0.04948563, "step": 15269 }, { "epoch": 30.54, "grad_norm": 1.0548864603042603, "learning_rate": 2e-05, "loss": 0.03955693, "step": 15270 }, { "epoch": 30.542, "grad_norm": 1.5246509313583374, "learning_rate": 2e-05, "loss": 0.04291953, "step": 15271 }, { "epoch": 30.544, "grad_norm": 1.1927629709243774, "learning_rate": 2e-05, "loss": 0.05683687, "step": 15272 }, { "epoch": 30.546, "grad_norm": 1.1328017711639404, "learning_rate": 2e-05, "loss": 0.04265777, "step": 15273 }, { "epoch": 30.548000000000002, "grad_norm": 1.0825515985488892, "learning_rate": 2e-05, "loss": 0.03564101, "step": 15274 }, { "epoch": 30.55, "grad_norm": 1.6229450702667236, "learning_rate": 2e-05, "loss": 0.04115272, "step": 15275 }, { "epoch": 30.552, "grad_norm": 1.0939276218414307, "learning_rate": 2e-05, "loss": 0.04452078, "step": 15276 }, { "epoch": 30.554, "grad_norm": 1.1483478546142578, "learning_rate": 2e-05, "loss": 0.0464289, "step": 15277 }, { "epoch": 30.556, "grad_norm": 1.211912751197815, "learning_rate": 2e-05, "loss": 0.04548561, "step": 15278 }, { "epoch": 30.558, "grad_norm": 0.9169615507125854, "learning_rate": 2e-05, "loss": 0.03179051, "step": 15279 }, { "epoch": 30.56, "grad_norm": 0.9346284866333008, "learning_rate": 2e-05, "loss": 0.02813072, "step": 15280 }, { "epoch": 30.562, "grad_norm": 1.1934715509414673, "learning_rate": 2e-05, "loss": 0.05158596, "step": 15281 }, { "epoch": 30.564, "grad_norm": 3.4119443893432617, "learning_rate": 2e-05, "loss": 0.04592833, "step": 15282 }, { "epoch": 30.566, "grad_norm": 1.2232511043548584, "learning_rate": 2e-05, "loss": 0.05049157, "step": 15283 }, { "epoch": 30.568, "grad_norm": 1.066696047782898, "learning_rate": 2e-05, "loss": 0.041667, "step": 15284 }, { "epoch": 30.57, "grad_norm": 1.1798073053359985, "learning_rate": 2e-05, "loss": 0.04058933, "step": 15285 }, { "epoch": 30.572, "grad_norm": 1.0606446266174316, "learning_rate": 2e-05, "loss": 0.03613669, "step": 15286 }, { "epoch": 30.574, "grad_norm": 1.100513219833374, "learning_rate": 2e-05, "loss": 0.04004398, "step": 15287 }, { "epoch": 30.576, "grad_norm": 1.145613431930542, "learning_rate": 2e-05, "loss": 0.04471353, "step": 15288 }, { "epoch": 30.578, "grad_norm": 1.4291735887527466, "learning_rate": 2e-05, "loss": 0.05351225, "step": 15289 }, { "epoch": 30.58, "grad_norm": 1.2046937942504883, "learning_rate": 2e-05, "loss": 0.05110889, "step": 15290 }, { "epoch": 30.582, "grad_norm": 1.3633382320404053, "learning_rate": 2e-05, "loss": 0.07967256, "step": 15291 }, { "epoch": 30.584, "grad_norm": 1.1848328113555908, "learning_rate": 2e-05, "loss": 0.04018304, "step": 15292 }, { "epoch": 30.586, "grad_norm": 1.0506037473678589, "learning_rate": 2e-05, "loss": 0.04121135, "step": 15293 }, { "epoch": 30.588, "grad_norm": 1.2972482442855835, "learning_rate": 2e-05, "loss": 0.04374395, "step": 15294 }, { "epoch": 30.59, "grad_norm": 1.6701290607452393, "learning_rate": 2e-05, "loss": 0.04646797, "step": 15295 }, { "epoch": 30.592, "grad_norm": 2.8202621936798096, "learning_rate": 2e-05, "loss": 0.04236949, "step": 15296 }, { "epoch": 30.594, "grad_norm": 1.3037232160568237, "learning_rate": 2e-05, "loss": 0.05477159, "step": 15297 }, { "epoch": 30.596, "grad_norm": 1.035437822341919, "learning_rate": 2e-05, "loss": 0.03845677, "step": 15298 }, { "epoch": 30.598, "grad_norm": 1.3020297288894653, "learning_rate": 2e-05, "loss": 0.0544595, "step": 15299 }, { "epoch": 30.6, "grad_norm": 1.1204578876495361, "learning_rate": 2e-05, "loss": 0.04374269, "step": 15300 }, { "epoch": 30.602, "grad_norm": 1.2414798736572266, "learning_rate": 2e-05, "loss": 0.04983538, "step": 15301 }, { "epoch": 30.604, "grad_norm": 1.0599288940429688, "learning_rate": 2e-05, "loss": 0.04418595, "step": 15302 }, { "epoch": 30.606, "grad_norm": 1.505934476852417, "learning_rate": 2e-05, "loss": 0.04934649, "step": 15303 }, { "epoch": 30.608, "grad_norm": 0.8405028581619263, "learning_rate": 2e-05, "loss": 0.02861503, "step": 15304 }, { "epoch": 30.61, "grad_norm": 0.9566628336906433, "learning_rate": 2e-05, "loss": 0.04292151, "step": 15305 }, { "epoch": 30.612, "grad_norm": 1.1562131643295288, "learning_rate": 2e-05, "loss": 0.05511833, "step": 15306 }, { "epoch": 30.614, "grad_norm": 1.8530454635620117, "learning_rate": 2e-05, "loss": 0.04727079, "step": 15307 }, { "epoch": 30.616, "grad_norm": 1.14260733127594, "learning_rate": 2e-05, "loss": 0.03275755, "step": 15308 }, { "epoch": 30.618, "grad_norm": 0.9837680459022522, "learning_rate": 2e-05, "loss": 0.0407011, "step": 15309 }, { "epoch": 30.62, "grad_norm": 1.4966297149658203, "learning_rate": 2e-05, "loss": 0.05148937, "step": 15310 }, { "epoch": 30.622, "grad_norm": 1.049460768699646, "learning_rate": 2e-05, "loss": 0.04829581, "step": 15311 }, { "epoch": 30.624, "grad_norm": 1.8009073734283447, "learning_rate": 2e-05, "loss": 0.05343049, "step": 15312 }, { "epoch": 30.626, "grad_norm": 1.0605156421661377, "learning_rate": 2e-05, "loss": 0.03546093, "step": 15313 }, { "epoch": 30.628, "grad_norm": 1.1334810256958008, "learning_rate": 2e-05, "loss": 0.04230979, "step": 15314 }, { "epoch": 30.63, "grad_norm": 1.15669584274292, "learning_rate": 2e-05, "loss": 0.04007196, "step": 15315 }, { "epoch": 30.632, "grad_norm": 1.0652557611465454, "learning_rate": 2e-05, "loss": 0.03431261, "step": 15316 }, { "epoch": 30.634, "grad_norm": 1.4859952926635742, "learning_rate": 2e-05, "loss": 0.04152938, "step": 15317 }, { "epoch": 30.636, "grad_norm": 1.1583386659622192, "learning_rate": 2e-05, "loss": 0.0319959, "step": 15318 }, { "epoch": 30.638, "grad_norm": 1.1855254173278809, "learning_rate": 2e-05, "loss": 0.06064695, "step": 15319 }, { "epoch": 30.64, "grad_norm": 1.2364964485168457, "learning_rate": 2e-05, "loss": 0.04029825, "step": 15320 }, { "epoch": 30.642, "grad_norm": 1.1663819551467896, "learning_rate": 2e-05, "loss": 0.05407604, "step": 15321 }, { "epoch": 30.644, "grad_norm": 0.965399444103241, "learning_rate": 2e-05, "loss": 0.03759922, "step": 15322 }, { "epoch": 30.646, "grad_norm": 1.150786280632019, "learning_rate": 2e-05, "loss": 0.03996169, "step": 15323 }, { "epoch": 30.648, "grad_norm": 1.821763515472412, "learning_rate": 2e-05, "loss": 0.03955948, "step": 15324 }, { "epoch": 30.65, "grad_norm": 1.8540666103363037, "learning_rate": 2e-05, "loss": 0.04569282, "step": 15325 }, { "epoch": 30.652, "grad_norm": 1.2158501148223877, "learning_rate": 2e-05, "loss": 0.03636731, "step": 15326 }, { "epoch": 30.654, "grad_norm": 1.1168946027755737, "learning_rate": 2e-05, "loss": 0.03944979, "step": 15327 }, { "epoch": 30.656, "grad_norm": 2.462486982345581, "learning_rate": 2e-05, "loss": 0.04294262, "step": 15328 }, { "epoch": 30.658, "grad_norm": 2.151353359222412, "learning_rate": 2e-05, "loss": 0.05101956, "step": 15329 }, { "epoch": 30.66, "grad_norm": 1.167514681816101, "learning_rate": 2e-05, "loss": 0.0504293, "step": 15330 }, { "epoch": 30.662, "grad_norm": 1.0629163980484009, "learning_rate": 2e-05, "loss": 0.05545263, "step": 15331 }, { "epoch": 30.664, "grad_norm": 1.2508022785186768, "learning_rate": 2e-05, "loss": 0.04756314, "step": 15332 }, { "epoch": 30.666, "grad_norm": 2.191587448120117, "learning_rate": 2e-05, "loss": 0.05923266, "step": 15333 }, { "epoch": 30.668, "grad_norm": 1.0343167781829834, "learning_rate": 2e-05, "loss": 0.03771825, "step": 15334 }, { "epoch": 30.67, "grad_norm": 1.1824878454208374, "learning_rate": 2e-05, "loss": 0.0490543, "step": 15335 }, { "epoch": 30.672, "grad_norm": 0.9695870280265808, "learning_rate": 2e-05, "loss": 0.04015572, "step": 15336 }, { "epoch": 30.674, "grad_norm": 1.0497578382492065, "learning_rate": 2e-05, "loss": 0.04781066, "step": 15337 }, { "epoch": 30.676, "grad_norm": 1.8725987672805786, "learning_rate": 2e-05, "loss": 0.03381668, "step": 15338 }, { "epoch": 30.678, "grad_norm": 0.7763895988464355, "learning_rate": 2e-05, "loss": 0.02689964, "step": 15339 }, { "epoch": 30.68, "grad_norm": 1.3845587968826294, "learning_rate": 2e-05, "loss": 0.04089034, "step": 15340 }, { "epoch": 30.682, "grad_norm": 2.5463733673095703, "learning_rate": 2e-05, "loss": 0.0399016, "step": 15341 }, { "epoch": 30.684, "grad_norm": 1.0681096315383911, "learning_rate": 2e-05, "loss": 0.04198876, "step": 15342 }, { "epoch": 30.686, "grad_norm": 0.9461974501609802, "learning_rate": 2e-05, "loss": 0.04064506, "step": 15343 }, { "epoch": 30.688, "grad_norm": 1.4279195070266724, "learning_rate": 2e-05, "loss": 0.03445359, "step": 15344 }, { "epoch": 30.69, "grad_norm": 1.6010353565216064, "learning_rate": 2e-05, "loss": 0.04525471, "step": 15345 }, { "epoch": 30.692, "grad_norm": 2.028573513031006, "learning_rate": 2e-05, "loss": 0.03996722, "step": 15346 }, { "epoch": 30.694, "grad_norm": 1.1454328298568726, "learning_rate": 2e-05, "loss": 0.0536786, "step": 15347 }, { "epoch": 30.696, "grad_norm": 2.0636239051818848, "learning_rate": 2e-05, "loss": 0.05065024, "step": 15348 }, { "epoch": 30.698, "grad_norm": 2.575232744216919, "learning_rate": 2e-05, "loss": 0.05117173, "step": 15349 }, { "epoch": 30.7, "grad_norm": 1.1906006336212158, "learning_rate": 2e-05, "loss": 0.03360413, "step": 15350 }, { "epoch": 30.701999999999998, "grad_norm": 1.0897594690322876, "learning_rate": 2e-05, "loss": 0.05397581, "step": 15351 }, { "epoch": 30.704, "grad_norm": 1.240808367729187, "learning_rate": 2e-05, "loss": 0.05368846, "step": 15352 }, { "epoch": 30.706, "grad_norm": 1.674265742301941, "learning_rate": 2e-05, "loss": 0.04872287, "step": 15353 }, { "epoch": 30.708, "grad_norm": 2.8923308849334717, "learning_rate": 2e-05, "loss": 0.06465062, "step": 15354 }, { "epoch": 30.71, "grad_norm": 1.2651050090789795, "learning_rate": 2e-05, "loss": 0.07288038, "step": 15355 }, { "epoch": 30.712, "grad_norm": 1.17970871925354, "learning_rate": 2e-05, "loss": 0.03595836, "step": 15356 }, { "epoch": 30.714, "grad_norm": 1.038805365562439, "learning_rate": 2e-05, "loss": 0.04104574, "step": 15357 }, { "epoch": 30.716, "grad_norm": 1.9130254983901978, "learning_rate": 2e-05, "loss": 0.04657149, "step": 15358 }, { "epoch": 30.718, "grad_norm": 0.9137285947799683, "learning_rate": 2e-05, "loss": 0.04042149, "step": 15359 }, { "epoch": 30.72, "grad_norm": 1.1319931745529175, "learning_rate": 2e-05, "loss": 0.03976962, "step": 15360 }, { "epoch": 30.722, "grad_norm": 1.8142340183258057, "learning_rate": 2e-05, "loss": 0.05812642, "step": 15361 }, { "epoch": 30.724, "grad_norm": 1.204729676246643, "learning_rate": 2e-05, "loss": 0.04457648, "step": 15362 }, { "epoch": 30.726, "grad_norm": 1.1895710229873657, "learning_rate": 2e-05, "loss": 0.04511045, "step": 15363 }, { "epoch": 30.728, "grad_norm": 0.8947159051895142, "learning_rate": 2e-05, "loss": 0.03313739, "step": 15364 }, { "epoch": 30.73, "grad_norm": 1.6085420846939087, "learning_rate": 2e-05, "loss": 0.05404228, "step": 15365 }, { "epoch": 30.732, "grad_norm": 1.026764154434204, "learning_rate": 2e-05, "loss": 0.04004838, "step": 15366 }, { "epoch": 30.734, "grad_norm": 1.7096081972122192, "learning_rate": 2e-05, "loss": 0.05903807, "step": 15367 }, { "epoch": 30.736, "grad_norm": 1.1001003980636597, "learning_rate": 2e-05, "loss": 0.03341561, "step": 15368 }, { "epoch": 30.738, "grad_norm": 1.1454017162322998, "learning_rate": 2e-05, "loss": 0.03433079, "step": 15369 }, { "epoch": 30.74, "grad_norm": 1.2160550355911255, "learning_rate": 2e-05, "loss": 0.04120779, "step": 15370 }, { "epoch": 30.742, "grad_norm": 1.3594889640808105, "learning_rate": 2e-05, "loss": 0.03800708, "step": 15371 }, { "epoch": 30.744, "grad_norm": 1.3822988271713257, "learning_rate": 2e-05, "loss": 0.06002536, "step": 15372 }, { "epoch": 30.746, "grad_norm": 1.0744295120239258, "learning_rate": 2e-05, "loss": 0.04353554, "step": 15373 }, { "epoch": 30.748, "grad_norm": 1.837883710861206, "learning_rate": 2e-05, "loss": 0.04571882, "step": 15374 }, { "epoch": 30.75, "grad_norm": 1.0086209774017334, "learning_rate": 2e-05, "loss": 0.03041051, "step": 15375 }, { "epoch": 30.752, "grad_norm": 1.0633058547973633, "learning_rate": 2e-05, "loss": 0.05091667, "step": 15376 }, { "epoch": 30.754, "grad_norm": 1.0251227617263794, "learning_rate": 2e-05, "loss": 0.04383545, "step": 15377 }, { "epoch": 30.756, "grad_norm": 0.885025680065155, "learning_rate": 2e-05, "loss": 0.02776055, "step": 15378 }, { "epoch": 30.758, "grad_norm": 1.013938069343567, "learning_rate": 2e-05, "loss": 0.033874, "step": 15379 }, { "epoch": 30.76, "grad_norm": 1.0168402194976807, "learning_rate": 2e-05, "loss": 0.04567941, "step": 15380 }, { "epoch": 30.762, "grad_norm": 0.9793519973754883, "learning_rate": 2e-05, "loss": 0.03800535, "step": 15381 }, { "epoch": 30.764, "grad_norm": 0.9841427206993103, "learning_rate": 2e-05, "loss": 0.03535566, "step": 15382 }, { "epoch": 30.766, "grad_norm": 1.0492746829986572, "learning_rate": 2e-05, "loss": 0.04309023, "step": 15383 }, { "epoch": 30.768, "grad_norm": 1.056688904762268, "learning_rate": 2e-05, "loss": 0.03163824, "step": 15384 }, { "epoch": 30.77, "grad_norm": 1.2172820568084717, "learning_rate": 2e-05, "loss": 0.04012401, "step": 15385 }, { "epoch": 30.772, "grad_norm": 1.4256500005722046, "learning_rate": 2e-05, "loss": 0.06237305, "step": 15386 }, { "epoch": 30.774, "grad_norm": 0.8029309511184692, "learning_rate": 2e-05, "loss": 0.02714739, "step": 15387 }, { "epoch": 30.776, "grad_norm": 1.0328289270401, "learning_rate": 2e-05, "loss": 0.03093981, "step": 15388 }, { "epoch": 30.778, "grad_norm": 1.126611351966858, "learning_rate": 2e-05, "loss": 0.03905267, "step": 15389 }, { "epoch": 30.78, "grad_norm": 1.5406177043914795, "learning_rate": 2e-05, "loss": 0.06235263, "step": 15390 }, { "epoch": 30.782, "grad_norm": 2.3154942989349365, "learning_rate": 2e-05, "loss": 0.04638475, "step": 15391 }, { "epoch": 30.784, "grad_norm": 0.986137330532074, "learning_rate": 2e-05, "loss": 0.03957748, "step": 15392 }, { "epoch": 30.786, "grad_norm": 1.8473138809204102, "learning_rate": 2e-05, "loss": 0.05432895, "step": 15393 }, { "epoch": 30.788, "grad_norm": 1.6493146419525146, "learning_rate": 2e-05, "loss": 0.02780548, "step": 15394 }, { "epoch": 30.79, "grad_norm": 1.2216272354125977, "learning_rate": 2e-05, "loss": 0.05976984, "step": 15395 }, { "epoch": 30.792, "grad_norm": 5.199351787567139, "learning_rate": 2e-05, "loss": 0.03855762, "step": 15396 }, { "epoch": 30.794, "grad_norm": 1.044447898864746, "learning_rate": 2e-05, "loss": 0.03780142, "step": 15397 }, { "epoch": 30.796, "grad_norm": 1.2395384311676025, "learning_rate": 2e-05, "loss": 0.04773392, "step": 15398 }, { "epoch": 30.798000000000002, "grad_norm": 1.2598720788955688, "learning_rate": 2e-05, "loss": 0.04073113, "step": 15399 }, { "epoch": 30.8, "grad_norm": 2.0737268924713135, "learning_rate": 2e-05, "loss": 0.05465418, "step": 15400 }, { "epoch": 30.802, "grad_norm": 1.4324662685394287, "learning_rate": 2e-05, "loss": 0.05240825, "step": 15401 }, { "epoch": 30.804, "grad_norm": 1.2016414403915405, "learning_rate": 2e-05, "loss": 0.0517023, "step": 15402 }, { "epoch": 30.806, "grad_norm": 1.3755743503570557, "learning_rate": 2e-05, "loss": 0.03898351, "step": 15403 }, { "epoch": 30.808, "grad_norm": 0.9742904305458069, "learning_rate": 2e-05, "loss": 0.04295686, "step": 15404 }, { "epoch": 30.81, "grad_norm": 1.8665202856063843, "learning_rate": 2e-05, "loss": 0.06502125, "step": 15405 }, { "epoch": 30.812, "grad_norm": 0.8659533858299255, "learning_rate": 2e-05, "loss": 0.03056756, "step": 15406 }, { "epoch": 30.814, "grad_norm": 1.1280931234359741, "learning_rate": 2e-05, "loss": 0.05169143, "step": 15407 }, { "epoch": 30.816, "grad_norm": 4.226912021636963, "learning_rate": 2e-05, "loss": 0.06439896, "step": 15408 }, { "epoch": 30.818, "grad_norm": 1.3791491985321045, "learning_rate": 2e-05, "loss": 0.04455005, "step": 15409 }, { "epoch": 30.82, "grad_norm": 1.1163569688796997, "learning_rate": 2e-05, "loss": 0.04386018, "step": 15410 }, { "epoch": 30.822, "grad_norm": 2.1642649173736572, "learning_rate": 2e-05, "loss": 0.04944849, "step": 15411 }, { "epoch": 30.824, "grad_norm": 1.1367419958114624, "learning_rate": 2e-05, "loss": 0.04384803, "step": 15412 }, { "epoch": 30.826, "grad_norm": 2.95975661277771, "learning_rate": 2e-05, "loss": 0.0560007, "step": 15413 }, { "epoch": 30.828, "grad_norm": 2.2994563579559326, "learning_rate": 2e-05, "loss": 0.060587, "step": 15414 }, { "epoch": 30.83, "grad_norm": 1.1302201747894287, "learning_rate": 2e-05, "loss": 0.0459856, "step": 15415 }, { "epoch": 30.832, "grad_norm": 1.375911831855774, "learning_rate": 2e-05, "loss": 0.04081506, "step": 15416 }, { "epoch": 30.834, "grad_norm": 1.1415051221847534, "learning_rate": 2e-05, "loss": 0.04379798, "step": 15417 }, { "epoch": 30.836, "grad_norm": 1.0675766468048096, "learning_rate": 2e-05, "loss": 0.04674433, "step": 15418 }, { "epoch": 30.838, "grad_norm": 0.9754721522331238, "learning_rate": 2e-05, "loss": 0.04392415, "step": 15419 }, { "epoch": 30.84, "grad_norm": 0.8399175405502319, "learning_rate": 2e-05, "loss": 0.03567776, "step": 15420 }, { "epoch": 30.842, "grad_norm": 1.0285835266113281, "learning_rate": 2e-05, "loss": 0.03841371, "step": 15421 }, { "epoch": 30.844, "grad_norm": 0.9900828003883362, "learning_rate": 2e-05, "loss": 0.0342926, "step": 15422 }, { "epoch": 30.846, "grad_norm": 3.3070645332336426, "learning_rate": 2e-05, "loss": 0.0424522, "step": 15423 }, { "epoch": 30.848, "grad_norm": 1.0857558250427246, "learning_rate": 2e-05, "loss": 0.04042138, "step": 15424 }, { "epoch": 30.85, "grad_norm": 1.0208579301834106, "learning_rate": 2e-05, "loss": 0.04381087, "step": 15425 }, { "epoch": 30.852, "grad_norm": 1.3216910362243652, "learning_rate": 2e-05, "loss": 0.03878704, "step": 15426 }, { "epoch": 30.854, "grad_norm": 1.3146594762802124, "learning_rate": 2e-05, "loss": 0.05996003, "step": 15427 }, { "epoch": 30.856, "grad_norm": 1.3427999019622803, "learning_rate": 2e-05, "loss": 0.03887988, "step": 15428 }, { "epoch": 30.858, "grad_norm": 1.2174770832061768, "learning_rate": 2e-05, "loss": 0.0479669, "step": 15429 }, { "epoch": 30.86, "grad_norm": 1.0892000198364258, "learning_rate": 2e-05, "loss": 0.04858973, "step": 15430 }, { "epoch": 30.862, "grad_norm": 1.161887526512146, "learning_rate": 2e-05, "loss": 0.03980863, "step": 15431 }, { "epoch": 30.864, "grad_norm": 1.1405856609344482, "learning_rate": 2e-05, "loss": 0.05016105, "step": 15432 }, { "epoch": 30.866, "grad_norm": 2.1021804809570312, "learning_rate": 2e-05, "loss": 0.05378185, "step": 15433 }, { "epoch": 30.868, "grad_norm": 1.1455076932907104, "learning_rate": 2e-05, "loss": 0.04398486, "step": 15434 }, { "epoch": 30.87, "grad_norm": 1.7675573825836182, "learning_rate": 2e-05, "loss": 0.05465432, "step": 15435 }, { "epoch": 30.872, "grad_norm": 1.5450236797332764, "learning_rate": 2e-05, "loss": 0.0561939, "step": 15436 }, { "epoch": 30.874, "grad_norm": 1.4901841878890991, "learning_rate": 2e-05, "loss": 0.05664141, "step": 15437 }, { "epoch": 30.876, "grad_norm": 1.0419028997421265, "learning_rate": 2e-05, "loss": 0.03964368, "step": 15438 }, { "epoch": 30.878, "grad_norm": 1.2022950649261475, "learning_rate": 2e-05, "loss": 0.04822465, "step": 15439 }, { "epoch": 30.88, "grad_norm": 1.2207860946655273, "learning_rate": 2e-05, "loss": 0.05028158, "step": 15440 }, { "epoch": 30.882, "grad_norm": 1.423685908317566, "learning_rate": 2e-05, "loss": 0.07643472, "step": 15441 }, { "epoch": 30.884, "grad_norm": 1.1519205570220947, "learning_rate": 2e-05, "loss": 0.0304066, "step": 15442 }, { "epoch": 30.886, "grad_norm": 1.1526474952697754, "learning_rate": 2e-05, "loss": 0.04110112, "step": 15443 }, { "epoch": 30.888, "grad_norm": 3.1199638843536377, "learning_rate": 2e-05, "loss": 0.05501474, "step": 15444 }, { "epoch": 30.89, "grad_norm": 0.8520972728729248, "learning_rate": 2e-05, "loss": 0.02769852, "step": 15445 }, { "epoch": 30.892, "grad_norm": 1.225395917892456, "learning_rate": 2e-05, "loss": 0.06401318, "step": 15446 }, { "epoch": 30.894, "grad_norm": 0.8839061260223389, "learning_rate": 2e-05, "loss": 0.03327332, "step": 15447 }, { "epoch": 30.896, "grad_norm": 0.8835104703903198, "learning_rate": 2e-05, "loss": 0.04070164, "step": 15448 }, { "epoch": 30.898, "grad_norm": 1.6561013460159302, "learning_rate": 2e-05, "loss": 0.04128964, "step": 15449 }, { "epoch": 30.9, "grad_norm": 1.1424424648284912, "learning_rate": 2e-05, "loss": 0.05308126, "step": 15450 }, { "epoch": 30.902, "grad_norm": 1.0643770694732666, "learning_rate": 2e-05, "loss": 0.04621812, "step": 15451 }, { "epoch": 30.904, "grad_norm": 1.3661915063858032, "learning_rate": 2e-05, "loss": 0.03726829, "step": 15452 }, { "epoch": 30.906, "grad_norm": 1.5214005708694458, "learning_rate": 2e-05, "loss": 0.04527389, "step": 15453 }, { "epoch": 30.908, "grad_norm": 1.6379613876342773, "learning_rate": 2e-05, "loss": 0.04093814, "step": 15454 }, { "epoch": 30.91, "grad_norm": 1.3815046548843384, "learning_rate": 2e-05, "loss": 0.05547067, "step": 15455 }, { "epoch": 30.912, "grad_norm": 1.4670137166976929, "learning_rate": 2e-05, "loss": 0.05137615, "step": 15456 }, { "epoch": 30.914, "grad_norm": 1.4527828693389893, "learning_rate": 2e-05, "loss": 0.0435155, "step": 15457 }, { "epoch": 30.916, "grad_norm": 2.5486485958099365, "learning_rate": 2e-05, "loss": 0.04488666, "step": 15458 }, { "epoch": 30.918, "grad_norm": 0.9249729514122009, "learning_rate": 2e-05, "loss": 0.03851863, "step": 15459 }, { "epoch": 30.92, "grad_norm": 0.9515257477760315, "learning_rate": 2e-05, "loss": 0.02254659, "step": 15460 }, { "epoch": 30.922, "grad_norm": 1.2239837646484375, "learning_rate": 2e-05, "loss": 0.05874565, "step": 15461 }, { "epoch": 30.924, "grad_norm": 1.480686902999878, "learning_rate": 2e-05, "loss": 0.06617691, "step": 15462 }, { "epoch": 30.926, "grad_norm": 2.887449026107788, "learning_rate": 2e-05, "loss": 0.05954497, "step": 15463 }, { "epoch": 30.928, "grad_norm": 1.225870132446289, "learning_rate": 2e-05, "loss": 0.03695433, "step": 15464 }, { "epoch": 30.93, "grad_norm": 1.030002474784851, "learning_rate": 2e-05, "loss": 0.03813251, "step": 15465 }, { "epoch": 30.932, "grad_norm": 1.291985273361206, "learning_rate": 2e-05, "loss": 0.03995899, "step": 15466 }, { "epoch": 30.934, "grad_norm": 1.2552183866500854, "learning_rate": 2e-05, "loss": 0.05444008, "step": 15467 }, { "epoch": 30.936, "grad_norm": 1.4683557748794556, "learning_rate": 2e-05, "loss": 0.04387238, "step": 15468 }, { "epoch": 30.938, "grad_norm": 1.2251843214035034, "learning_rate": 2e-05, "loss": 0.03709442, "step": 15469 }, { "epoch": 30.94, "grad_norm": 1.1350517272949219, "learning_rate": 2e-05, "loss": 0.04515088, "step": 15470 }, { "epoch": 30.942, "grad_norm": 1.066111445426941, "learning_rate": 2e-05, "loss": 0.04165583, "step": 15471 }, { "epoch": 30.944, "grad_norm": 1.1714292764663696, "learning_rate": 2e-05, "loss": 0.0495844, "step": 15472 }, { "epoch": 30.946, "grad_norm": 1.5184941291809082, "learning_rate": 2e-05, "loss": 0.05075445, "step": 15473 }, { "epoch": 30.948, "grad_norm": 1.4089317321777344, "learning_rate": 2e-05, "loss": 0.04436463, "step": 15474 }, { "epoch": 30.95, "grad_norm": 1.071393370628357, "learning_rate": 2e-05, "loss": 0.04614248, "step": 15475 }, { "epoch": 30.951999999999998, "grad_norm": 0.8386540412902832, "learning_rate": 2e-05, "loss": 0.03466738, "step": 15476 }, { "epoch": 30.954, "grad_norm": 1.243491530418396, "learning_rate": 2e-05, "loss": 0.05237167, "step": 15477 }, { "epoch": 30.956, "grad_norm": 1.5387473106384277, "learning_rate": 2e-05, "loss": 0.04224633, "step": 15478 }, { "epoch": 30.958, "grad_norm": 0.9433426260948181, "learning_rate": 2e-05, "loss": 0.05036787, "step": 15479 }, { "epoch": 30.96, "grad_norm": 2.2181012630462646, "learning_rate": 2e-05, "loss": 0.05980574, "step": 15480 }, { "epoch": 30.962, "grad_norm": 1.1665034294128418, "learning_rate": 2e-05, "loss": 0.04869165, "step": 15481 }, { "epoch": 30.964, "grad_norm": 1.078522801399231, "learning_rate": 2e-05, "loss": 0.03750516, "step": 15482 }, { "epoch": 30.966, "grad_norm": 1.5242314338684082, "learning_rate": 2e-05, "loss": 0.05432172, "step": 15483 }, { "epoch": 30.968, "grad_norm": 0.9863197803497314, "learning_rate": 2e-05, "loss": 0.02705454, "step": 15484 }, { "epoch": 30.97, "grad_norm": 1.453996181488037, "learning_rate": 2e-05, "loss": 0.03672886, "step": 15485 }, { "epoch": 30.972, "grad_norm": 1.135066032409668, "learning_rate": 2e-05, "loss": 0.04941032, "step": 15486 }, { "epoch": 30.974, "grad_norm": 1.081764578819275, "learning_rate": 2e-05, "loss": 0.03595746, "step": 15487 }, { "epoch": 30.976, "grad_norm": 1.1244211196899414, "learning_rate": 2e-05, "loss": 0.05484168, "step": 15488 }, { "epoch": 30.978, "grad_norm": 1.1895266771316528, "learning_rate": 2e-05, "loss": 0.0467955, "step": 15489 }, { "epoch": 30.98, "grad_norm": 1.357893943786621, "learning_rate": 2e-05, "loss": 0.06969759, "step": 15490 }, { "epoch": 30.982, "grad_norm": 1.5200549364089966, "learning_rate": 2e-05, "loss": 0.03498986, "step": 15491 }, { "epoch": 30.984, "grad_norm": 1.2470046281814575, "learning_rate": 2e-05, "loss": 0.05784412, "step": 15492 }, { "epoch": 30.986, "grad_norm": 1.0575257539749146, "learning_rate": 2e-05, "loss": 0.03117427, "step": 15493 }, { "epoch": 30.988, "grad_norm": 0.8743823170661926, "learning_rate": 2e-05, "loss": 0.03969342, "step": 15494 }, { "epoch": 30.99, "grad_norm": 0.9914343953132629, "learning_rate": 2e-05, "loss": 0.03674193, "step": 15495 }, { "epoch": 30.992, "grad_norm": 1.0648630857467651, "learning_rate": 2e-05, "loss": 0.04313104, "step": 15496 }, { "epoch": 30.994, "grad_norm": 1.8476170301437378, "learning_rate": 2e-05, "loss": 0.05208312, "step": 15497 }, { "epoch": 30.996, "grad_norm": 1.7663272619247437, "learning_rate": 2e-05, "loss": 0.04593379, "step": 15498 }, { "epoch": 30.998, "grad_norm": 1.1099039316177368, "learning_rate": 2e-05, "loss": 0.0439904, "step": 15499 }, { "epoch": 31.0, "grad_norm": 0.9783741235733032, "learning_rate": 2e-05, "loss": 0.02808396, "step": 15500 }, { "epoch": 31.0, "eval_performance": { "AngleClassification_1": 0.998, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9840319361277445, "Equal_1": 0.996, "Equal_2": 0.9720558882235529, "Equal_3": 0.9880239520958084, "LineComparison_1": 0.998, "LineComparison_2": 1.0, "LineComparison_3": 0.9960079840319361, "Parallel_1": 0.9899799599198397, "Parallel_2": 0.9979959919839679, "Parallel_3": 0.994, "Perpendicular_1": 0.992, "Perpendicular_2": 0.996, "Perpendicular_3": 0.8937875751503006, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.998, "PointLiesOnCircle_3": 0.9856666666666666, "PointLiesOnLine_1": 0.9939879759519038, "PointLiesOnLine_2": 0.9979959919839679, "PointLiesOnLine_3": 0.9760479041916168 }, "eval_runtime": 224.2291, "eval_samples_per_second": 46.827, "eval_steps_per_second": 0.937, "step": 15500 }, { "epoch": 31.002, "grad_norm": 1.0762115716934204, "learning_rate": 2e-05, "loss": 0.03254039, "step": 15501 }, { "epoch": 31.004, "grad_norm": 2.8445839881896973, "learning_rate": 2e-05, "loss": 0.03663643, "step": 15502 }, { "epoch": 31.006, "grad_norm": 1.1797196865081787, "learning_rate": 2e-05, "loss": 0.04105611, "step": 15503 }, { "epoch": 31.008, "grad_norm": 2.5839858055114746, "learning_rate": 2e-05, "loss": 0.05568786, "step": 15504 }, { "epoch": 31.01, "grad_norm": 1.1242340803146362, "learning_rate": 2e-05, "loss": 0.03620045, "step": 15505 }, { "epoch": 31.012, "grad_norm": 2.647772789001465, "learning_rate": 2e-05, "loss": 0.05580802, "step": 15506 }, { "epoch": 31.014, "grad_norm": 1.4541312456130981, "learning_rate": 2e-05, "loss": 0.06337918, "step": 15507 }, { "epoch": 31.016, "grad_norm": 1.065464973449707, "learning_rate": 2e-05, "loss": 0.05095109, "step": 15508 }, { "epoch": 31.018, "grad_norm": 1.1259825229644775, "learning_rate": 2e-05, "loss": 0.04452449, "step": 15509 }, { "epoch": 31.02, "grad_norm": 1.1820029020309448, "learning_rate": 2e-05, "loss": 0.05216357, "step": 15510 }, { "epoch": 31.022, "grad_norm": 1.2551478147506714, "learning_rate": 2e-05, "loss": 0.05309939, "step": 15511 }, { "epoch": 31.024, "grad_norm": 0.8424102663993835, "learning_rate": 2e-05, "loss": 0.02768048, "step": 15512 }, { "epoch": 31.026, "grad_norm": 1.2092243432998657, "learning_rate": 2e-05, "loss": 0.04730319, "step": 15513 }, { "epoch": 31.028, "grad_norm": 1.045640230178833, "learning_rate": 2e-05, "loss": 0.04275579, "step": 15514 }, { "epoch": 31.03, "grad_norm": 0.8907133936882019, "learning_rate": 2e-05, "loss": 0.03309635, "step": 15515 }, { "epoch": 31.032, "grad_norm": 1.1393848657608032, "learning_rate": 2e-05, "loss": 0.0534641, "step": 15516 }, { "epoch": 31.034, "grad_norm": 1.0165069103240967, "learning_rate": 2e-05, "loss": 0.04153319, "step": 15517 }, { "epoch": 31.036, "grad_norm": 1.3333238363265991, "learning_rate": 2e-05, "loss": 0.06853731, "step": 15518 }, { "epoch": 31.038, "grad_norm": 1.2803775072097778, "learning_rate": 2e-05, "loss": 0.04879669, "step": 15519 }, { "epoch": 31.04, "grad_norm": 1.2563199996948242, "learning_rate": 2e-05, "loss": 0.03920309, "step": 15520 }, { "epoch": 31.042, "grad_norm": 2.2977094650268555, "learning_rate": 2e-05, "loss": 0.04906676, "step": 15521 }, { "epoch": 31.044, "grad_norm": 1.0788346529006958, "learning_rate": 2e-05, "loss": 0.03881629, "step": 15522 }, { "epoch": 31.046, "grad_norm": 1.5086506605148315, "learning_rate": 2e-05, "loss": 0.06433197, "step": 15523 }, { "epoch": 31.048, "grad_norm": 1.64910888671875, "learning_rate": 2e-05, "loss": 0.03654952, "step": 15524 }, { "epoch": 31.05, "grad_norm": 1.0031471252441406, "learning_rate": 2e-05, "loss": 0.04537314, "step": 15525 }, { "epoch": 31.052, "grad_norm": 1.1695860624313354, "learning_rate": 2e-05, "loss": 0.05057376, "step": 15526 }, { "epoch": 31.054, "grad_norm": 0.9735826253890991, "learning_rate": 2e-05, "loss": 0.04925105, "step": 15527 }, { "epoch": 31.056, "grad_norm": 0.8226759433746338, "learning_rate": 2e-05, "loss": 0.02843454, "step": 15528 }, { "epoch": 31.058, "grad_norm": 1.4787867069244385, "learning_rate": 2e-05, "loss": 0.0424909, "step": 15529 }, { "epoch": 31.06, "grad_norm": 1.179825782775879, "learning_rate": 2e-05, "loss": 0.04075205, "step": 15530 }, { "epoch": 31.062, "grad_norm": 2.663902759552002, "learning_rate": 2e-05, "loss": 0.06308943, "step": 15531 }, { "epoch": 31.064, "grad_norm": 1.238585114479065, "learning_rate": 2e-05, "loss": 0.04971069, "step": 15532 }, { "epoch": 31.066, "grad_norm": 1.2162044048309326, "learning_rate": 2e-05, "loss": 0.04703705, "step": 15533 }, { "epoch": 31.068, "grad_norm": 0.973568856716156, "learning_rate": 2e-05, "loss": 0.0434709, "step": 15534 }, { "epoch": 31.07, "grad_norm": 2.285914897918701, "learning_rate": 2e-05, "loss": 0.05640218, "step": 15535 }, { "epoch": 31.072, "grad_norm": 1.309373378753662, "learning_rate": 2e-05, "loss": 0.04495179, "step": 15536 }, { "epoch": 31.074, "grad_norm": 1.138826847076416, "learning_rate": 2e-05, "loss": 0.04615146, "step": 15537 }, { "epoch": 31.076, "grad_norm": 2.1123273372650146, "learning_rate": 2e-05, "loss": 0.05584462, "step": 15538 }, { "epoch": 31.078, "grad_norm": 1.1084779500961304, "learning_rate": 2e-05, "loss": 0.04978365, "step": 15539 }, { "epoch": 31.08, "grad_norm": 1.300132393836975, "learning_rate": 2e-05, "loss": 0.04754419, "step": 15540 }, { "epoch": 31.082, "grad_norm": 1.2345386743545532, "learning_rate": 2e-05, "loss": 0.04915947, "step": 15541 }, { "epoch": 31.084, "grad_norm": 1.2463332414627075, "learning_rate": 2e-05, "loss": 0.05059737, "step": 15542 }, { "epoch": 31.086, "grad_norm": 1.5540850162506104, "learning_rate": 2e-05, "loss": 0.05068446, "step": 15543 }, { "epoch": 31.088, "grad_norm": 1.4838085174560547, "learning_rate": 2e-05, "loss": 0.05688158, "step": 15544 }, { "epoch": 31.09, "grad_norm": 1.6285920143127441, "learning_rate": 2e-05, "loss": 0.03914272, "step": 15545 }, { "epoch": 31.092, "grad_norm": 1.4394869804382324, "learning_rate": 2e-05, "loss": 0.03176517, "step": 15546 }, { "epoch": 31.094, "grad_norm": 1.0067249536514282, "learning_rate": 2e-05, "loss": 0.03047253, "step": 15547 }, { "epoch": 31.096, "grad_norm": 1.0721482038497925, "learning_rate": 2e-05, "loss": 0.04787499, "step": 15548 }, { "epoch": 31.098, "grad_norm": 1.5150259733200073, "learning_rate": 2e-05, "loss": 0.0412864, "step": 15549 }, { "epoch": 31.1, "grad_norm": 1.0313751697540283, "learning_rate": 2e-05, "loss": 0.03975821, "step": 15550 }, { "epoch": 31.102, "grad_norm": 0.9426699280738831, "learning_rate": 2e-05, "loss": 0.04514257, "step": 15551 }, { "epoch": 31.104, "grad_norm": 1.9875872135162354, "learning_rate": 2e-05, "loss": 0.03541224, "step": 15552 }, { "epoch": 31.106, "grad_norm": 1.380558967590332, "learning_rate": 2e-05, "loss": 0.04382839, "step": 15553 }, { "epoch": 31.108, "grad_norm": 1.3085435628890991, "learning_rate": 2e-05, "loss": 0.04363869, "step": 15554 }, { "epoch": 31.11, "grad_norm": 1.4467430114746094, "learning_rate": 2e-05, "loss": 0.05015381, "step": 15555 }, { "epoch": 31.112, "grad_norm": 1.6829397678375244, "learning_rate": 2e-05, "loss": 0.04383092, "step": 15556 }, { "epoch": 31.114, "grad_norm": 1.2976007461547852, "learning_rate": 2e-05, "loss": 0.04883436, "step": 15557 }, { "epoch": 31.116, "grad_norm": 1.2922505140304565, "learning_rate": 2e-05, "loss": 0.04448408, "step": 15558 }, { "epoch": 31.118, "grad_norm": 1.0699033737182617, "learning_rate": 2e-05, "loss": 0.04153567, "step": 15559 }, { "epoch": 31.12, "grad_norm": 1.8308688402175903, "learning_rate": 2e-05, "loss": 0.05646932, "step": 15560 }, { "epoch": 31.122, "grad_norm": 1.0922354459762573, "learning_rate": 2e-05, "loss": 0.04574072, "step": 15561 }, { "epoch": 31.124, "grad_norm": 1.048677921295166, "learning_rate": 2e-05, "loss": 0.04131946, "step": 15562 }, { "epoch": 31.126, "grad_norm": 1.1248000860214233, "learning_rate": 2e-05, "loss": 0.04250053, "step": 15563 }, { "epoch": 31.128, "grad_norm": 0.7807497382164001, "learning_rate": 2e-05, "loss": 0.0262814, "step": 15564 }, { "epoch": 31.13, "grad_norm": 1.7389615774154663, "learning_rate": 2e-05, "loss": 0.03884977, "step": 15565 }, { "epoch": 31.132, "grad_norm": 1.052390694618225, "learning_rate": 2e-05, "loss": 0.0327931, "step": 15566 }, { "epoch": 31.134, "grad_norm": 1.119512677192688, "learning_rate": 2e-05, "loss": 0.03777361, "step": 15567 }, { "epoch": 31.136, "grad_norm": 1.1454516649246216, "learning_rate": 2e-05, "loss": 0.03934929, "step": 15568 }, { "epoch": 31.138, "grad_norm": 1.2114750146865845, "learning_rate": 2e-05, "loss": 0.04091373, "step": 15569 }, { "epoch": 31.14, "grad_norm": 1.0785170793533325, "learning_rate": 2e-05, "loss": 0.04788768, "step": 15570 }, { "epoch": 31.142, "grad_norm": 0.7798777222633362, "learning_rate": 2e-05, "loss": 0.03024499, "step": 15571 }, { "epoch": 31.144, "grad_norm": 1.8262717723846436, "learning_rate": 2e-05, "loss": 0.05653293, "step": 15572 }, { "epoch": 31.146, "grad_norm": 1.4031890630722046, "learning_rate": 2e-05, "loss": 0.0470552, "step": 15573 }, { "epoch": 31.148, "grad_norm": 2.539111614227295, "learning_rate": 2e-05, "loss": 0.04696056, "step": 15574 }, { "epoch": 31.15, "grad_norm": 1.2325671911239624, "learning_rate": 2e-05, "loss": 0.05807349, "step": 15575 }, { "epoch": 31.152, "grad_norm": 1.2042980194091797, "learning_rate": 2e-05, "loss": 0.03721279, "step": 15576 }, { "epoch": 31.154, "grad_norm": 1.0200237035751343, "learning_rate": 2e-05, "loss": 0.03576282, "step": 15577 }, { "epoch": 31.156, "grad_norm": 1.317344069480896, "learning_rate": 2e-05, "loss": 0.05826048, "step": 15578 }, { "epoch": 31.158, "grad_norm": 0.9854938387870789, "learning_rate": 2e-05, "loss": 0.03677797, "step": 15579 }, { "epoch": 31.16, "grad_norm": 1.3857543468475342, "learning_rate": 2e-05, "loss": 0.04401205, "step": 15580 }, { "epoch": 31.162, "grad_norm": 1.2355326414108276, "learning_rate": 2e-05, "loss": 0.04083851, "step": 15581 }, { "epoch": 31.164, "grad_norm": 1.4008420705795288, "learning_rate": 2e-05, "loss": 0.04319207, "step": 15582 }, { "epoch": 31.166, "grad_norm": 1.4046244621276855, "learning_rate": 2e-05, "loss": 0.03883013, "step": 15583 }, { "epoch": 31.168, "grad_norm": 1.0645074844360352, "learning_rate": 2e-05, "loss": 0.03111164, "step": 15584 }, { "epoch": 31.17, "grad_norm": 1.2474641799926758, "learning_rate": 2e-05, "loss": 0.04996317, "step": 15585 }, { "epoch": 31.172, "grad_norm": 1.528091311454773, "learning_rate": 2e-05, "loss": 0.04764867, "step": 15586 }, { "epoch": 31.174, "grad_norm": 0.9713089466094971, "learning_rate": 2e-05, "loss": 0.03092572, "step": 15587 }, { "epoch": 31.176, "grad_norm": 1.388753890991211, "learning_rate": 2e-05, "loss": 0.05143832, "step": 15588 }, { "epoch": 31.178, "grad_norm": 1.024638295173645, "learning_rate": 2e-05, "loss": 0.03624693, "step": 15589 }, { "epoch": 31.18, "grad_norm": 1.2838521003723145, "learning_rate": 2e-05, "loss": 0.05071371, "step": 15590 }, { "epoch": 31.182, "grad_norm": 1.2331929206848145, "learning_rate": 2e-05, "loss": 0.03981777, "step": 15591 }, { "epoch": 31.184, "grad_norm": 1.2013381719589233, "learning_rate": 2e-05, "loss": 0.03994104, "step": 15592 }, { "epoch": 31.186, "grad_norm": 1.1696420907974243, "learning_rate": 2e-05, "loss": 0.04622369, "step": 15593 }, { "epoch": 31.188, "grad_norm": 1.2114421129226685, "learning_rate": 2e-05, "loss": 0.04737671, "step": 15594 }, { "epoch": 31.19, "grad_norm": 3.1840028762817383, "learning_rate": 2e-05, "loss": 0.05424745, "step": 15595 }, { "epoch": 31.192, "grad_norm": 0.9180657267570496, "learning_rate": 2e-05, "loss": 0.03105077, "step": 15596 }, { "epoch": 31.194, "grad_norm": 2.774507999420166, "learning_rate": 2e-05, "loss": 0.05922185, "step": 15597 }, { "epoch": 31.196, "grad_norm": 1.3324391841888428, "learning_rate": 2e-05, "loss": 0.07509143, "step": 15598 }, { "epoch": 31.198, "grad_norm": 1.4697195291519165, "learning_rate": 2e-05, "loss": 0.0475155, "step": 15599 }, { "epoch": 31.2, "grad_norm": 1.262103796005249, "learning_rate": 2e-05, "loss": 0.0528318, "step": 15600 }, { "epoch": 31.202, "grad_norm": 1.2947865724563599, "learning_rate": 2e-05, "loss": 0.05472211, "step": 15601 }, { "epoch": 31.204, "grad_norm": 1.3033138513565063, "learning_rate": 2e-05, "loss": 0.04186102, "step": 15602 }, { "epoch": 31.206, "grad_norm": 1.0310145616531372, "learning_rate": 2e-05, "loss": 0.03951973, "step": 15603 }, { "epoch": 31.208, "grad_norm": 1.1158863306045532, "learning_rate": 2e-05, "loss": 0.05366652, "step": 15604 }, { "epoch": 31.21, "grad_norm": 2.2124950885772705, "learning_rate": 2e-05, "loss": 0.04289714, "step": 15605 }, { "epoch": 31.212, "grad_norm": 1.0827949047088623, "learning_rate": 2e-05, "loss": 0.03616601, "step": 15606 }, { "epoch": 31.214, "grad_norm": 0.9997469186782837, "learning_rate": 2e-05, "loss": 0.02913173, "step": 15607 }, { "epoch": 31.216, "grad_norm": 1.7722761631011963, "learning_rate": 2e-05, "loss": 0.03852941, "step": 15608 }, { "epoch": 31.218, "grad_norm": 1.3906171321868896, "learning_rate": 2e-05, "loss": 0.02763237, "step": 15609 }, { "epoch": 31.22, "grad_norm": 1.0609713792800903, "learning_rate": 2e-05, "loss": 0.02879823, "step": 15610 }, { "epoch": 31.222, "grad_norm": 1.0868626832962036, "learning_rate": 2e-05, "loss": 0.05553667, "step": 15611 }, { "epoch": 31.224, "grad_norm": 1.1028218269348145, "learning_rate": 2e-05, "loss": 0.04911265, "step": 15612 }, { "epoch": 31.226, "grad_norm": 4.763792991638184, "learning_rate": 2e-05, "loss": 0.05123036, "step": 15613 }, { "epoch": 31.228, "grad_norm": 3.5426268577575684, "learning_rate": 2e-05, "loss": 0.05969984, "step": 15614 }, { "epoch": 31.23, "grad_norm": 1.249315857887268, "learning_rate": 2e-05, "loss": 0.05426806, "step": 15615 }, { "epoch": 31.232, "grad_norm": 1.3891571760177612, "learning_rate": 2e-05, "loss": 0.04167043, "step": 15616 }, { "epoch": 31.234, "grad_norm": 1.2604353427886963, "learning_rate": 2e-05, "loss": 0.04896831, "step": 15617 }, { "epoch": 31.236, "grad_norm": 1.9731359481811523, "learning_rate": 2e-05, "loss": 0.0455533, "step": 15618 }, { "epoch": 31.238, "grad_norm": 1.475806474685669, "learning_rate": 2e-05, "loss": 0.03637319, "step": 15619 }, { "epoch": 31.24, "grad_norm": 1.0874478816986084, "learning_rate": 2e-05, "loss": 0.04791122, "step": 15620 }, { "epoch": 31.242, "grad_norm": 1.6708861589431763, "learning_rate": 2e-05, "loss": 0.06172489, "step": 15621 }, { "epoch": 31.244, "grad_norm": 1.0884424448013306, "learning_rate": 2e-05, "loss": 0.04378254, "step": 15622 }, { "epoch": 31.246, "grad_norm": 0.9615774154663086, "learning_rate": 2e-05, "loss": 0.04025812, "step": 15623 }, { "epoch": 31.248, "grad_norm": 1.8384637832641602, "learning_rate": 2e-05, "loss": 0.04432861, "step": 15624 }, { "epoch": 31.25, "grad_norm": 0.9626439809799194, "learning_rate": 2e-05, "loss": 0.03138332, "step": 15625 }, { "epoch": 31.252, "grad_norm": 1.9574202299118042, "learning_rate": 2e-05, "loss": 0.05803779, "step": 15626 }, { "epoch": 31.254, "grad_norm": 1.1588150262832642, "learning_rate": 2e-05, "loss": 0.04179052, "step": 15627 }, { "epoch": 31.256, "grad_norm": 1.0624265670776367, "learning_rate": 2e-05, "loss": 0.04120559, "step": 15628 }, { "epoch": 31.258, "grad_norm": 0.9347062706947327, "learning_rate": 2e-05, "loss": 0.04014846, "step": 15629 }, { "epoch": 31.26, "grad_norm": 1.1207269430160522, "learning_rate": 2e-05, "loss": 0.04577953, "step": 15630 }, { "epoch": 31.262, "grad_norm": 0.9788662791252136, "learning_rate": 2e-05, "loss": 0.03571104, "step": 15631 }, { "epoch": 31.264, "grad_norm": 1.2824745178222656, "learning_rate": 2e-05, "loss": 0.06243353, "step": 15632 }, { "epoch": 31.266, "grad_norm": 2.083545684814453, "learning_rate": 2e-05, "loss": 0.05412727, "step": 15633 }, { "epoch": 31.268, "grad_norm": 1.137243390083313, "learning_rate": 2e-05, "loss": 0.03898332, "step": 15634 }, { "epoch": 31.27, "grad_norm": 1.689199686050415, "learning_rate": 2e-05, "loss": 0.05919106, "step": 15635 }, { "epoch": 31.272, "grad_norm": 1.2659372091293335, "learning_rate": 2e-05, "loss": 0.04632677, "step": 15636 }, { "epoch": 31.274, "grad_norm": 1.112552285194397, "learning_rate": 2e-05, "loss": 0.04632806, "step": 15637 }, { "epoch": 31.276, "grad_norm": 1.6927549839019775, "learning_rate": 2e-05, "loss": 0.04679632, "step": 15638 }, { "epoch": 31.278, "grad_norm": 1.111116647720337, "learning_rate": 2e-05, "loss": 0.05047597, "step": 15639 }, { "epoch": 31.28, "grad_norm": 1.375753402709961, "learning_rate": 2e-05, "loss": 0.03683254, "step": 15640 }, { "epoch": 31.282, "grad_norm": 1.7661595344543457, "learning_rate": 2e-05, "loss": 0.03786363, "step": 15641 }, { "epoch": 31.284, "grad_norm": 0.8163806796073914, "learning_rate": 2e-05, "loss": 0.02566732, "step": 15642 }, { "epoch": 31.286, "grad_norm": 1.0341118574142456, "learning_rate": 2e-05, "loss": 0.03548244, "step": 15643 }, { "epoch": 31.288, "grad_norm": 1.100555419921875, "learning_rate": 2e-05, "loss": 0.04913027, "step": 15644 }, { "epoch": 31.29, "grad_norm": 1.5567846298217773, "learning_rate": 2e-05, "loss": 0.05019675, "step": 15645 }, { "epoch": 31.292, "grad_norm": 1.3390871286392212, "learning_rate": 2e-05, "loss": 0.03626147, "step": 15646 }, { "epoch": 31.294, "grad_norm": 1.2818732261657715, "learning_rate": 2e-05, "loss": 0.05738042, "step": 15647 }, { "epoch": 31.296, "grad_norm": 1.6103893518447876, "learning_rate": 2e-05, "loss": 0.05835925, "step": 15648 }, { "epoch": 31.298, "grad_norm": 1.4005788564682007, "learning_rate": 2e-05, "loss": 0.05155373, "step": 15649 }, { "epoch": 31.3, "grad_norm": 1.74521005153656, "learning_rate": 2e-05, "loss": 0.05141186, "step": 15650 }, { "epoch": 31.302, "grad_norm": 6.119893550872803, "learning_rate": 2e-05, "loss": 0.04554497, "step": 15651 }, { "epoch": 31.304, "grad_norm": 0.8908125758171082, "learning_rate": 2e-05, "loss": 0.0313488, "step": 15652 }, { "epoch": 31.306, "grad_norm": 2.1842198371887207, "learning_rate": 2e-05, "loss": 0.04460478, "step": 15653 }, { "epoch": 31.308, "grad_norm": 1.1753830909729004, "learning_rate": 2e-05, "loss": 0.0378788, "step": 15654 }, { "epoch": 31.31, "grad_norm": 0.8630046248435974, "learning_rate": 2e-05, "loss": 0.03361127, "step": 15655 }, { "epoch": 31.312, "grad_norm": 1.6762036085128784, "learning_rate": 2e-05, "loss": 0.04723896, "step": 15656 }, { "epoch": 31.314, "grad_norm": 1.1063750982284546, "learning_rate": 2e-05, "loss": 0.04719393, "step": 15657 }, { "epoch": 31.316, "grad_norm": 1.4814600944519043, "learning_rate": 2e-05, "loss": 0.04800023, "step": 15658 }, { "epoch": 31.318, "grad_norm": 1.3200794458389282, "learning_rate": 2e-05, "loss": 0.04815413, "step": 15659 }, { "epoch": 31.32, "grad_norm": 1.0114225149154663, "learning_rate": 2e-05, "loss": 0.03497773, "step": 15660 }, { "epoch": 31.322, "grad_norm": 1.0739425420761108, "learning_rate": 2e-05, "loss": 0.04006201, "step": 15661 }, { "epoch": 31.324, "grad_norm": 1.0473504066467285, "learning_rate": 2e-05, "loss": 0.04756029, "step": 15662 }, { "epoch": 31.326, "grad_norm": 1.3113157749176025, "learning_rate": 2e-05, "loss": 0.0442853, "step": 15663 }, { "epoch": 31.328, "grad_norm": 1.1747963428497314, "learning_rate": 2e-05, "loss": 0.05372734, "step": 15664 }, { "epoch": 31.33, "grad_norm": 0.8496965765953064, "learning_rate": 2e-05, "loss": 0.04116274, "step": 15665 }, { "epoch": 31.332, "grad_norm": 1.305624008178711, "learning_rate": 2e-05, "loss": 0.03603145, "step": 15666 }, { "epoch": 31.334, "grad_norm": 0.9579513669013977, "learning_rate": 2e-05, "loss": 0.03609766, "step": 15667 }, { "epoch": 31.336, "grad_norm": 1.0316411256790161, "learning_rate": 2e-05, "loss": 0.04552227, "step": 15668 }, { "epoch": 31.338, "grad_norm": 1.0851950645446777, "learning_rate": 2e-05, "loss": 0.04704334, "step": 15669 }, { "epoch": 31.34, "grad_norm": 1.6728236675262451, "learning_rate": 2e-05, "loss": 0.05019729, "step": 15670 }, { "epoch": 31.342, "grad_norm": 1.9348280429840088, "learning_rate": 2e-05, "loss": 0.06004303, "step": 15671 }, { "epoch": 31.344, "grad_norm": 1.61855149269104, "learning_rate": 2e-05, "loss": 0.08058114, "step": 15672 }, { "epoch": 31.346, "grad_norm": 1.2438894510269165, "learning_rate": 2e-05, "loss": 0.04178282, "step": 15673 }, { "epoch": 31.348, "grad_norm": 1.5808088779449463, "learning_rate": 2e-05, "loss": 0.03818889, "step": 15674 }, { "epoch": 31.35, "grad_norm": 1.6250077486038208, "learning_rate": 2e-05, "loss": 0.06269492, "step": 15675 }, { "epoch": 31.352, "grad_norm": 1.0409210920333862, "learning_rate": 2e-05, "loss": 0.04536034, "step": 15676 }, { "epoch": 31.354, "grad_norm": 1.7033014297485352, "learning_rate": 2e-05, "loss": 0.0674368, "step": 15677 }, { "epoch": 31.356, "grad_norm": 1.3224788904190063, "learning_rate": 2e-05, "loss": 0.06046909, "step": 15678 }, { "epoch": 31.358, "grad_norm": 1.0691778659820557, "learning_rate": 2e-05, "loss": 0.03665531, "step": 15679 }, { "epoch": 31.36, "grad_norm": 1.0010557174682617, "learning_rate": 2e-05, "loss": 0.05547294, "step": 15680 }, { "epoch": 31.362, "grad_norm": 1.0064655542373657, "learning_rate": 2e-05, "loss": 0.03364509, "step": 15681 }, { "epoch": 31.364, "grad_norm": 1.3175302743911743, "learning_rate": 2e-05, "loss": 0.03027017, "step": 15682 }, { "epoch": 31.366, "grad_norm": 1.2213633060455322, "learning_rate": 2e-05, "loss": 0.03984521, "step": 15683 }, { "epoch": 31.368, "grad_norm": 1.5415242910385132, "learning_rate": 2e-05, "loss": 0.04748254, "step": 15684 }, { "epoch": 31.37, "grad_norm": 1.40778648853302, "learning_rate": 2e-05, "loss": 0.04686006, "step": 15685 }, { "epoch": 31.372, "grad_norm": 1.3736506700515747, "learning_rate": 2e-05, "loss": 0.04237969, "step": 15686 }, { "epoch": 31.374, "grad_norm": 1.25831139087677, "learning_rate": 2e-05, "loss": 0.04642356, "step": 15687 }, { "epoch": 31.376, "grad_norm": 0.9877125024795532, "learning_rate": 2e-05, "loss": 0.02985021, "step": 15688 }, { "epoch": 31.378, "grad_norm": 1.063590407371521, "learning_rate": 2e-05, "loss": 0.04104499, "step": 15689 }, { "epoch": 31.38, "grad_norm": 1.0465511083602905, "learning_rate": 2e-05, "loss": 0.04367655, "step": 15690 }, { "epoch": 31.382, "grad_norm": 3.631286859512329, "learning_rate": 2e-05, "loss": 0.0619649, "step": 15691 }, { "epoch": 31.384, "grad_norm": 0.9848971962928772, "learning_rate": 2e-05, "loss": 0.03327, "step": 15692 }, { "epoch": 31.386, "grad_norm": 0.9652875661849976, "learning_rate": 2e-05, "loss": 0.0263647, "step": 15693 }, { "epoch": 31.388, "grad_norm": 3.6076254844665527, "learning_rate": 2e-05, "loss": 0.05036663, "step": 15694 }, { "epoch": 31.39, "grad_norm": 1.5262267589569092, "learning_rate": 2e-05, "loss": 0.04752599, "step": 15695 }, { "epoch": 31.392, "grad_norm": 1.4341087341308594, "learning_rate": 2e-05, "loss": 0.03729438, "step": 15696 }, { "epoch": 31.394, "grad_norm": 2.271669626235962, "learning_rate": 2e-05, "loss": 0.05606764, "step": 15697 }, { "epoch": 31.396, "grad_norm": 3.866393804550171, "learning_rate": 2e-05, "loss": 0.0523802, "step": 15698 }, { "epoch": 31.398, "grad_norm": 4.744752883911133, "learning_rate": 2e-05, "loss": 0.05100797, "step": 15699 }, { "epoch": 31.4, "grad_norm": 1.4482446908950806, "learning_rate": 2e-05, "loss": 0.03665149, "step": 15700 }, { "epoch": 31.402, "grad_norm": 1.279300332069397, "learning_rate": 2e-05, "loss": 0.07632536, "step": 15701 }, { "epoch": 31.404, "grad_norm": 1.055903434753418, "learning_rate": 2e-05, "loss": 0.03435541, "step": 15702 }, { "epoch": 31.406, "grad_norm": 1.2498873472213745, "learning_rate": 2e-05, "loss": 0.04013643, "step": 15703 }, { "epoch": 31.408, "grad_norm": 0.9528058767318726, "learning_rate": 2e-05, "loss": 0.03516664, "step": 15704 }, { "epoch": 31.41, "grad_norm": 1.1201534271240234, "learning_rate": 2e-05, "loss": 0.04960186, "step": 15705 }, { "epoch": 31.412, "grad_norm": 2.661227226257324, "learning_rate": 2e-05, "loss": 0.06705199, "step": 15706 }, { "epoch": 31.414, "grad_norm": 1.3577604293823242, "learning_rate": 2e-05, "loss": 0.04827485, "step": 15707 }, { "epoch": 31.416, "grad_norm": 2.0779330730438232, "learning_rate": 2e-05, "loss": 0.05919573, "step": 15708 }, { "epoch": 31.418, "grad_norm": 1.1076674461364746, "learning_rate": 2e-05, "loss": 0.04013889, "step": 15709 }, { "epoch": 31.42, "grad_norm": 2.1122114658355713, "learning_rate": 2e-05, "loss": 0.0394566, "step": 15710 }, { "epoch": 31.422, "grad_norm": 1.6121785640716553, "learning_rate": 2e-05, "loss": 0.05721608, "step": 15711 }, { "epoch": 31.424, "grad_norm": 1.2673602104187012, "learning_rate": 2e-05, "loss": 0.05748147, "step": 15712 }, { "epoch": 31.426, "grad_norm": 1.0048468112945557, "learning_rate": 2e-05, "loss": 0.04387796, "step": 15713 }, { "epoch": 31.428, "grad_norm": 1.6549828052520752, "learning_rate": 2e-05, "loss": 0.0308019, "step": 15714 }, { "epoch": 31.43, "grad_norm": 1.0368094444274902, "learning_rate": 2e-05, "loss": 0.05190976, "step": 15715 }, { "epoch": 31.432, "grad_norm": 1.4638230800628662, "learning_rate": 2e-05, "loss": 0.04086473, "step": 15716 }, { "epoch": 31.434, "grad_norm": 1.05438232421875, "learning_rate": 2e-05, "loss": 0.05348888, "step": 15717 }, { "epoch": 31.436, "grad_norm": 1.2306461334228516, "learning_rate": 2e-05, "loss": 0.04790132, "step": 15718 }, { "epoch": 31.438, "grad_norm": 1.2164409160614014, "learning_rate": 2e-05, "loss": 0.05819508, "step": 15719 }, { "epoch": 31.44, "grad_norm": 1.0996991395950317, "learning_rate": 2e-05, "loss": 0.03779875, "step": 15720 }, { "epoch": 31.442, "grad_norm": 1.0024462938308716, "learning_rate": 2e-05, "loss": 0.03726301, "step": 15721 }, { "epoch": 31.444, "grad_norm": 1.3350788354873657, "learning_rate": 2e-05, "loss": 0.04758968, "step": 15722 }, { "epoch": 31.446, "grad_norm": 1.1430721282958984, "learning_rate": 2e-05, "loss": 0.04645448, "step": 15723 }, { "epoch": 31.448, "grad_norm": 0.8742209076881409, "learning_rate": 2e-05, "loss": 0.03666718, "step": 15724 }, { "epoch": 31.45, "grad_norm": 2.4872255325317383, "learning_rate": 2e-05, "loss": 0.05186316, "step": 15725 }, { "epoch": 31.452, "grad_norm": 0.9442282319068909, "learning_rate": 2e-05, "loss": 0.03467327, "step": 15726 }, { "epoch": 31.454, "grad_norm": 1.7800120115280151, "learning_rate": 2e-05, "loss": 0.03841582, "step": 15727 }, { "epoch": 31.456, "grad_norm": 0.9323883056640625, "learning_rate": 2e-05, "loss": 0.03322595, "step": 15728 }, { "epoch": 31.458, "grad_norm": 1.1979058980941772, "learning_rate": 2e-05, "loss": 0.04481182, "step": 15729 }, { "epoch": 31.46, "grad_norm": 1.4935935735702515, "learning_rate": 2e-05, "loss": 0.05089278, "step": 15730 }, { "epoch": 31.462, "grad_norm": 1.2047449350357056, "learning_rate": 2e-05, "loss": 0.04731514, "step": 15731 }, { "epoch": 31.464, "grad_norm": 0.9115632772445679, "learning_rate": 2e-05, "loss": 0.03296842, "step": 15732 }, { "epoch": 31.466, "grad_norm": 1.1322036981582642, "learning_rate": 2e-05, "loss": 0.03154603, "step": 15733 }, { "epoch": 31.468, "grad_norm": 1.1309213638305664, "learning_rate": 2e-05, "loss": 0.04606347, "step": 15734 }, { "epoch": 31.47, "grad_norm": 1.014689326286316, "learning_rate": 2e-05, "loss": 0.03607409, "step": 15735 }, { "epoch": 31.472, "grad_norm": 1.2394776344299316, "learning_rate": 2e-05, "loss": 0.05316552, "step": 15736 }, { "epoch": 31.474, "grad_norm": 1.5289125442504883, "learning_rate": 2e-05, "loss": 0.07152525, "step": 15737 }, { "epoch": 31.476, "grad_norm": 0.9883049130439758, "learning_rate": 2e-05, "loss": 0.03511796, "step": 15738 }, { "epoch": 31.478, "grad_norm": 1.1610225439071655, "learning_rate": 2e-05, "loss": 0.03893195, "step": 15739 }, { "epoch": 31.48, "grad_norm": 1.6628162860870361, "learning_rate": 2e-05, "loss": 0.050082, "step": 15740 }, { "epoch": 31.482, "grad_norm": 1.4675723314285278, "learning_rate": 2e-05, "loss": 0.05357666, "step": 15741 }, { "epoch": 31.484, "grad_norm": 1.352639079093933, "learning_rate": 2e-05, "loss": 0.03547839, "step": 15742 }, { "epoch": 31.486, "grad_norm": 1.1179454326629639, "learning_rate": 2e-05, "loss": 0.03814749, "step": 15743 }, { "epoch": 31.488, "grad_norm": 0.9795663356781006, "learning_rate": 2e-05, "loss": 0.03900216, "step": 15744 }, { "epoch": 31.49, "grad_norm": 1.5570218563079834, "learning_rate": 2e-05, "loss": 0.04240408, "step": 15745 }, { "epoch": 31.492, "grad_norm": 2.1028215885162354, "learning_rate": 2e-05, "loss": 0.04598399, "step": 15746 }, { "epoch": 31.494, "grad_norm": 1.1730042695999146, "learning_rate": 2e-05, "loss": 0.05494734, "step": 15747 }, { "epoch": 31.496, "grad_norm": 1.098713755607605, "learning_rate": 2e-05, "loss": 0.05006399, "step": 15748 }, { "epoch": 31.498, "grad_norm": 1.178218960762024, "learning_rate": 2e-05, "loss": 0.04227079, "step": 15749 }, { "epoch": 31.5, "grad_norm": 2.585055351257324, "learning_rate": 2e-05, "loss": 0.04975813, "step": 15750 }, { "epoch": 31.502, "grad_norm": 2.266711473464966, "learning_rate": 2e-05, "loss": 0.05157644, "step": 15751 }, { "epoch": 31.504, "grad_norm": 1.5033444166183472, "learning_rate": 2e-05, "loss": 0.0578462, "step": 15752 }, { "epoch": 31.506, "grad_norm": 1.6901814937591553, "learning_rate": 2e-05, "loss": 0.04620863, "step": 15753 }, { "epoch": 31.508, "grad_norm": 0.8861660957336426, "learning_rate": 2e-05, "loss": 0.02776463, "step": 15754 }, { "epoch": 31.51, "grad_norm": 1.4894425868988037, "learning_rate": 2e-05, "loss": 0.05743075, "step": 15755 }, { "epoch": 31.512, "grad_norm": 1.54172682762146, "learning_rate": 2e-05, "loss": 0.05731036, "step": 15756 }, { "epoch": 31.514, "grad_norm": 3.299351930618286, "learning_rate": 2e-05, "loss": 0.03340764, "step": 15757 }, { "epoch": 31.516, "grad_norm": 1.767289638519287, "learning_rate": 2e-05, "loss": 0.05479082, "step": 15758 }, { "epoch": 31.518, "grad_norm": 1.1137621402740479, "learning_rate": 2e-05, "loss": 0.04651989, "step": 15759 }, { "epoch": 31.52, "grad_norm": 4.403497695922852, "learning_rate": 2e-05, "loss": 0.05484784, "step": 15760 }, { "epoch": 31.522, "grad_norm": 0.9935469627380371, "learning_rate": 2e-05, "loss": 0.02681471, "step": 15761 }, { "epoch": 31.524, "grad_norm": 1.239449143409729, "learning_rate": 2e-05, "loss": 0.04478639, "step": 15762 }, { "epoch": 31.526, "grad_norm": 1.0836318731307983, "learning_rate": 2e-05, "loss": 0.04632657, "step": 15763 }, { "epoch": 31.528, "grad_norm": 1.127844214439392, "learning_rate": 2e-05, "loss": 0.05195791, "step": 15764 }, { "epoch": 31.53, "grad_norm": 0.9752721190452576, "learning_rate": 2e-05, "loss": 0.04764936, "step": 15765 }, { "epoch": 31.532, "grad_norm": 1.4673633575439453, "learning_rate": 2e-05, "loss": 0.04626267, "step": 15766 }, { "epoch": 31.534, "grad_norm": 1.931850552558899, "learning_rate": 2e-05, "loss": 0.06421394, "step": 15767 }, { "epoch": 31.536, "grad_norm": 1.1329762935638428, "learning_rate": 2e-05, "loss": 0.0434003, "step": 15768 }, { "epoch": 31.538, "grad_norm": 1.0417413711547852, "learning_rate": 2e-05, "loss": 0.03507046, "step": 15769 }, { "epoch": 31.54, "grad_norm": 1.4948378801345825, "learning_rate": 2e-05, "loss": 0.04815332, "step": 15770 }, { "epoch": 31.542, "grad_norm": 2.6032447814941406, "learning_rate": 2e-05, "loss": 0.04236407, "step": 15771 }, { "epoch": 31.544, "grad_norm": 1.0077167749404907, "learning_rate": 2e-05, "loss": 0.03445692, "step": 15772 }, { "epoch": 31.546, "grad_norm": 1.0981523990631104, "learning_rate": 2e-05, "loss": 0.04068336, "step": 15773 }, { "epoch": 31.548000000000002, "grad_norm": 1.4508930444717407, "learning_rate": 2e-05, "loss": 0.05076244, "step": 15774 }, { "epoch": 31.55, "grad_norm": 1.6133698225021362, "learning_rate": 2e-05, "loss": 0.0435809, "step": 15775 }, { "epoch": 31.552, "grad_norm": 1.154510736465454, "learning_rate": 2e-05, "loss": 0.03871128, "step": 15776 }, { "epoch": 31.554, "grad_norm": 0.9561405181884766, "learning_rate": 2e-05, "loss": 0.04080594, "step": 15777 }, { "epoch": 31.556, "grad_norm": 0.9834510684013367, "learning_rate": 2e-05, "loss": 0.03244867, "step": 15778 }, { "epoch": 31.558, "grad_norm": 2.004338502883911, "learning_rate": 2e-05, "loss": 0.05344335, "step": 15779 }, { "epoch": 31.56, "grad_norm": 1.2364351749420166, "learning_rate": 2e-05, "loss": 0.05170216, "step": 15780 }, { "epoch": 31.562, "grad_norm": 1.4382699728012085, "learning_rate": 2e-05, "loss": 0.03863439, "step": 15781 }, { "epoch": 31.564, "grad_norm": 0.9535454511642456, "learning_rate": 2e-05, "loss": 0.04045202, "step": 15782 }, { "epoch": 31.566, "grad_norm": 1.036081075668335, "learning_rate": 2e-05, "loss": 0.03678982, "step": 15783 }, { "epoch": 31.568, "grad_norm": 1.0939866304397583, "learning_rate": 2e-05, "loss": 0.03180864, "step": 15784 }, { "epoch": 31.57, "grad_norm": 1.143340826034546, "learning_rate": 2e-05, "loss": 0.05616608, "step": 15785 }, { "epoch": 31.572, "grad_norm": 1.1616203784942627, "learning_rate": 2e-05, "loss": 0.04822581, "step": 15786 }, { "epoch": 31.574, "grad_norm": 3.608323574066162, "learning_rate": 2e-05, "loss": 0.04122865, "step": 15787 }, { "epoch": 31.576, "grad_norm": 0.9763780832290649, "learning_rate": 2e-05, "loss": 0.04797199, "step": 15788 }, { "epoch": 31.578, "grad_norm": 1.4918748140335083, "learning_rate": 2e-05, "loss": 0.04703324, "step": 15789 }, { "epoch": 31.58, "grad_norm": 1.0545265674591064, "learning_rate": 2e-05, "loss": 0.03836868, "step": 15790 }, { "epoch": 31.582, "grad_norm": 1.474339246749878, "learning_rate": 2e-05, "loss": 0.06008284, "step": 15791 }, { "epoch": 31.584, "grad_norm": 1.0625519752502441, "learning_rate": 2e-05, "loss": 0.05859602, "step": 15792 }, { "epoch": 31.586, "grad_norm": 1.2134989500045776, "learning_rate": 2e-05, "loss": 0.04208735, "step": 15793 }, { "epoch": 31.588, "grad_norm": 1.2566266059875488, "learning_rate": 2e-05, "loss": 0.03742356, "step": 15794 }, { "epoch": 31.59, "grad_norm": 1.3968597650527954, "learning_rate": 2e-05, "loss": 0.06733236, "step": 15795 }, { "epoch": 31.592, "grad_norm": 1.4475517272949219, "learning_rate": 2e-05, "loss": 0.05899442, "step": 15796 }, { "epoch": 31.594, "grad_norm": 1.1160857677459717, "learning_rate": 2e-05, "loss": 0.04420739, "step": 15797 }, { "epoch": 31.596, "grad_norm": 3.387834072113037, "learning_rate": 2e-05, "loss": 0.03860878, "step": 15798 }, { "epoch": 31.598, "grad_norm": 1.3520134687423706, "learning_rate": 2e-05, "loss": 0.0479091, "step": 15799 }, { "epoch": 31.6, "grad_norm": 1.0275832414627075, "learning_rate": 2e-05, "loss": 0.04483669, "step": 15800 }, { "epoch": 31.602, "grad_norm": 1.150698184967041, "learning_rate": 2e-05, "loss": 0.04116081, "step": 15801 }, { "epoch": 31.604, "grad_norm": 1.164976716041565, "learning_rate": 2e-05, "loss": 0.04441263, "step": 15802 }, { "epoch": 31.606, "grad_norm": 1.4972437620162964, "learning_rate": 2e-05, "loss": 0.04349743, "step": 15803 }, { "epoch": 31.608, "grad_norm": 1.0806130170822144, "learning_rate": 2e-05, "loss": 0.04196836, "step": 15804 }, { "epoch": 31.61, "grad_norm": 1.1995141506195068, "learning_rate": 2e-05, "loss": 0.03850396, "step": 15805 }, { "epoch": 31.612, "grad_norm": 1.1772652864456177, "learning_rate": 2e-05, "loss": 0.04858148, "step": 15806 }, { "epoch": 31.614, "grad_norm": 1.2144558429718018, "learning_rate": 2e-05, "loss": 0.04736113, "step": 15807 }, { "epoch": 31.616, "grad_norm": 1.003003478050232, "learning_rate": 2e-05, "loss": 0.03994528, "step": 15808 }, { "epoch": 31.618, "grad_norm": 1.2089412212371826, "learning_rate": 2e-05, "loss": 0.05446092, "step": 15809 }, { "epoch": 31.62, "grad_norm": 0.9926894307136536, "learning_rate": 2e-05, "loss": 0.03842264, "step": 15810 }, { "epoch": 31.622, "grad_norm": 1.0654360055923462, "learning_rate": 2e-05, "loss": 0.04553921, "step": 15811 }, { "epoch": 31.624, "grad_norm": 1.0569010972976685, "learning_rate": 2e-05, "loss": 0.03932887, "step": 15812 }, { "epoch": 31.626, "grad_norm": 1.1645009517669678, "learning_rate": 2e-05, "loss": 0.04667068, "step": 15813 }, { "epoch": 31.628, "grad_norm": 1.1764347553253174, "learning_rate": 2e-05, "loss": 0.03884183, "step": 15814 }, { "epoch": 31.63, "grad_norm": 1.056957721710205, "learning_rate": 2e-05, "loss": 0.03504774, "step": 15815 }, { "epoch": 31.632, "grad_norm": 1.523184895515442, "learning_rate": 2e-05, "loss": 0.03928267, "step": 15816 }, { "epoch": 31.634, "grad_norm": 0.9528183937072754, "learning_rate": 2e-05, "loss": 0.02977644, "step": 15817 }, { "epoch": 31.636, "grad_norm": 1.139365792274475, "learning_rate": 2e-05, "loss": 0.04936465, "step": 15818 }, { "epoch": 31.638, "grad_norm": 1.0164538621902466, "learning_rate": 2e-05, "loss": 0.04015662, "step": 15819 }, { "epoch": 31.64, "grad_norm": 2.5323426723480225, "learning_rate": 2e-05, "loss": 0.04329709, "step": 15820 }, { "epoch": 31.642, "grad_norm": 0.9892186522483826, "learning_rate": 2e-05, "loss": 0.04343133, "step": 15821 }, { "epoch": 31.644, "grad_norm": 0.9569028615951538, "learning_rate": 2e-05, "loss": 0.03114694, "step": 15822 }, { "epoch": 31.646, "grad_norm": 1.1068476438522339, "learning_rate": 2e-05, "loss": 0.03301759, "step": 15823 }, { "epoch": 31.648, "grad_norm": 1.5227477550506592, "learning_rate": 2e-05, "loss": 0.05028416, "step": 15824 }, { "epoch": 31.65, "grad_norm": 1.1207176446914673, "learning_rate": 2e-05, "loss": 0.05229302, "step": 15825 }, { "epoch": 31.652, "grad_norm": 1.5114483833312988, "learning_rate": 2e-05, "loss": 0.03748871, "step": 15826 }, { "epoch": 31.654, "grad_norm": 1.271199107170105, "learning_rate": 2e-05, "loss": 0.04786011, "step": 15827 }, { "epoch": 31.656, "grad_norm": 2.418534994125366, "learning_rate": 2e-05, "loss": 0.04346465, "step": 15828 }, { "epoch": 31.658, "grad_norm": 1.1120386123657227, "learning_rate": 2e-05, "loss": 0.0327154, "step": 15829 }, { "epoch": 31.66, "grad_norm": 1.1137319803237915, "learning_rate": 2e-05, "loss": 0.05912303, "step": 15830 }, { "epoch": 31.662, "grad_norm": 1.2933043241500854, "learning_rate": 2e-05, "loss": 0.04017461, "step": 15831 }, { "epoch": 31.664, "grad_norm": 1.0982502698898315, "learning_rate": 2e-05, "loss": 0.03859887, "step": 15832 }, { "epoch": 31.666, "grad_norm": 0.8622153997421265, "learning_rate": 2e-05, "loss": 0.02759753, "step": 15833 }, { "epoch": 31.668, "grad_norm": 1.0364253520965576, "learning_rate": 2e-05, "loss": 0.04519296, "step": 15834 }, { "epoch": 31.67, "grad_norm": 1.2100967168807983, "learning_rate": 2e-05, "loss": 0.03839786, "step": 15835 }, { "epoch": 31.672, "grad_norm": 1.111802577972412, "learning_rate": 2e-05, "loss": 0.04178933, "step": 15836 }, { "epoch": 31.674, "grad_norm": 1.0190538167953491, "learning_rate": 2e-05, "loss": 0.03445241, "step": 15837 }, { "epoch": 31.676, "grad_norm": 1.8687206506729126, "learning_rate": 2e-05, "loss": 0.05826758, "step": 15838 }, { "epoch": 31.678, "grad_norm": 1.047756552696228, "learning_rate": 2e-05, "loss": 0.03885385, "step": 15839 }, { "epoch": 31.68, "grad_norm": 0.9928880333900452, "learning_rate": 2e-05, "loss": 0.04523052, "step": 15840 }, { "epoch": 31.682, "grad_norm": 1.2337273359298706, "learning_rate": 2e-05, "loss": 0.05221771, "step": 15841 }, { "epoch": 31.684, "grad_norm": 1.0689107179641724, "learning_rate": 2e-05, "loss": 0.04347456, "step": 15842 }, { "epoch": 31.686, "grad_norm": 1.2016342878341675, "learning_rate": 2e-05, "loss": 0.04474635, "step": 15843 }, { "epoch": 31.688, "grad_norm": 1.1433383226394653, "learning_rate": 2e-05, "loss": 0.04778634, "step": 15844 }, { "epoch": 31.69, "grad_norm": 2.7174150943756104, "learning_rate": 2e-05, "loss": 0.07287376, "step": 15845 }, { "epoch": 31.692, "grad_norm": 1.3149713277816772, "learning_rate": 2e-05, "loss": 0.04179319, "step": 15846 }, { "epoch": 31.694, "grad_norm": 5.19163703918457, "learning_rate": 2e-05, "loss": 0.04478062, "step": 15847 }, { "epoch": 31.696, "grad_norm": 2.104957342147827, "learning_rate": 2e-05, "loss": 0.05446626, "step": 15848 }, { "epoch": 31.698, "grad_norm": 1.2916414737701416, "learning_rate": 2e-05, "loss": 0.04545394, "step": 15849 }, { "epoch": 31.7, "grad_norm": 1.0930049419403076, "learning_rate": 2e-05, "loss": 0.04331831, "step": 15850 }, { "epoch": 31.701999999999998, "grad_norm": 0.9730895161628723, "learning_rate": 2e-05, "loss": 0.03830769, "step": 15851 }, { "epoch": 31.704, "grad_norm": 0.8668298721313477, "learning_rate": 2e-05, "loss": 0.02239366, "step": 15852 }, { "epoch": 31.706, "grad_norm": 3.044990062713623, "learning_rate": 2e-05, "loss": 0.06177013, "step": 15853 }, { "epoch": 31.708, "grad_norm": 1.7499326467514038, "learning_rate": 2e-05, "loss": 0.05107445, "step": 15854 }, { "epoch": 31.71, "grad_norm": 0.787744402885437, "learning_rate": 2e-05, "loss": 0.03012834, "step": 15855 }, { "epoch": 31.712, "grad_norm": 1.1019916534423828, "learning_rate": 2e-05, "loss": 0.04200315, "step": 15856 }, { "epoch": 31.714, "grad_norm": 0.9541832804679871, "learning_rate": 2e-05, "loss": 0.03924496, "step": 15857 }, { "epoch": 31.716, "grad_norm": 0.9597941637039185, "learning_rate": 2e-05, "loss": 0.02669969, "step": 15858 }, { "epoch": 31.718, "grad_norm": 0.884486734867096, "learning_rate": 2e-05, "loss": 0.03527596, "step": 15859 }, { "epoch": 31.72, "grad_norm": 1.0208630561828613, "learning_rate": 2e-05, "loss": 0.03534837, "step": 15860 }, { "epoch": 31.722, "grad_norm": 1.7355231046676636, "learning_rate": 2e-05, "loss": 0.06564537, "step": 15861 }, { "epoch": 31.724, "grad_norm": 1.0348602533340454, "learning_rate": 2e-05, "loss": 0.04258328, "step": 15862 }, { "epoch": 31.726, "grad_norm": 1.8180856704711914, "learning_rate": 2e-05, "loss": 0.04855717, "step": 15863 }, { "epoch": 31.728, "grad_norm": 0.9880276322364807, "learning_rate": 2e-05, "loss": 0.03922275, "step": 15864 }, { "epoch": 31.73, "grad_norm": 1.0288110971450806, "learning_rate": 2e-05, "loss": 0.04276623, "step": 15865 }, { "epoch": 31.732, "grad_norm": 1.7478946447372437, "learning_rate": 2e-05, "loss": 0.05458619, "step": 15866 }, { "epoch": 31.734, "grad_norm": 1.010421872138977, "learning_rate": 2e-05, "loss": 0.04428875, "step": 15867 }, { "epoch": 31.736, "grad_norm": 1.1560001373291016, "learning_rate": 2e-05, "loss": 0.05556989, "step": 15868 }, { "epoch": 31.738, "grad_norm": 1.1337833404541016, "learning_rate": 2e-05, "loss": 0.03786508, "step": 15869 }, { "epoch": 31.74, "grad_norm": 1.323927640914917, "learning_rate": 2e-05, "loss": 0.05452766, "step": 15870 }, { "epoch": 31.742, "grad_norm": 1.639430046081543, "learning_rate": 2e-05, "loss": 0.04627925, "step": 15871 }, { "epoch": 31.744, "grad_norm": 0.9859926700592041, "learning_rate": 2e-05, "loss": 0.03508934, "step": 15872 }, { "epoch": 31.746, "grad_norm": 1.2310607433319092, "learning_rate": 2e-05, "loss": 0.05566741, "step": 15873 }, { "epoch": 31.748, "grad_norm": 1.1457229852676392, "learning_rate": 2e-05, "loss": 0.04572055, "step": 15874 }, { "epoch": 31.75, "grad_norm": 1.5464967489242554, "learning_rate": 2e-05, "loss": 0.06493871, "step": 15875 }, { "epoch": 31.752, "grad_norm": 1.0586541891098022, "learning_rate": 2e-05, "loss": 0.0350925, "step": 15876 }, { "epoch": 31.754, "grad_norm": 0.9922010898590088, "learning_rate": 2e-05, "loss": 0.03237592, "step": 15877 }, { "epoch": 31.756, "grad_norm": 1.6347486972808838, "learning_rate": 2e-05, "loss": 0.0366933, "step": 15878 }, { "epoch": 31.758, "grad_norm": 1.9233605861663818, "learning_rate": 2e-05, "loss": 0.04147567, "step": 15879 }, { "epoch": 31.76, "grad_norm": 1.0620523691177368, "learning_rate": 2e-05, "loss": 0.03815773, "step": 15880 }, { "epoch": 31.762, "grad_norm": 1.2568045854568481, "learning_rate": 2e-05, "loss": 0.04045515, "step": 15881 }, { "epoch": 31.764, "grad_norm": 1.3260029554367065, "learning_rate": 2e-05, "loss": 0.06650871, "step": 15882 }, { "epoch": 31.766, "grad_norm": 0.9206283092498779, "learning_rate": 2e-05, "loss": 0.0290489, "step": 15883 }, { "epoch": 31.768, "grad_norm": 1.5656499862670898, "learning_rate": 2e-05, "loss": 0.0362188, "step": 15884 }, { "epoch": 31.77, "grad_norm": 1.312314748764038, "learning_rate": 2e-05, "loss": 0.06194432, "step": 15885 }, { "epoch": 31.772, "grad_norm": 2.2398221492767334, "learning_rate": 2e-05, "loss": 0.06821667, "step": 15886 }, { "epoch": 31.774, "grad_norm": 1.1739871501922607, "learning_rate": 2e-05, "loss": 0.03177306, "step": 15887 }, { "epoch": 31.776, "grad_norm": 1.0532993078231812, "learning_rate": 2e-05, "loss": 0.03686599, "step": 15888 }, { "epoch": 31.778, "grad_norm": 0.8376888036727905, "learning_rate": 2e-05, "loss": 0.03524396, "step": 15889 }, { "epoch": 31.78, "grad_norm": 2.267432928085327, "learning_rate": 2e-05, "loss": 0.05465377, "step": 15890 }, { "epoch": 31.782, "grad_norm": 1.0571322441101074, "learning_rate": 2e-05, "loss": 0.04264095, "step": 15891 }, { "epoch": 31.784, "grad_norm": 1.3199357986450195, "learning_rate": 2e-05, "loss": 0.06520945, "step": 15892 }, { "epoch": 31.786, "grad_norm": 1.5665909051895142, "learning_rate": 2e-05, "loss": 0.05910393, "step": 15893 }, { "epoch": 31.788, "grad_norm": 1.4252721071243286, "learning_rate": 2e-05, "loss": 0.03654172, "step": 15894 }, { "epoch": 31.79, "grad_norm": 1.2176549434661865, "learning_rate": 2e-05, "loss": 0.0468789, "step": 15895 }, { "epoch": 31.792, "grad_norm": 1.7313529253005981, "learning_rate": 2e-05, "loss": 0.04097667, "step": 15896 }, { "epoch": 31.794, "grad_norm": 2.0214059352874756, "learning_rate": 2e-05, "loss": 0.05272017, "step": 15897 }, { "epoch": 31.796, "grad_norm": 1.1506291627883911, "learning_rate": 2e-05, "loss": 0.04326191, "step": 15898 }, { "epoch": 31.798000000000002, "grad_norm": 0.9576292634010315, "learning_rate": 2e-05, "loss": 0.02942866, "step": 15899 }, { "epoch": 31.8, "grad_norm": 2.844074010848999, "learning_rate": 2e-05, "loss": 0.05193437, "step": 15900 }, { "epoch": 31.802, "grad_norm": 1.0188461542129517, "learning_rate": 2e-05, "loss": 0.05375243, "step": 15901 }, { "epoch": 31.804, "grad_norm": 1.1720850467681885, "learning_rate": 2e-05, "loss": 0.03583799, "step": 15902 }, { "epoch": 31.806, "grad_norm": 1.239471197128296, "learning_rate": 2e-05, "loss": 0.04422534, "step": 15903 }, { "epoch": 31.808, "grad_norm": 1.0280276536941528, "learning_rate": 2e-05, "loss": 0.04165411, "step": 15904 }, { "epoch": 31.81, "grad_norm": 0.9891774654388428, "learning_rate": 2e-05, "loss": 0.04784955, "step": 15905 }, { "epoch": 31.812, "grad_norm": 1.134988784790039, "learning_rate": 2e-05, "loss": 0.04264707, "step": 15906 }, { "epoch": 31.814, "grad_norm": 1.1324423551559448, "learning_rate": 2e-05, "loss": 0.03583518, "step": 15907 }, { "epoch": 31.816, "grad_norm": 2.5018234252929688, "learning_rate": 2e-05, "loss": 0.04717122, "step": 15908 }, { "epoch": 31.818, "grad_norm": 1.1477476358413696, "learning_rate": 2e-05, "loss": 0.04307114, "step": 15909 }, { "epoch": 31.82, "grad_norm": 0.8575872182846069, "learning_rate": 2e-05, "loss": 0.02858703, "step": 15910 }, { "epoch": 31.822, "grad_norm": 1.012574553489685, "learning_rate": 2e-05, "loss": 0.03951813, "step": 15911 }, { "epoch": 31.824, "grad_norm": 1.3815621137619019, "learning_rate": 2e-05, "loss": 0.04898196, "step": 15912 }, { "epoch": 31.826, "grad_norm": 0.8362985849380493, "learning_rate": 2e-05, "loss": 0.03051193, "step": 15913 }, { "epoch": 31.828, "grad_norm": 1.2465306520462036, "learning_rate": 2e-05, "loss": 0.03481557, "step": 15914 }, { "epoch": 31.83, "grad_norm": 1.4176462888717651, "learning_rate": 2e-05, "loss": 0.04691321, "step": 15915 }, { "epoch": 31.832, "grad_norm": 1.248950719833374, "learning_rate": 2e-05, "loss": 0.06173592, "step": 15916 }, { "epoch": 31.834, "grad_norm": 1.6640843152999878, "learning_rate": 2e-05, "loss": 0.05572528, "step": 15917 }, { "epoch": 31.836, "grad_norm": 1.2289388179779053, "learning_rate": 2e-05, "loss": 0.06155836, "step": 15918 }, { "epoch": 31.838, "grad_norm": 0.9859318733215332, "learning_rate": 2e-05, "loss": 0.04409668, "step": 15919 }, { "epoch": 31.84, "grad_norm": 1.0400197505950928, "learning_rate": 2e-05, "loss": 0.05137893, "step": 15920 }, { "epoch": 31.842, "grad_norm": 1.3462915420532227, "learning_rate": 2e-05, "loss": 0.04780863, "step": 15921 }, { "epoch": 31.844, "grad_norm": 0.9731854200363159, "learning_rate": 2e-05, "loss": 0.03293011, "step": 15922 }, { "epoch": 31.846, "grad_norm": 1.2685530185699463, "learning_rate": 2e-05, "loss": 0.05982886, "step": 15923 }, { "epoch": 31.848, "grad_norm": 1.025337815284729, "learning_rate": 2e-05, "loss": 0.0380186, "step": 15924 }, { "epoch": 31.85, "grad_norm": 1.2428250312805176, "learning_rate": 2e-05, "loss": 0.04449599, "step": 15925 }, { "epoch": 31.852, "grad_norm": 1.4256794452667236, "learning_rate": 2e-05, "loss": 0.04801312, "step": 15926 }, { "epoch": 31.854, "grad_norm": 1.210816740989685, "learning_rate": 2e-05, "loss": 0.05219975, "step": 15927 }, { "epoch": 31.856, "grad_norm": 1.0081429481506348, "learning_rate": 2e-05, "loss": 0.04066213, "step": 15928 }, { "epoch": 31.858, "grad_norm": 1.3839162588119507, "learning_rate": 2e-05, "loss": 0.0361177, "step": 15929 }, { "epoch": 31.86, "grad_norm": 1.1862558126449585, "learning_rate": 2e-05, "loss": 0.04290885, "step": 15930 }, { "epoch": 31.862, "grad_norm": 0.9478551149368286, "learning_rate": 2e-05, "loss": 0.04138046, "step": 15931 }, { "epoch": 31.864, "grad_norm": 1.1621347665786743, "learning_rate": 2e-05, "loss": 0.05174235, "step": 15932 }, { "epoch": 31.866, "grad_norm": 1.1488691568374634, "learning_rate": 2e-05, "loss": 0.03811418, "step": 15933 }, { "epoch": 31.868, "grad_norm": 1.1009749174118042, "learning_rate": 2e-05, "loss": 0.04835455, "step": 15934 }, { "epoch": 31.87, "grad_norm": 0.9989393949508667, "learning_rate": 2e-05, "loss": 0.04177198, "step": 15935 }, { "epoch": 31.872, "grad_norm": 0.8199046850204468, "learning_rate": 2e-05, "loss": 0.03224732, "step": 15936 }, { "epoch": 31.874, "grad_norm": 1.2866772413253784, "learning_rate": 2e-05, "loss": 0.0462041, "step": 15937 }, { "epoch": 31.876, "grad_norm": 2.014058828353882, "learning_rate": 2e-05, "loss": 0.05243024, "step": 15938 }, { "epoch": 31.878, "grad_norm": 1.895260214805603, "learning_rate": 2e-05, "loss": 0.05436756, "step": 15939 }, { "epoch": 31.88, "grad_norm": 1.379758358001709, "learning_rate": 2e-05, "loss": 0.05134067, "step": 15940 }, { "epoch": 31.882, "grad_norm": 1.1402020454406738, "learning_rate": 2e-05, "loss": 0.05053543, "step": 15941 }, { "epoch": 31.884, "grad_norm": 1.9122066497802734, "learning_rate": 2e-05, "loss": 0.04480761, "step": 15942 }, { "epoch": 31.886, "grad_norm": 1.1403417587280273, "learning_rate": 2e-05, "loss": 0.03008875, "step": 15943 }, { "epoch": 31.888, "grad_norm": 1.1798020601272583, "learning_rate": 2e-05, "loss": 0.04141122, "step": 15944 }, { "epoch": 31.89, "grad_norm": 1.0835275650024414, "learning_rate": 2e-05, "loss": 0.0342609, "step": 15945 }, { "epoch": 31.892, "grad_norm": 1.5087027549743652, "learning_rate": 2e-05, "loss": 0.03361506, "step": 15946 }, { "epoch": 31.894, "grad_norm": 1.2732311487197876, "learning_rate": 2e-05, "loss": 0.04503596, "step": 15947 }, { "epoch": 31.896, "grad_norm": 1.117771029472351, "learning_rate": 2e-05, "loss": 0.05453199, "step": 15948 }, { "epoch": 31.898, "grad_norm": 0.9061999917030334, "learning_rate": 2e-05, "loss": 0.0304344, "step": 15949 }, { "epoch": 31.9, "grad_norm": 0.9329893589019775, "learning_rate": 2e-05, "loss": 0.03484414, "step": 15950 }, { "epoch": 31.902, "grad_norm": 1.371660590171814, "learning_rate": 2e-05, "loss": 0.0439395, "step": 15951 }, { "epoch": 31.904, "grad_norm": 1.7571653127670288, "learning_rate": 2e-05, "loss": 0.03117009, "step": 15952 }, { "epoch": 31.906, "grad_norm": 1.85367751121521, "learning_rate": 2e-05, "loss": 0.05765525, "step": 15953 }, { "epoch": 31.908, "grad_norm": 0.9609562158584595, "learning_rate": 2e-05, "loss": 0.04214827, "step": 15954 }, { "epoch": 31.91, "grad_norm": 0.9756461977958679, "learning_rate": 2e-05, "loss": 0.03862667, "step": 15955 }, { "epoch": 31.912, "grad_norm": 1.0798842906951904, "learning_rate": 2e-05, "loss": 0.04864082, "step": 15956 }, { "epoch": 31.914, "grad_norm": 1.8472621440887451, "learning_rate": 2e-05, "loss": 0.04330658, "step": 15957 }, { "epoch": 31.916, "grad_norm": 1.0119237899780273, "learning_rate": 2e-05, "loss": 0.02977612, "step": 15958 }, { "epoch": 31.918, "grad_norm": 0.7594591379165649, "learning_rate": 2e-05, "loss": 0.02641629, "step": 15959 }, { "epoch": 31.92, "grad_norm": 3.8506855964660645, "learning_rate": 2e-05, "loss": 0.05078499, "step": 15960 }, { "epoch": 31.922, "grad_norm": 1.3508875370025635, "learning_rate": 2e-05, "loss": 0.03430257, "step": 15961 }, { "epoch": 31.924, "grad_norm": 1.0510114431381226, "learning_rate": 2e-05, "loss": 0.04310552, "step": 15962 }, { "epoch": 31.926, "grad_norm": 1.2425123453140259, "learning_rate": 2e-05, "loss": 0.03266704, "step": 15963 }, { "epoch": 31.928, "grad_norm": 1.095833420753479, "learning_rate": 2e-05, "loss": 0.04365671, "step": 15964 }, { "epoch": 31.93, "grad_norm": 1.2923943996429443, "learning_rate": 2e-05, "loss": 0.04559427, "step": 15965 }, { "epoch": 31.932, "grad_norm": 1.8447715044021606, "learning_rate": 2e-05, "loss": 0.04925349, "step": 15966 }, { "epoch": 31.934, "grad_norm": 1.3923882246017456, "learning_rate": 2e-05, "loss": 0.03552225, "step": 15967 }, { "epoch": 31.936, "grad_norm": 1.6300287246704102, "learning_rate": 2e-05, "loss": 0.03371734, "step": 15968 }, { "epoch": 31.938, "grad_norm": 0.9517141580581665, "learning_rate": 2e-05, "loss": 0.03424593, "step": 15969 }, { "epoch": 31.94, "grad_norm": 1.0027364492416382, "learning_rate": 2e-05, "loss": 0.03960797, "step": 15970 }, { "epoch": 31.942, "grad_norm": 1.1754388809204102, "learning_rate": 2e-05, "loss": 0.0579026, "step": 15971 }, { "epoch": 31.944, "grad_norm": 1.044007420539856, "learning_rate": 2e-05, "loss": 0.04491287, "step": 15972 }, { "epoch": 31.946, "grad_norm": 1.0131793022155762, "learning_rate": 2e-05, "loss": 0.04302017, "step": 15973 }, { "epoch": 31.948, "grad_norm": 1.6754741668701172, "learning_rate": 2e-05, "loss": 0.03826069, "step": 15974 }, { "epoch": 31.95, "grad_norm": 1.3440221548080444, "learning_rate": 2e-05, "loss": 0.03422169, "step": 15975 }, { "epoch": 31.951999999999998, "grad_norm": 1.0821226835250854, "learning_rate": 2e-05, "loss": 0.05283847, "step": 15976 }, { "epoch": 31.954, "grad_norm": 1.1359598636627197, "learning_rate": 2e-05, "loss": 0.04611814, "step": 15977 }, { "epoch": 31.956, "grad_norm": 1.0634214878082275, "learning_rate": 2e-05, "loss": 0.03629611, "step": 15978 }, { "epoch": 31.958, "grad_norm": 1.1144078969955444, "learning_rate": 2e-05, "loss": 0.04468196, "step": 15979 }, { "epoch": 31.96, "grad_norm": 1.3621118068695068, "learning_rate": 2e-05, "loss": 0.05095581, "step": 15980 }, { "epoch": 31.962, "grad_norm": 1.2638226747512817, "learning_rate": 2e-05, "loss": 0.03788029, "step": 15981 }, { "epoch": 31.964, "grad_norm": 0.9756414890289307, "learning_rate": 2e-05, "loss": 0.03850944, "step": 15982 }, { "epoch": 31.966, "grad_norm": 1.1255450248718262, "learning_rate": 2e-05, "loss": 0.050939, "step": 15983 }, { "epoch": 31.968, "grad_norm": 2.7487754821777344, "learning_rate": 2e-05, "loss": 0.04751518, "step": 15984 }, { "epoch": 31.97, "grad_norm": 1.0990818738937378, "learning_rate": 2e-05, "loss": 0.04240175, "step": 15985 }, { "epoch": 31.972, "grad_norm": 0.9245083928108215, "learning_rate": 2e-05, "loss": 0.02757048, "step": 15986 }, { "epoch": 31.974, "grad_norm": 1.0856565237045288, "learning_rate": 2e-05, "loss": 0.04022551, "step": 15987 }, { "epoch": 31.976, "grad_norm": 1.1982617378234863, "learning_rate": 2e-05, "loss": 0.0306278, "step": 15988 }, { "epoch": 31.978, "grad_norm": 1.283501148223877, "learning_rate": 2e-05, "loss": 0.05583193, "step": 15989 }, { "epoch": 31.98, "grad_norm": 1.167189121246338, "learning_rate": 2e-05, "loss": 0.05435322, "step": 15990 }, { "epoch": 31.982, "grad_norm": 2.2477195262908936, "learning_rate": 2e-05, "loss": 0.04087004, "step": 15991 }, { "epoch": 31.984, "grad_norm": 0.9776336550712585, "learning_rate": 2e-05, "loss": 0.03896228, "step": 15992 }, { "epoch": 31.986, "grad_norm": 0.8978980779647827, "learning_rate": 2e-05, "loss": 0.04046476, "step": 15993 }, { "epoch": 31.988, "grad_norm": 1.8571480512619019, "learning_rate": 2e-05, "loss": 0.05393496, "step": 15994 }, { "epoch": 31.99, "grad_norm": 4.182859897613525, "learning_rate": 2e-05, "loss": 0.04097991, "step": 15995 }, { "epoch": 31.992, "grad_norm": 0.9442010521888733, "learning_rate": 2e-05, "loss": 0.03468394, "step": 15996 }, { "epoch": 31.994, "grad_norm": 1.8864837884902954, "learning_rate": 2e-05, "loss": 0.03547326, "step": 15997 }, { "epoch": 31.996, "grad_norm": 0.9562711715698242, "learning_rate": 2e-05, "loss": 0.0390859, "step": 15998 }, { "epoch": 31.998, "grad_norm": 1.0057873725891113, "learning_rate": 2e-05, "loss": 0.0406678, "step": 15999 }, { "epoch": 32.0, "grad_norm": 1.0923762321472168, "learning_rate": 2e-05, "loss": 0.03873049, "step": 16000 }, { "epoch": 32.0, "eval_performance": { "AngleClassification_1": 0.998, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9840319361277445, "Equal_1": 0.994, "Equal_2": 0.9680638722554891, "Equal_3": 0.9840319361277445, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.9940119760479041, "Parallel_1": 0.9939879759519038, "Parallel_2": 0.9979959919839679, "Parallel_3": 0.992, "Perpendicular_1": 0.992, "Perpendicular_2": 0.994, "Perpendicular_3": 0.8907815631262525, "PointLiesOnCircle_1": 1.0, "PointLiesOnCircle_2": 0.996, "PointLiesOnCircle_3": 0.984, "PointLiesOnLine_1": 0.9899799599198397, "PointLiesOnLine_2": 0.9979959919839679, "PointLiesOnLine_3": 0.9820359281437125 }, "eval_runtime": 225.1374, "eval_samples_per_second": 46.638, "eval_steps_per_second": 0.933, "step": 16000 }, { "epoch": 32.002, "grad_norm": 1.2356011867523193, "learning_rate": 2e-05, "loss": 0.05466909, "step": 16001 }, { "epoch": 32.004, "grad_norm": 1.0156986713409424, "learning_rate": 2e-05, "loss": 0.0469223, "step": 16002 }, { "epoch": 32.006, "grad_norm": 1.2976840734481812, "learning_rate": 2e-05, "loss": 0.04618651, "step": 16003 }, { "epoch": 32.008, "grad_norm": 1.2794440984725952, "learning_rate": 2e-05, "loss": 0.05156206, "step": 16004 }, { "epoch": 32.01, "grad_norm": 4.485249996185303, "learning_rate": 2e-05, "loss": 0.07050195, "step": 16005 }, { "epoch": 32.012, "grad_norm": 1.328390121459961, "learning_rate": 2e-05, "loss": 0.05205965, "step": 16006 }, { "epoch": 32.014, "grad_norm": 1.3376307487487793, "learning_rate": 2e-05, "loss": 0.04151385, "step": 16007 }, { "epoch": 32.016, "grad_norm": 1.1926473379135132, "learning_rate": 2e-05, "loss": 0.04749357, "step": 16008 }, { "epoch": 32.018, "grad_norm": 1.7660237550735474, "learning_rate": 2e-05, "loss": 0.05129511, "step": 16009 }, { "epoch": 32.02, "grad_norm": 1.1376497745513916, "learning_rate": 2e-05, "loss": 0.06089584, "step": 16010 }, { "epoch": 32.022, "grad_norm": 1.4228826761245728, "learning_rate": 2e-05, "loss": 0.05121621, "step": 16011 }, { "epoch": 32.024, "grad_norm": 1.1508406400680542, "learning_rate": 2e-05, "loss": 0.05312311, "step": 16012 }, { "epoch": 32.026, "grad_norm": 1.0967044830322266, "learning_rate": 2e-05, "loss": 0.04000089, "step": 16013 }, { "epoch": 32.028, "grad_norm": 1.2801448106765747, "learning_rate": 2e-05, "loss": 0.05693541, "step": 16014 }, { "epoch": 32.03, "grad_norm": 0.9661258459091187, "learning_rate": 2e-05, "loss": 0.04040671, "step": 16015 }, { "epoch": 32.032, "grad_norm": 1.0276020765304565, "learning_rate": 2e-05, "loss": 0.03524083, "step": 16016 }, { "epoch": 32.034, "grad_norm": 2.1342718601226807, "learning_rate": 2e-05, "loss": 0.04446546, "step": 16017 }, { "epoch": 32.036, "grad_norm": 1.1162304878234863, "learning_rate": 2e-05, "loss": 0.05264103, "step": 16018 }, { "epoch": 32.038, "grad_norm": 1.1637152433395386, "learning_rate": 2e-05, "loss": 0.04831163, "step": 16019 }, { "epoch": 32.04, "grad_norm": 1.112960696220398, "learning_rate": 2e-05, "loss": 0.05166023, "step": 16020 }, { "epoch": 32.042, "grad_norm": 1.1386680603027344, "learning_rate": 2e-05, "loss": 0.04694862, "step": 16021 }, { "epoch": 32.044, "grad_norm": 0.8496449589729309, "learning_rate": 2e-05, "loss": 0.02530818, "step": 16022 }, { "epoch": 32.046, "grad_norm": 0.9825944900512695, "learning_rate": 2e-05, "loss": 0.0397158, "step": 16023 }, { "epoch": 32.048, "grad_norm": 1.327502965927124, "learning_rate": 2e-05, "loss": 0.04858366, "step": 16024 }, { "epoch": 32.05, "grad_norm": 1.2455800771713257, "learning_rate": 2e-05, "loss": 0.04819886, "step": 16025 }, { "epoch": 32.052, "grad_norm": 1.3697658777236938, "learning_rate": 2e-05, "loss": 0.05422865, "step": 16026 }, { "epoch": 32.054, "grad_norm": 0.9916019439697266, "learning_rate": 2e-05, "loss": 0.04199918, "step": 16027 }, { "epoch": 32.056, "grad_norm": 1.1752361059188843, "learning_rate": 2e-05, "loss": 0.04264934, "step": 16028 }, { "epoch": 32.058, "grad_norm": 0.9786742329597473, "learning_rate": 2e-05, "loss": 0.03730422, "step": 16029 }, { "epoch": 32.06, "grad_norm": 1.8626909255981445, "learning_rate": 2e-05, "loss": 0.03900536, "step": 16030 }, { "epoch": 32.062, "grad_norm": 2.360508680343628, "learning_rate": 2e-05, "loss": 0.05190337, "step": 16031 }, { "epoch": 32.064, "grad_norm": 1.1845301389694214, "learning_rate": 2e-05, "loss": 0.04113044, "step": 16032 }, { "epoch": 32.066, "grad_norm": 2.492816925048828, "learning_rate": 2e-05, "loss": 0.05427489, "step": 16033 }, { "epoch": 32.068, "grad_norm": 1.528493881225586, "learning_rate": 2e-05, "loss": 0.04995369, "step": 16034 }, { "epoch": 32.07, "grad_norm": 3.656578540802002, "learning_rate": 2e-05, "loss": 0.05380844, "step": 16035 }, { "epoch": 32.072, "grad_norm": 1.0701860189437866, "learning_rate": 2e-05, "loss": 0.03740144, "step": 16036 }, { "epoch": 32.074, "grad_norm": 2.843520402908325, "learning_rate": 2e-05, "loss": 0.04214677, "step": 16037 }, { "epoch": 32.076, "grad_norm": 0.9815409779548645, "learning_rate": 2e-05, "loss": 0.04010665, "step": 16038 }, { "epoch": 32.078, "grad_norm": 0.9799947142601013, "learning_rate": 2e-05, "loss": 0.03869931, "step": 16039 }, { "epoch": 32.08, "grad_norm": 1.9988937377929688, "learning_rate": 2e-05, "loss": 0.03912951, "step": 16040 }, { "epoch": 32.082, "grad_norm": 1.1815953254699707, "learning_rate": 2e-05, "loss": 0.04659181, "step": 16041 }, { "epoch": 32.084, "grad_norm": 1.4960631132125854, "learning_rate": 2e-05, "loss": 0.04047168, "step": 16042 }, { "epoch": 32.086, "grad_norm": 0.9173590540885925, "learning_rate": 2e-05, "loss": 0.02785573, "step": 16043 }, { "epoch": 32.088, "grad_norm": 1.0821963548660278, "learning_rate": 2e-05, "loss": 0.04041246, "step": 16044 }, { "epoch": 32.09, "grad_norm": 1.036468744277954, "learning_rate": 2e-05, "loss": 0.04227835, "step": 16045 }, { "epoch": 32.092, "grad_norm": 1.1241968870162964, "learning_rate": 2e-05, "loss": 0.03792845, "step": 16046 }, { "epoch": 32.094, "grad_norm": 1.2741143703460693, "learning_rate": 2e-05, "loss": 0.05662498, "step": 16047 }, { "epoch": 32.096, "grad_norm": 1.8568767309188843, "learning_rate": 2e-05, "loss": 0.05284123, "step": 16048 }, { "epoch": 32.098, "grad_norm": 3.89512038230896, "learning_rate": 2e-05, "loss": 0.07812426, "step": 16049 }, { "epoch": 32.1, "grad_norm": 1.3303203582763672, "learning_rate": 2e-05, "loss": 0.05902573, "step": 16050 }, { "epoch": 32.102, "grad_norm": 1.136581540107727, "learning_rate": 2e-05, "loss": 0.04114444, "step": 16051 }, { "epoch": 32.104, "grad_norm": 1.0775119066238403, "learning_rate": 2e-05, "loss": 0.04174216, "step": 16052 }, { "epoch": 32.106, "grad_norm": 1.431694507598877, "learning_rate": 2e-05, "loss": 0.0605766, "step": 16053 }, { "epoch": 32.108, "grad_norm": 1.0896109342575073, "learning_rate": 2e-05, "loss": 0.03770591, "step": 16054 }, { "epoch": 32.11, "grad_norm": 0.894169807434082, "learning_rate": 2e-05, "loss": 0.03163622, "step": 16055 }, { "epoch": 32.112, "grad_norm": 1.3525890111923218, "learning_rate": 2e-05, "loss": 0.04273454, "step": 16056 }, { "epoch": 32.114, "grad_norm": 1.3543816804885864, "learning_rate": 2e-05, "loss": 0.04325897, "step": 16057 }, { "epoch": 32.116, "grad_norm": 0.9105185866355896, "learning_rate": 2e-05, "loss": 0.03451267, "step": 16058 }, { "epoch": 32.118, "grad_norm": 1.1880437135696411, "learning_rate": 2e-05, "loss": 0.02085032, "step": 16059 }, { "epoch": 32.12, "grad_norm": 2.6206436157226562, "learning_rate": 2e-05, "loss": 0.05422061, "step": 16060 }, { "epoch": 32.122, "grad_norm": 3.470801591873169, "learning_rate": 2e-05, "loss": 0.06888855, "step": 16061 }, { "epoch": 32.124, "grad_norm": 1.0784660577774048, "learning_rate": 2e-05, "loss": 0.03310742, "step": 16062 }, { "epoch": 32.126, "grad_norm": 1.1307471990585327, "learning_rate": 2e-05, "loss": 0.03930689, "step": 16063 }, { "epoch": 32.128, "grad_norm": 0.8141838908195496, "learning_rate": 2e-05, "loss": 0.02826838, "step": 16064 }, { "epoch": 32.13, "grad_norm": 1.3934303522109985, "learning_rate": 2e-05, "loss": 0.05498338, "step": 16065 }, { "epoch": 32.132, "grad_norm": 1.2954531908035278, "learning_rate": 2e-05, "loss": 0.04460048, "step": 16066 }, { "epoch": 32.134, "grad_norm": 1.3868544101715088, "learning_rate": 2e-05, "loss": 0.04542375, "step": 16067 }, { "epoch": 32.136, "grad_norm": 1.039772391319275, "learning_rate": 2e-05, "loss": 0.0403011, "step": 16068 }, { "epoch": 32.138, "grad_norm": 1.1986145973205566, "learning_rate": 2e-05, "loss": 0.05973785, "step": 16069 }, { "epoch": 32.14, "grad_norm": 2.2725701332092285, "learning_rate": 2e-05, "loss": 0.06814368, "step": 16070 }, { "epoch": 32.142, "grad_norm": 1.2118464708328247, "learning_rate": 2e-05, "loss": 0.04011428, "step": 16071 }, { "epoch": 32.144, "grad_norm": 1.9756301641464233, "learning_rate": 2e-05, "loss": 0.0406368, "step": 16072 }, { "epoch": 32.146, "grad_norm": 1.6966930627822876, "learning_rate": 2e-05, "loss": 0.03090478, "step": 16073 }, { "epoch": 32.148, "grad_norm": 2.1176340579986572, "learning_rate": 2e-05, "loss": 0.05888394, "step": 16074 }, { "epoch": 32.15, "grad_norm": 1.5570383071899414, "learning_rate": 2e-05, "loss": 0.05008405, "step": 16075 }, { "epoch": 32.152, "grad_norm": 1.3696781396865845, "learning_rate": 2e-05, "loss": 0.05907987, "step": 16076 }, { "epoch": 32.154, "grad_norm": 1.1086663007736206, "learning_rate": 2e-05, "loss": 0.04438163, "step": 16077 }, { "epoch": 32.156, "grad_norm": 0.9588847756385803, "learning_rate": 2e-05, "loss": 0.03657414, "step": 16078 }, { "epoch": 32.158, "grad_norm": 1.534988522529602, "learning_rate": 2e-05, "loss": 0.06217058, "step": 16079 }, { "epoch": 32.16, "grad_norm": 1.1655292510986328, "learning_rate": 2e-05, "loss": 0.04939203, "step": 16080 }, { "epoch": 32.162, "grad_norm": 1.2395936250686646, "learning_rate": 2e-05, "loss": 0.04618511, "step": 16081 }, { "epoch": 32.164, "grad_norm": 1.1421581506729126, "learning_rate": 2e-05, "loss": 0.05333049, "step": 16082 }, { "epoch": 32.166, "grad_norm": 1.1398416757583618, "learning_rate": 2e-05, "loss": 0.04084802, "step": 16083 }, { "epoch": 32.168, "grad_norm": 1.1570777893066406, "learning_rate": 2e-05, "loss": 0.05009046, "step": 16084 }, { "epoch": 32.17, "grad_norm": 1.0689493417739868, "learning_rate": 2e-05, "loss": 0.04293783, "step": 16085 }, { "epoch": 32.172, "grad_norm": 1.1279675960540771, "learning_rate": 2e-05, "loss": 0.0462275, "step": 16086 }, { "epoch": 32.174, "grad_norm": 0.9166948199272156, "learning_rate": 2e-05, "loss": 0.03267596, "step": 16087 }, { "epoch": 32.176, "grad_norm": 1.4560723304748535, "learning_rate": 2e-05, "loss": 0.04366771, "step": 16088 }, { "epoch": 32.178, "grad_norm": 1.9315309524536133, "learning_rate": 2e-05, "loss": 0.04382716, "step": 16089 }, { "epoch": 32.18, "grad_norm": 0.9030523896217346, "learning_rate": 2e-05, "loss": 0.03546926, "step": 16090 }, { "epoch": 32.182, "grad_norm": 1.1285887956619263, "learning_rate": 2e-05, "loss": 0.05290186, "step": 16091 }, { "epoch": 32.184, "grad_norm": 1.0761277675628662, "learning_rate": 2e-05, "loss": 0.04373253, "step": 16092 }, { "epoch": 32.186, "grad_norm": 1.157943844795227, "learning_rate": 2e-05, "loss": 0.04580727, "step": 16093 }, { "epoch": 32.188, "grad_norm": 1.7800873517990112, "learning_rate": 2e-05, "loss": 0.04879148, "step": 16094 }, { "epoch": 32.19, "grad_norm": 1.3260236978530884, "learning_rate": 2e-05, "loss": 0.04741555, "step": 16095 }, { "epoch": 32.192, "grad_norm": 1.1106562614440918, "learning_rate": 2e-05, "loss": 0.0432886, "step": 16096 }, { "epoch": 32.194, "grad_norm": 0.7569925785064697, "learning_rate": 2e-05, "loss": 0.02881912, "step": 16097 }, { "epoch": 32.196, "grad_norm": 1.3151812553405762, "learning_rate": 2e-05, "loss": 0.0532372, "step": 16098 }, { "epoch": 32.198, "grad_norm": 1.1523593664169312, "learning_rate": 2e-05, "loss": 0.04770309, "step": 16099 }, { "epoch": 32.2, "grad_norm": 1.6698036193847656, "learning_rate": 2e-05, "loss": 0.03688781, "step": 16100 }, { "epoch": 32.202, "grad_norm": 1.6945507526397705, "learning_rate": 2e-05, "loss": 0.04813316, "step": 16101 }, { "epoch": 32.204, "grad_norm": 0.9330757260322571, "learning_rate": 2e-05, "loss": 0.03236951, "step": 16102 }, { "epoch": 32.206, "grad_norm": 1.1667741537094116, "learning_rate": 2e-05, "loss": 0.04854637, "step": 16103 }, { "epoch": 32.208, "grad_norm": 1.1867941617965698, "learning_rate": 2e-05, "loss": 0.02713115, "step": 16104 }, { "epoch": 32.21, "grad_norm": 1.0923073291778564, "learning_rate": 2e-05, "loss": 0.04134283, "step": 16105 }, { "epoch": 32.212, "grad_norm": 1.5956231355667114, "learning_rate": 2e-05, "loss": 0.05034843, "step": 16106 }, { "epoch": 32.214, "grad_norm": 1.0301272869110107, "learning_rate": 2e-05, "loss": 0.02942278, "step": 16107 }, { "epoch": 32.216, "grad_norm": 2.103224515914917, "learning_rate": 2e-05, "loss": 0.04764633, "step": 16108 }, { "epoch": 32.218, "grad_norm": 1.115855097770691, "learning_rate": 2e-05, "loss": 0.04809504, "step": 16109 }, { "epoch": 32.22, "grad_norm": 0.9842031002044678, "learning_rate": 2e-05, "loss": 0.02803292, "step": 16110 }, { "epoch": 32.222, "grad_norm": 1.1000885963439941, "learning_rate": 2e-05, "loss": 0.03887554, "step": 16111 }, { "epoch": 32.224, "grad_norm": 1.6421066522598267, "learning_rate": 2e-05, "loss": 0.04154354, "step": 16112 }, { "epoch": 32.226, "grad_norm": 0.9637966156005859, "learning_rate": 2e-05, "loss": 0.04104836, "step": 16113 }, { "epoch": 32.228, "grad_norm": 1.0460913181304932, "learning_rate": 2e-05, "loss": 0.0505436, "step": 16114 }, { "epoch": 32.23, "grad_norm": 1.3602415323257446, "learning_rate": 2e-05, "loss": 0.0564248, "step": 16115 }, { "epoch": 32.232, "grad_norm": 0.9343727231025696, "learning_rate": 2e-05, "loss": 0.0349865, "step": 16116 }, { "epoch": 32.234, "grad_norm": 0.8895676136016846, "learning_rate": 2e-05, "loss": 0.04362579, "step": 16117 }, { "epoch": 32.236, "grad_norm": 0.8997917771339417, "learning_rate": 2e-05, "loss": 0.03519147, "step": 16118 }, { "epoch": 32.238, "grad_norm": 0.9945343732833862, "learning_rate": 2e-05, "loss": 0.0340136, "step": 16119 }, { "epoch": 32.24, "grad_norm": 2.3583974838256836, "learning_rate": 2e-05, "loss": 0.04783948, "step": 16120 }, { "epoch": 32.242, "grad_norm": 0.853070080280304, "learning_rate": 2e-05, "loss": 0.02915358, "step": 16121 }, { "epoch": 32.244, "grad_norm": 1.319618582725525, "learning_rate": 2e-05, "loss": 0.04522935, "step": 16122 }, { "epoch": 32.246, "grad_norm": 1.7377943992614746, "learning_rate": 2e-05, "loss": 0.04493669, "step": 16123 }, { "epoch": 32.248, "grad_norm": 0.8464582562446594, "learning_rate": 2e-05, "loss": 0.0264312, "step": 16124 }, { "epoch": 32.25, "grad_norm": 0.7920047044754028, "learning_rate": 2e-05, "loss": 0.02892896, "step": 16125 }, { "epoch": 32.252, "grad_norm": 1.252237319946289, "learning_rate": 2e-05, "loss": 0.04152841, "step": 16126 }, { "epoch": 32.254, "grad_norm": 2.3109395503997803, "learning_rate": 2e-05, "loss": 0.04287978, "step": 16127 }, { "epoch": 32.256, "grad_norm": 0.8441155552864075, "learning_rate": 2e-05, "loss": 0.03464156, "step": 16128 }, { "epoch": 32.258, "grad_norm": 0.9774783253669739, "learning_rate": 2e-05, "loss": 0.04076412, "step": 16129 }, { "epoch": 32.26, "grad_norm": 1.0384986400604248, "learning_rate": 2e-05, "loss": 0.04245531, "step": 16130 }, { "epoch": 32.262, "grad_norm": 1.001991868019104, "learning_rate": 2e-05, "loss": 0.04159827, "step": 16131 }, { "epoch": 32.264, "grad_norm": 1.536611795425415, "learning_rate": 2e-05, "loss": 0.04512593, "step": 16132 }, { "epoch": 32.266, "grad_norm": 1.067681074142456, "learning_rate": 2e-05, "loss": 0.04142949, "step": 16133 }, { "epoch": 32.268, "grad_norm": 1.2170981168746948, "learning_rate": 2e-05, "loss": 0.04906242, "step": 16134 }, { "epoch": 32.27, "grad_norm": 0.9911013245582581, "learning_rate": 2e-05, "loss": 0.03261213, "step": 16135 }, { "epoch": 32.272, "grad_norm": 1.3202298879623413, "learning_rate": 2e-05, "loss": 0.03175102, "step": 16136 }, { "epoch": 32.274, "grad_norm": 1.990440845489502, "learning_rate": 2e-05, "loss": 0.04840795, "step": 16137 }, { "epoch": 32.276, "grad_norm": 1.2240030765533447, "learning_rate": 2e-05, "loss": 0.04696646, "step": 16138 }, { "epoch": 32.278, "grad_norm": 1.1625831127166748, "learning_rate": 2e-05, "loss": 0.05451063, "step": 16139 }, { "epoch": 32.28, "grad_norm": 1.0635885000228882, "learning_rate": 2e-05, "loss": 0.0548157, "step": 16140 }, { "epoch": 32.282, "grad_norm": 1.0855488777160645, "learning_rate": 2e-05, "loss": 0.04400775, "step": 16141 }, { "epoch": 32.284, "grad_norm": 1.2290327548980713, "learning_rate": 2e-05, "loss": 0.0579409, "step": 16142 }, { "epoch": 32.286, "grad_norm": 2.374091625213623, "learning_rate": 2e-05, "loss": 0.05313415, "step": 16143 }, { "epoch": 32.288, "grad_norm": 1.0269756317138672, "learning_rate": 2e-05, "loss": 0.0458583, "step": 16144 }, { "epoch": 32.29, "grad_norm": 0.9971767663955688, "learning_rate": 2e-05, "loss": 0.04700863, "step": 16145 }, { "epoch": 32.292, "grad_norm": 0.9480926394462585, "learning_rate": 2e-05, "loss": 0.03220262, "step": 16146 }, { "epoch": 32.294, "grad_norm": 1.2678351402282715, "learning_rate": 2e-05, "loss": 0.04739953, "step": 16147 }, { "epoch": 32.296, "grad_norm": 1.664489507675171, "learning_rate": 2e-05, "loss": 0.05218516, "step": 16148 }, { "epoch": 32.298, "grad_norm": 0.9204906821250916, "learning_rate": 2e-05, "loss": 0.03914509, "step": 16149 }, { "epoch": 32.3, "grad_norm": 2.669588327407837, "learning_rate": 2e-05, "loss": 0.0403178, "step": 16150 }, { "epoch": 32.302, "grad_norm": 1.400075078010559, "learning_rate": 2e-05, "loss": 0.07089913, "step": 16151 }, { "epoch": 32.304, "grad_norm": 1.0432827472686768, "learning_rate": 2e-05, "loss": 0.05564982, "step": 16152 }, { "epoch": 32.306, "grad_norm": 0.8763149380683899, "learning_rate": 2e-05, "loss": 0.03222255, "step": 16153 }, { "epoch": 32.308, "grad_norm": 1.1551921367645264, "learning_rate": 2e-05, "loss": 0.06010785, "step": 16154 }, { "epoch": 32.31, "grad_norm": 1.83162522315979, "learning_rate": 2e-05, "loss": 0.05497311, "step": 16155 }, { "epoch": 32.312, "grad_norm": 1.7800264358520508, "learning_rate": 2e-05, "loss": 0.06714081, "step": 16156 }, { "epoch": 32.314, "grad_norm": 1.1127432584762573, "learning_rate": 2e-05, "loss": 0.04245913, "step": 16157 }, { "epoch": 32.316, "grad_norm": 1.5372437238693237, "learning_rate": 2e-05, "loss": 0.0662888, "step": 16158 }, { "epoch": 32.318, "grad_norm": 1.2435904741287231, "learning_rate": 2e-05, "loss": 0.05517496, "step": 16159 }, { "epoch": 32.32, "grad_norm": 0.7930717468261719, "learning_rate": 2e-05, "loss": 0.02309351, "step": 16160 }, { "epoch": 32.322, "grad_norm": 1.0555087327957153, "learning_rate": 2e-05, "loss": 0.03615391, "step": 16161 }, { "epoch": 32.324, "grad_norm": 0.9631820321083069, "learning_rate": 2e-05, "loss": 0.04297973, "step": 16162 }, { "epoch": 32.326, "grad_norm": 0.9600966572761536, "learning_rate": 2e-05, "loss": 0.02753797, "step": 16163 }, { "epoch": 32.328, "grad_norm": 0.9929326772689819, "learning_rate": 2e-05, "loss": 0.04358477, "step": 16164 }, { "epoch": 32.33, "grad_norm": 0.9095787405967712, "learning_rate": 2e-05, "loss": 0.03732352, "step": 16165 }, { "epoch": 32.332, "grad_norm": 1.040367603302002, "learning_rate": 2e-05, "loss": 0.03997251, "step": 16166 }, { "epoch": 32.334, "grad_norm": 1.1411361694335938, "learning_rate": 2e-05, "loss": 0.04648264, "step": 16167 }, { "epoch": 32.336, "grad_norm": 0.8514032363891602, "learning_rate": 2e-05, "loss": 0.0308071, "step": 16168 }, { "epoch": 32.338, "grad_norm": 1.0284074544906616, "learning_rate": 2e-05, "loss": 0.04685637, "step": 16169 }, { "epoch": 32.34, "grad_norm": 1.4742350578308105, "learning_rate": 2e-05, "loss": 0.04580137, "step": 16170 }, { "epoch": 32.342, "grad_norm": 1.1794533729553223, "learning_rate": 2e-05, "loss": 0.03911822, "step": 16171 }, { "epoch": 32.344, "grad_norm": 1.5881638526916504, "learning_rate": 2e-05, "loss": 0.06021281, "step": 16172 }, { "epoch": 32.346, "grad_norm": 1.1822782754898071, "learning_rate": 2e-05, "loss": 0.04221109, "step": 16173 }, { "epoch": 32.348, "grad_norm": 2.015674114227295, "learning_rate": 2e-05, "loss": 0.04846906, "step": 16174 }, { "epoch": 32.35, "grad_norm": 0.8046813011169434, "learning_rate": 2e-05, "loss": 0.02578454, "step": 16175 }, { "epoch": 32.352, "grad_norm": 1.1389373540878296, "learning_rate": 2e-05, "loss": 0.04945101, "step": 16176 }, { "epoch": 32.354, "grad_norm": 1.140512466430664, "learning_rate": 2e-05, "loss": 0.03714588, "step": 16177 }, { "epoch": 32.356, "grad_norm": 1.353422999382019, "learning_rate": 2e-05, "loss": 0.0607155, "step": 16178 }, { "epoch": 32.358, "grad_norm": 1.0958099365234375, "learning_rate": 2e-05, "loss": 0.04654004, "step": 16179 }, { "epoch": 32.36, "grad_norm": 1.108579397201538, "learning_rate": 2e-05, "loss": 0.0298132, "step": 16180 }, { "epoch": 32.362, "grad_norm": 1.2370631694793701, "learning_rate": 2e-05, "loss": 0.04716839, "step": 16181 }, { "epoch": 32.364, "grad_norm": 0.9830132126808167, "learning_rate": 2e-05, "loss": 0.03581386, "step": 16182 }, { "epoch": 32.366, "grad_norm": 1.394779920578003, "learning_rate": 2e-05, "loss": 0.06361222, "step": 16183 }, { "epoch": 32.368, "grad_norm": 2.114030361175537, "learning_rate": 2e-05, "loss": 0.0591024, "step": 16184 }, { "epoch": 32.37, "grad_norm": 1.0256534814834595, "learning_rate": 2e-05, "loss": 0.04021797, "step": 16185 }, { "epoch": 32.372, "grad_norm": 0.8982040286064148, "learning_rate": 2e-05, "loss": 0.03349052, "step": 16186 }, { "epoch": 32.374, "grad_norm": 1.414779543876648, "learning_rate": 2e-05, "loss": 0.04881832, "step": 16187 }, { "epoch": 32.376, "grad_norm": 1.2019731998443604, "learning_rate": 2e-05, "loss": 0.04777313, "step": 16188 }, { "epoch": 32.378, "grad_norm": 1.3526569604873657, "learning_rate": 2e-05, "loss": 0.06108937, "step": 16189 }, { "epoch": 32.38, "grad_norm": 2.6972169876098633, "learning_rate": 2e-05, "loss": 0.05688487, "step": 16190 }, { "epoch": 32.382, "grad_norm": 1.276382327079773, "learning_rate": 2e-05, "loss": 0.04962563, "step": 16191 }, { "epoch": 32.384, "grad_norm": 1.0629853010177612, "learning_rate": 2e-05, "loss": 0.039207, "step": 16192 }, { "epoch": 32.386, "grad_norm": 0.9753513932228088, "learning_rate": 2e-05, "loss": 0.03970077, "step": 16193 }, { "epoch": 32.388, "grad_norm": 1.0859004259109497, "learning_rate": 2e-05, "loss": 0.05057412, "step": 16194 }, { "epoch": 32.39, "grad_norm": 1.1043413877487183, "learning_rate": 2e-05, "loss": 0.04748823, "step": 16195 }, { "epoch": 32.392, "grad_norm": 1.791853427886963, "learning_rate": 2e-05, "loss": 0.04063898, "step": 16196 }, { "epoch": 32.394, "grad_norm": 1.4518014192581177, "learning_rate": 2e-05, "loss": 0.06379384, "step": 16197 }, { "epoch": 32.396, "grad_norm": 1.4134232997894287, "learning_rate": 2e-05, "loss": 0.04738367, "step": 16198 }, { "epoch": 32.398, "grad_norm": 0.9420365691184998, "learning_rate": 2e-05, "loss": 0.03974089, "step": 16199 }, { "epoch": 32.4, "grad_norm": 1.284409761428833, "learning_rate": 2e-05, "loss": 0.04513875, "step": 16200 }, { "epoch": 32.402, "grad_norm": 1.1278762817382812, "learning_rate": 2e-05, "loss": 0.05390372, "step": 16201 }, { "epoch": 32.404, "grad_norm": 1.6105965375900269, "learning_rate": 2e-05, "loss": 0.05792791, "step": 16202 }, { "epoch": 32.406, "grad_norm": 0.7698038220405579, "learning_rate": 2e-05, "loss": 0.02571417, "step": 16203 }, { "epoch": 32.408, "grad_norm": 0.9933599829673767, "learning_rate": 2e-05, "loss": 0.04129467, "step": 16204 }, { "epoch": 32.41, "grad_norm": 1.1061280965805054, "learning_rate": 2e-05, "loss": 0.03714271, "step": 16205 }, { "epoch": 32.412, "grad_norm": 1.474578857421875, "learning_rate": 2e-05, "loss": 0.05044924, "step": 16206 }, { "epoch": 32.414, "grad_norm": 1.1528687477111816, "learning_rate": 2e-05, "loss": 0.05266282, "step": 16207 }, { "epoch": 32.416, "grad_norm": 1.0459516048431396, "learning_rate": 2e-05, "loss": 0.03959113, "step": 16208 }, { "epoch": 32.418, "grad_norm": 1.4648975133895874, "learning_rate": 2e-05, "loss": 0.0560421, "step": 16209 }, { "epoch": 32.42, "grad_norm": 1.1999322175979614, "learning_rate": 2e-05, "loss": 0.0468458, "step": 16210 }, { "epoch": 32.422, "grad_norm": 1.0842093229293823, "learning_rate": 2e-05, "loss": 0.05294051, "step": 16211 }, { "epoch": 32.424, "grad_norm": 3.429898738861084, "learning_rate": 2e-05, "loss": 0.05743803, "step": 16212 }, { "epoch": 32.426, "grad_norm": 1.0406116247177124, "learning_rate": 2e-05, "loss": 0.03883129, "step": 16213 }, { "epoch": 32.428, "grad_norm": 1.0052114725112915, "learning_rate": 2e-05, "loss": 0.03623196, "step": 16214 }, { "epoch": 32.43, "grad_norm": 0.9565922021865845, "learning_rate": 2e-05, "loss": 0.03119179, "step": 16215 }, { "epoch": 32.432, "grad_norm": 1.044499158859253, "learning_rate": 2e-05, "loss": 0.03987388, "step": 16216 }, { "epoch": 32.434, "grad_norm": 1.574156641960144, "learning_rate": 2e-05, "loss": 0.06443222, "step": 16217 }, { "epoch": 32.436, "grad_norm": 1.1535178422927856, "learning_rate": 2e-05, "loss": 0.03644914, "step": 16218 }, { "epoch": 32.438, "grad_norm": 1.0952578783035278, "learning_rate": 2e-05, "loss": 0.03878371, "step": 16219 }, { "epoch": 32.44, "grad_norm": 1.0223770141601562, "learning_rate": 2e-05, "loss": 0.04351723, "step": 16220 }, { "epoch": 32.442, "grad_norm": 2.734037160873413, "learning_rate": 2e-05, "loss": 0.04852162, "step": 16221 }, { "epoch": 32.444, "grad_norm": 2.2366368770599365, "learning_rate": 2e-05, "loss": 0.0498224, "step": 16222 }, { "epoch": 32.446, "grad_norm": 1.0357908010482788, "learning_rate": 2e-05, "loss": 0.03789012, "step": 16223 }, { "epoch": 32.448, "grad_norm": 1.2489087581634521, "learning_rate": 2e-05, "loss": 0.04120308, "step": 16224 }, { "epoch": 32.45, "grad_norm": 1.0485516786575317, "learning_rate": 2e-05, "loss": 0.04065135, "step": 16225 }, { "epoch": 32.452, "grad_norm": 1.1578280925750732, "learning_rate": 2e-05, "loss": 0.04651877, "step": 16226 }, { "epoch": 32.454, "grad_norm": 1.0170836448669434, "learning_rate": 2e-05, "loss": 0.03288697, "step": 16227 }, { "epoch": 32.456, "grad_norm": 1.3160803318023682, "learning_rate": 2e-05, "loss": 0.06229516, "step": 16228 }, { "epoch": 32.458, "grad_norm": 1.4435750246047974, "learning_rate": 2e-05, "loss": 0.04922429, "step": 16229 }, { "epoch": 32.46, "grad_norm": 1.8806605339050293, "learning_rate": 2e-05, "loss": 0.0416417, "step": 16230 }, { "epoch": 32.462, "grad_norm": 1.3576693534851074, "learning_rate": 2e-05, "loss": 0.03013571, "step": 16231 }, { "epoch": 32.464, "grad_norm": 1.6590790748596191, "learning_rate": 2e-05, "loss": 0.04374089, "step": 16232 }, { "epoch": 32.466, "grad_norm": 1.2796303033828735, "learning_rate": 2e-05, "loss": 0.04095548, "step": 16233 }, { "epoch": 32.468, "grad_norm": 0.8308750987052917, "learning_rate": 2e-05, "loss": 0.02793445, "step": 16234 }, { "epoch": 32.47, "grad_norm": 1.1069896221160889, "learning_rate": 2e-05, "loss": 0.0387296, "step": 16235 }, { "epoch": 32.472, "grad_norm": 1.0630885362625122, "learning_rate": 2e-05, "loss": 0.04539268, "step": 16236 }, { "epoch": 32.474, "grad_norm": 1.2754840850830078, "learning_rate": 2e-05, "loss": 0.03691139, "step": 16237 }, { "epoch": 32.476, "grad_norm": 1.356078028678894, "learning_rate": 2e-05, "loss": 0.04399051, "step": 16238 }, { "epoch": 32.478, "grad_norm": 1.1358963251113892, "learning_rate": 2e-05, "loss": 0.05365004, "step": 16239 }, { "epoch": 32.48, "grad_norm": 1.3925995826721191, "learning_rate": 2e-05, "loss": 0.04138825, "step": 16240 }, { "epoch": 32.482, "grad_norm": 0.9964032769203186, "learning_rate": 2e-05, "loss": 0.04154122, "step": 16241 }, { "epoch": 32.484, "grad_norm": 1.5699931383132935, "learning_rate": 2e-05, "loss": 0.04448958, "step": 16242 }, { "epoch": 32.486, "grad_norm": 1.0521963834762573, "learning_rate": 2e-05, "loss": 0.04356248, "step": 16243 }, { "epoch": 32.488, "grad_norm": 3.295790672302246, "learning_rate": 2e-05, "loss": 0.0389884, "step": 16244 }, { "epoch": 32.49, "grad_norm": 1.945090651512146, "learning_rate": 2e-05, "loss": 0.06051948, "step": 16245 }, { "epoch": 32.492, "grad_norm": 1.3952381610870361, "learning_rate": 2e-05, "loss": 0.05171783, "step": 16246 }, { "epoch": 32.494, "grad_norm": 1.0240063667297363, "learning_rate": 2e-05, "loss": 0.04166801, "step": 16247 }, { "epoch": 32.496, "grad_norm": 1.9945091009140015, "learning_rate": 2e-05, "loss": 0.03696847, "step": 16248 }, { "epoch": 32.498, "grad_norm": 1.1662887334823608, "learning_rate": 2e-05, "loss": 0.04546071, "step": 16249 }, { "epoch": 32.5, "grad_norm": 1.1431673765182495, "learning_rate": 2e-05, "loss": 0.04085463, "step": 16250 }, { "epoch": 32.502, "grad_norm": 1.4011180400848389, "learning_rate": 2e-05, "loss": 0.04097657, "step": 16251 }, { "epoch": 32.504, "grad_norm": 1.1913303136825562, "learning_rate": 2e-05, "loss": 0.05245316, "step": 16252 }, { "epoch": 32.506, "grad_norm": 1.17953622341156, "learning_rate": 2e-05, "loss": 0.02878503, "step": 16253 }, { "epoch": 32.508, "grad_norm": 1.2890692949295044, "learning_rate": 2e-05, "loss": 0.0533168, "step": 16254 }, { "epoch": 32.51, "grad_norm": 1.1801269054412842, "learning_rate": 2e-05, "loss": 0.03159849, "step": 16255 }, { "epoch": 32.512, "grad_norm": 0.920243501663208, "learning_rate": 2e-05, "loss": 0.02960003, "step": 16256 }, { "epoch": 32.514, "grad_norm": 1.1037468910217285, "learning_rate": 2e-05, "loss": 0.04297595, "step": 16257 }, { "epoch": 32.516, "grad_norm": 1.1111853122711182, "learning_rate": 2e-05, "loss": 0.0475248, "step": 16258 }, { "epoch": 32.518, "grad_norm": 1.081834316253662, "learning_rate": 2e-05, "loss": 0.03885624, "step": 16259 }, { "epoch": 32.52, "grad_norm": 2.0016655921936035, "learning_rate": 2e-05, "loss": 0.05810552, "step": 16260 }, { "epoch": 32.522, "grad_norm": 0.9334720969200134, "learning_rate": 2e-05, "loss": 0.03868662, "step": 16261 }, { "epoch": 32.524, "grad_norm": 1.2165385484695435, "learning_rate": 2e-05, "loss": 0.06041864, "step": 16262 }, { "epoch": 32.526, "grad_norm": 1.1212373971939087, "learning_rate": 2e-05, "loss": 0.04545804, "step": 16263 }, { "epoch": 32.528, "grad_norm": 1.348405361175537, "learning_rate": 2e-05, "loss": 0.06695837, "step": 16264 }, { "epoch": 32.53, "grad_norm": 1.1398199796676636, "learning_rate": 2e-05, "loss": 0.05199791, "step": 16265 }, { "epoch": 32.532, "grad_norm": 1.1829419136047363, "learning_rate": 2e-05, "loss": 0.03969724, "step": 16266 }, { "epoch": 32.534, "grad_norm": 1.148137092590332, "learning_rate": 2e-05, "loss": 0.04542587, "step": 16267 }, { "epoch": 32.536, "grad_norm": 0.9357627630233765, "learning_rate": 2e-05, "loss": 0.04507944, "step": 16268 }, { "epoch": 32.538, "grad_norm": 1.3121267557144165, "learning_rate": 2e-05, "loss": 0.04835391, "step": 16269 }, { "epoch": 32.54, "grad_norm": 2.0285484790802, "learning_rate": 2e-05, "loss": 0.05088212, "step": 16270 }, { "epoch": 32.542, "grad_norm": 2.0232818126678467, "learning_rate": 2e-05, "loss": 0.05421069, "step": 16271 }, { "epoch": 32.544, "grad_norm": 1.3026111125946045, "learning_rate": 2e-05, "loss": 0.06596977, "step": 16272 }, { "epoch": 32.546, "grad_norm": 1.095878005027771, "learning_rate": 2e-05, "loss": 0.05185566, "step": 16273 }, { "epoch": 32.548, "grad_norm": 1.0548791885375977, "learning_rate": 2e-05, "loss": 0.04696474, "step": 16274 }, { "epoch": 32.55, "grad_norm": 2.2554283142089844, "learning_rate": 2e-05, "loss": 0.04388547, "step": 16275 }, { "epoch": 32.552, "grad_norm": 0.984747052192688, "learning_rate": 2e-05, "loss": 0.04554465, "step": 16276 }, { "epoch": 32.554, "grad_norm": 1.0528199672698975, "learning_rate": 2e-05, "loss": 0.04237689, "step": 16277 }, { "epoch": 32.556, "grad_norm": 1.716301679611206, "learning_rate": 2e-05, "loss": 0.05334918, "step": 16278 }, { "epoch": 32.558, "grad_norm": 1.8568881750106812, "learning_rate": 2e-05, "loss": 0.05312324, "step": 16279 }, { "epoch": 32.56, "grad_norm": 1.431077003479004, "learning_rate": 2e-05, "loss": 0.03273985, "step": 16280 }, { "epoch": 32.562, "grad_norm": 1.1652774810791016, "learning_rate": 2e-05, "loss": 0.04368386, "step": 16281 }, { "epoch": 32.564, "grad_norm": 1.3321254253387451, "learning_rate": 2e-05, "loss": 0.0367992, "step": 16282 }, { "epoch": 32.566, "grad_norm": 0.8953471779823303, "learning_rate": 2e-05, "loss": 0.04237193, "step": 16283 }, { "epoch": 32.568, "grad_norm": 0.9668625593185425, "learning_rate": 2e-05, "loss": 0.03793396, "step": 16284 }, { "epoch": 32.57, "grad_norm": 1.3684470653533936, "learning_rate": 2e-05, "loss": 0.04976954, "step": 16285 }, { "epoch": 32.572, "grad_norm": 0.8641055822372437, "learning_rate": 2e-05, "loss": 0.03733483, "step": 16286 }, { "epoch": 32.574, "grad_norm": 1.9221665859222412, "learning_rate": 2e-05, "loss": 0.06496726, "step": 16287 }, { "epoch": 32.576, "grad_norm": 1.084017038345337, "learning_rate": 2e-05, "loss": 0.0537901, "step": 16288 }, { "epoch": 32.578, "grad_norm": 1.3254586458206177, "learning_rate": 2e-05, "loss": 0.05101616, "step": 16289 }, { "epoch": 32.58, "grad_norm": 0.9048498272895813, "learning_rate": 2e-05, "loss": 0.02791585, "step": 16290 }, { "epoch": 32.582, "grad_norm": 0.8707861304283142, "learning_rate": 2e-05, "loss": 0.03403701, "step": 16291 }, { "epoch": 32.584, "grad_norm": 1.250246286392212, "learning_rate": 2e-05, "loss": 0.06114908, "step": 16292 }, { "epoch": 32.586, "grad_norm": 0.9462719559669495, "learning_rate": 2e-05, "loss": 0.03526659, "step": 16293 }, { "epoch": 32.588, "grad_norm": 1.5564969778060913, "learning_rate": 2e-05, "loss": 0.04473447, "step": 16294 }, { "epoch": 32.59, "grad_norm": 1.1885018348693848, "learning_rate": 2e-05, "loss": 0.04804235, "step": 16295 }, { "epoch": 32.592, "grad_norm": 2.263399600982666, "learning_rate": 2e-05, "loss": 0.05261849, "step": 16296 }, { "epoch": 32.594, "grad_norm": 1.6016762256622314, "learning_rate": 2e-05, "loss": 0.05311537, "step": 16297 }, { "epoch": 32.596, "grad_norm": 0.955118715763092, "learning_rate": 2e-05, "loss": 0.03513832, "step": 16298 }, { "epoch": 32.598, "grad_norm": 1.0069520473480225, "learning_rate": 2e-05, "loss": 0.03814769, "step": 16299 }, { "epoch": 32.6, "grad_norm": 1.0309523344039917, "learning_rate": 2e-05, "loss": 0.03411955, "step": 16300 }, { "epoch": 32.602, "grad_norm": 1.063146948814392, "learning_rate": 2e-05, "loss": 0.04516539, "step": 16301 }, { "epoch": 32.604, "grad_norm": 1.8066554069519043, "learning_rate": 2e-05, "loss": 0.05612845, "step": 16302 }, { "epoch": 32.606, "grad_norm": 2.081737518310547, "learning_rate": 2e-05, "loss": 0.05316564, "step": 16303 }, { "epoch": 32.608, "grad_norm": 1.4410886764526367, "learning_rate": 2e-05, "loss": 0.03002356, "step": 16304 }, { "epoch": 32.61, "grad_norm": 1.1499985456466675, "learning_rate": 2e-05, "loss": 0.03965862, "step": 16305 }, { "epoch": 32.612, "grad_norm": 1.0387705564498901, "learning_rate": 2e-05, "loss": 0.03451302, "step": 16306 }, { "epoch": 32.614, "grad_norm": 1.291050672531128, "learning_rate": 2e-05, "loss": 0.04642691, "step": 16307 }, { "epoch": 32.616, "grad_norm": 1.1157009601593018, "learning_rate": 2e-05, "loss": 0.04432426, "step": 16308 }, { "epoch": 32.618, "grad_norm": 1.8771175146102905, "learning_rate": 2e-05, "loss": 0.05904584, "step": 16309 }, { "epoch": 32.62, "grad_norm": 1.189979076385498, "learning_rate": 2e-05, "loss": 0.03692175, "step": 16310 }, { "epoch": 32.622, "grad_norm": 1.0707104206085205, "learning_rate": 2e-05, "loss": 0.0449913, "step": 16311 }, { "epoch": 32.624, "grad_norm": 1.0123356580734253, "learning_rate": 2e-05, "loss": 0.05037958, "step": 16312 }, { "epoch": 32.626, "grad_norm": 1.0081778764724731, "learning_rate": 2e-05, "loss": 0.03349006, "step": 16313 }, { "epoch": 32.628, "grad_norm": 1.1643072366714478, "learning_rate": 2e-05, "loss": 0.05371416, "step": 16314 }, { "epoch": 32.63, "grad_norm": 1.0919042825698853, "learning_rate": 2e-05, "loss": 0.03882549, "step": 16315 }, { "epoch": 32.632, "grad_norm": 1.0387927293777466, "learning_rate": 2e-05, "loss": 0.03141912, "step": 16316 }, { "epoch": 32.634, "grad_norm": 0.9459293484687805, "learning_rate": 2e-05, "loss": 0.03755194, "step": 16317 }, { "epoch": 32.636, "grad_norm": 0.9938026666641235, "learning_rate": 2e-05, "loss": 0.04253921, "step": 16318 }, { "epoch": 32.638, "grad_norm": 1.7219102382659912, "learning_rate": 2e-05, "loss": 0.05450426, "step": 16319 }, { "epoch": 32.64, "grad_norm": 1.4139060974121094, "learning_rate": 2e-05, "loss": 0.0409883, "step": 16320 }, { "epoch": 32.642, "grad_norm": 1.0332183837890625, "learning_rate": 2e-05, "loss": 0.04064386, "step": 16321 }, { "epoch": 32.644, "grad_norm": 2.226306200027466, "learning_rate": 2e-05, "loss": 0.05300971, "step": 16322 }, { "epoch": 32.646, "grad_norm": 0.9938848614692688, "learning_rate": 2e-05, "loss": 0.05232156, "step": 16323 }, { "epoch": 32.648, "grad_norm": 0.9475815296173096, "learning_rate": 2e-05, "loss": 0.03965949, "step": 16324 }, { "epoch": 32.65, "grad_norm": 1.1354097127914429, "learning_rate": 2e-05, "loss": 0.04621927, "step": 16325 }, { "epoch": 32.652, "grad_norm": 0.9696798920631409, "learning_rate": 2e-05, "loss": 0.03465438, "step": 16326 }, { "epoch": 32.654, "grad_norm": 2.5035712718963623, "learning_rate": 2e-05, "loss": 0.04046266, "step": 16327 }, { "epoch": 32.656, "grad_norm": 1.0952516794204712, "learning_rate": 2e-05, "loss": 0.03977197, "step": 16328 }, { "epoch": 32.658, "grad_norm": 2.281114339828491, "learning_rate": 2e-05, "loss": 0.05332372, "step": 16329 }, { "epoch": 32.66, "grad_norm": 1.2734099626541138, "learning_rate": 2e-05, "loss": 0.05778461, "step": 16330 }, { "epoch": 32.662, "grad_norm": 1.1110118627548218, "learning_rate": 2e-05, "loss": 0.04580304, "step": 16331 }, { "epoch": 32.664, "grad_norm": 1.0556721687316895, "learning_rate": 2e-05, "loss": 0.03761445, "step": 16332 }, { "epoch": 32.666, "grad_norm": 1.2698131799697876, "learning_rate": 2e-05, "loss": 0.04783428, "step": 16333 }, { "epoch": 32.668, "grad_norm": 0.9815773963928223, "learning_rate": 2e-05, "loss": 0.04001576, "step": 16334 }, { "epoch": 32.67, "grad_norm": 0.9906237125396729, "learning_rate": 2e-05, "loss": 0.03611821, "step": 16335 }, { "epoch": 32.672, "grad_norm": 2.327683448791504, "learning_rate": 2e-05, "loss": 0.04943229, "step": 16336 }, { "epoch": 32.674, "grad_norm": 1.0837647914886475, "learning_rate": 2e-05, "loss": 0.04108093, "step": 16337 }, { "epoch": 32.676, "grad_norm": 2.258039712905884, "learning_rate": 2e-05, "loss": 0.05548419, "step": 16338 }, { "epoch": 32.678, "grad_norm": 2.8477838039398193, "learning_rate": 2e-05, "loss": 0.05341278, "step": 16339 }, { "epoch": 32.68, "grad_norm": 1.095145344734192, "learning_rate": 2e-05, "loss": 0.03467471, "step": 16340 }, { "epoch": 32.682, "grad_norm": 1.28204345703125, "learning_rate": 2e-05, "loss": 0.05985833, "step": 16341 }, { "epoch": 32.684, "grad_norm": 1.0193347930908203, "learning_rate": 2e-05, "loss": 0.04455447, "step": 16342 }, { "epoch": 32.686, "grad_norm": 0.9091442823410034, "learning_rate": 2e-05, "loss": 0.03512418, "step": 16343 }, { "epoch": 32.688, "grad_norm": 1.1163957118988037, "learning_rate": 2e-05, "loss": 0.04635857, "step": 16344 }, { "epoch": 32.69, "grad_norm": 1.3926900625228882, "learning_rate": 2e-05, "loss": 0.06795005, "step": 16345 }, { "epoch": 32.692, "grad_norm": 1.1602072715759277, "learning_rate": 2e-05, "loss": 0.04930188, "step": 16346 }, { "epoch": 32.694, "grad_norm": 1.3942979574203491, "learning_rate": 2e-05, "loss": 0.0321093, "step": 16347 }, { "epoch": 32.696, "grad_norm": 0.9655115008354187, "learning_rate": 2e-05, "loss": 0.03636239, "step": 16348 }, { "epoch": 32.698, "grad_norm": 1.8497449159622192, "learning_rate": 2e-05, "loss": 0.04188688, "step": 16349 }, { "epoch": 32.7, "grad_norm": 1.2416372299194336, "learning_rate": 2e-05, "loss": 0.04306786, "step": 16350 }, { "epoch": 32.702, "grad_norm": 1.1872172355651855, "learning_rate": 2e-05, "loss": 0.05371705, "step": 16351 }, { "epoch": 32.704, "grad_norm": 2.017808675765991, "learning_rate": 2e-05, "loss": 0.0361925, "step": 16352 }, { "epoch": 32.706, "grad_norm": 1.5361605882644653, "learning_rate": 2e-05, "loss": 0.06136606, "step": 16353 }, { "epoch": 32.708, "grad_norm": 1.2719402313232422, "learning_rate": 2e-05, "loss": 0.04378556, "step": 16354 }, { "epoch": 32.71, "grad_norm": 1.5629328489303589, "learning_rate": 2e-05, "loss": 0.04726329, "step": 16355 }, { "epoch": 32.712, "grad_norm": 1.218009114265442, "learning_rate": 2e-05, "loss": 0.04252596, "step": 16356 }, { "epoch": 32.714, "grad_norm": 1.5036392211914062, "learning_rate": 2e-05, "loss": 0.05147304, "step": 16357 }, { "epoch": 32.716, "grad_norm": 1.2819830179214478, "learning_rate": 2e-05, "loss": 0.05303483, "step": 16358 }, { "epoch": 32.718, "grad_norm": 2.23203706741333, "learning_rate": 2e-05, "loss": 0.04342014, "step": 16359 }, { "epoch": 32.72, "grad_norm": 1.4196090698242188, "learning_rate": 2e-05, "loss": 0.04446263, "step": 16360 }, { "epoch": 32.722, "grad_norm": 1.192689061164856, "learning_rate": 2e-05, "loss": 0.04249515, "step": 16361 }, { "epoch": 32.724, "grad_norm": 1.229402780532837, "learning_rate": 2e-05, "loss": 0.04547435, "step": 16362 }, { "epoch": 32.726, "grad_norm": 0.9871392846107483, "learning_rate": 2e-05, "loss": 0.03445245, "step": 16363 }, { "epoch": 32.728, "grad_norm": 0.940700888633728, "learning_rate": 2e-05, "loss": 0.03357405, "step": 16364 }, { "epoch": 32.73, "grad_norm": 1.7767146825790405, "learning_rate": 2e-05, "loss": 0.04568103, "step": 16365 }, { "epoch": 32.732, "grad_norm": 1.0970723628997803, "learning_rate": 2e-05, "loss": 0.04344926, "step": 16366 }, { "epoch": 32.734, "grad_norm": 1.554121732711792, "learning_rate": 2e-05, "loss": 0.04905734, "step": 16367 }, { "epoch": 32.736, "grad_norm": 2.302638053894043, "learning_rate": 2e-05, "loss": 0.05685548, "step": 16368 }, { "epoch": 32.738, "grad_norm": 1.5561833381652832, "learning_rate": 2e-05, "loss": 0.051638, "step": 16369 }, { "epoch": 32.74, "grad_norm": 1.4446285963058472, "learning_rate": 2e-05, "loss": 0.05237379, "step": 16370 }, { "epoch": 32.742, "grad_norm": 1.0939897298812866, "learning_rate": 2e-05, "loss": 0.04528828, "step": 16371 }, { "epoch": 32.744, "grad_norm": 1.0667661428451538, "learning_rate": 2e-05, "loss": 0.05729897, "step": 16372 }, { "epoch": 32.746, "grad_norm": 1.5616623163223267, "learning_rate": 2e-05, "loss": 0.07154126, "step": 16373 }, { "epoch": 32.748, "grad_norm": 1.059200644493103, "learning_rate": 2e-05, "loss": 0.04375971, "step": 16374 }, { "epoch": 32.75, "grad_norm": 1.1296769380569458, "learning_rate": 2e-05, "loss": 0.04273132, "step": 16375 }, { "epoch": 32.752, "grad_norm": 1.0726172924041748, "learning_rate": 2e-05, "loss": 0.04483677, "step": 16376 }, { "epoch": 32.754, "grad_norm": 1.0706665515899658, "learning_rate": 2e-05, "loss": 0.0367687, "step": 16377 }, { "epoch": 32.756, "grad_norm": 1.4981821775436401, "learning_rate": 2e-05, "loss": 0.0449388, "step": 16378 }, { "epoch": 32.758, "grad_norm": 1.2846806049346924, "learning_rate": 2e-05, "loss": 0.05101033, "step": 16379 }, { "epoch": 32.76, "grad_norm": 0.9525061249732971, "learning_rate": 2e-05, "loss": 0.0405778, "step": 16380 }, { "epoch": 32.762, "grad_norm": 1.7515175342559814, "learning_rate": 2e-05, "loss": 0.04970495, "step": 16381 }, { "epoch": 32.764, "grad_norm": 1.0568068027496338, "learning_rate": 2e-05, "loss": 0.0454537, "step": 16382 }, { "epoch": 32.766, "grad_norm": 2.070103168487549, "learning_rate": 2e-05, "loss": 0.04984166, "step": 16383 }, { "epoch": 32.768, "grad_norm": 1.2946271896362305, "learning_rate": 2e-05, "loss": 0.03644682, "step": 16384 }, { "epoch": 32.77, "grad_norm": 1.2592251300811768, "learning_rate": 2e-05, "loss": 0.05145838, "step": 16385 }, { "epoch": 32.772, "grad_norm": 1.557255506515503, "learning_rate": 2e-05, "loss": 0.04418119, "step": 16386 }, { "epoch": 32.774, "grad_norm": 1.2124803066253662, "learning_rate": 2e-05, "loss": 0.05006649, "step": 16387 }, { "epoch": 32.776, "grad_norm": 1.4191831350326538, "learning_rate": 2e-05, "loss": 0.04611137, "step": 16388 }, { "epoch": 32.778, "grad_norm": 0.8476065993309021, "learning_rate": 2e-05, "loss": 0.02913493, "step": 16389 }, { "epoch": 32.78, "grad_norm": 1.0639674663543701, "learning_rate": 2e-05, "loss": 0.04659071, "step": 16390 }, { "epoch": 32.782, "grad_norm": 1.1053870916366577, "learning_rate": 2e-05, "loss": 0.04689553, "step": 16391 }, { "epoch": 32.784, "grad_norm": 1.0523080825805664, "learning_rate": 2e-05, "loss": 0.03766502, "step": 16392 }, { "epoch": 32.786, "grad_norm": 1.2587509155273438, "learning_rate": 2e-05, "loss": 0.05685302, "step": 16393 }, { "epoch": 32.788, "grad_norm": 1.7399545907974243, "learning_rate": 2e-05, "loss": 0.03251291, "step": 16394 }, { "epoch": 32.79, "grad_norm": 1.2036712169647217, "learning_rate": 2e-05, "loss": 0.04034231, "step": 16395 }, { "epoch": 32.792, "grad_norm": 1.280685305595398, "learning_rate": 2e-05, "loss": 0.06298284, "step": 16396 }, { "epoch": 32.794, "grad_norm": 1.4318487644195557, "learning_rate": 2e-05, "loss": 0.05840269, "step": 16397 }, { "epoch": 32.796, "grad_norm": 1.2964693307876587, "learning_rate": 2e-05, "loss": 0.03987563, "step": 16398 }, { "epoch": 32.798, "grad_norm": 1.6009737253189087, "learning_rate": 2e-05, "loss": 0.03669723, "step": 16399 }, { "epoch": 32.8, "grad_norm": 0.9451805353164673, "learning_rate": 2e-05, "loss": 0.04146789, "step": 16400 }, { "epoch": 32.802, "grad_norm": 1.3097028732299805, "learning_rate": 2e-05, "loss": 0.04831776, "step": 16401 }, { "epoch": 32.804, "grad_norm": 1.2059910297393799, "learning_rate": 2e-05, "loss": 0.04807359, "step": 16402 }, { "epoch": 32.806, "grad_norm": 1.3856134414672852, "learning_rate": 2e-05, "loss": 0.03569076, "step": 16403 }, { "epoch": 32.808, "grad_norm": 1.09983229637146, "learning_rate": 2e-05, "loss": 0.0418646, "step": 16404 }, { "epoch": 32.81, "grad_norm": 1.2396420240402222, "learning_rate": 2e-05, "loss": 0.04677983, "step": 16405 }, { "epoch": 32.812, "grad_norm": 1.0248969793319702, "learning_rate": 2e-05, "loss": 0.03576694, "step": 16406 }, { "epoch": 32.814, "grad_norm": 1.1126916408538818, "learning_rate": 2e-05, "loss": 0.04777541, "step": 16407 }, { "epoch": 32.816, "grad_norm": 1.615116834640503, "learning_rate": 2e-05, "loss": 0.06528717, "step": 16408 }, { "epoch": 32.818, "grad_norm": 0.91813725233078, "learning_rate": 2e-05, "loss": 0.03679512, "step": 16409 }, { "epoch": 32.82, "grad_norm": 0.9300145506858826, "learning_rate": 2e-05, "loss": 0.03622337, "step": 16410 }, { "epoch": 32.822, "grad_norm": 1.0304036140441895, "learning_rate": 2e-05, "loss": 0.0311872, "step": 16411 }, { "epoch": 32.824, "grad_norm": 0.9967870116233826, "learning_rate": 2e-05, "loss": 0.04553411, "step": 16412 }, { "epoch": 32.826, "grad_norm": 1.560076355934143, "learning_rate": 2e-05, "loss": 0.03436767, "step": 16413 }, { "epoch": 32.828, "grad_norm": 0.8006979823112488, "learning_rate": 2e-05, "loss": 0.02602676, "step": 16414 }, { "epoch": 32.83, "grad_norm": 1.0074442625045776, "learning_rate": 2e-05, "loss": 0.04613006, "step": 16415 }, { "epoch": 32.832, "grad_norm": 1.3419992923736572, "learning_rate": 2e-05, "loss": 0.05372937, "step": 16416 }, { "epoch": 32.834, "grad_norm": 1.0680981874465942, "learning_rate": 2e-05, "loss": 0.03514516, "step": 16417 }, { "epoch": 32.836, "grad_norm": 1.0536341667175293, "learning_rate": 2e-05, "loss": 0.04480874, "step": 16418 }, { "epoch": 32.838, "grad_norm": 0.8839556574821472, "learning_rate": 2e-05, "loss": 0.03141466, "step": 16419 }, { "epoch": 32.84, "grad_norm": 1.003722071647644, "learning_rate": 2e-05, "loss": 0.03807032, "step": 16420 }, { "epoch": 32.842, "grad_norm": 1.013817310333252, "learning_rate": 2e-05, "loss": 0.03793994, "step": 16421 }, { "epoch": 32.844, "grad_norm": 0.9921594262123108, "learning_rate": 2e-05, "loss": 0.0329622, "step": 16422 }, { "epoch": 32.846, "grad_norm": 1.3013228178024292, "learning_rate": 2e-05, "loss": 0.05682348, "step": 16423 }, { "epoch": 32.848, "grad_norm": 2.449519157409668, "learning_rate": 2e-05, "loss": 0.05360094, "step": 16424 }, { "epoch": 32.85, "grad_norm": 1.0672171115875244, "learning_rate": 2e-05, "loss": 0.04411232, "step": 16425 }, { "epoch": 32.852, "grad_norm": 1.0839647054672241, "learning_rate": 2e-05, "loss": 0.03874303, "step": 16426 }, { "epoch": 32.854, "grad_norm": 1.2687973976135254, "learning_rate": 2e-05, "loss": 0.03958414, "step": 16427 }, { "epoch": 32.856, "grad_norm": 0.9840965867042542, "learning_rate": 2e-05, "loss": 0.03927713, "step": 16428 }, { "epoch": 32.858, "grad_norm": 1.281537413597107, "learning_rate": 2e-05, "loss": 0.05566413, "step": 16429 }, { "epoch": 32.86, "grad_norm": 1.012905240058899, "learning_rate": 2e-05, "loss": 0.04881471, "step": 16430 }, { "epoch": 32.862, "grad_norm": 0.9168137311935425, "learning_rate": 2e-05, "loss": 0.0294755, "step": 16431 }, { "epoch": 32.864, "grad_norm": 1.1158313751220703, "learning_rate": 2e-05, "loss": 0.03272557, "step": 16432 }, { "epoch": 32.866, "grad_norm": 1.176352620124817, "learning_rate": 2e-05, "loss": 0.04293972, "step": 16433 }, { "epoch": 32.868, "grad_norm": 0.95728999376297, "learning_rate": 2e-05, "loss": 0.0446701, "step": 16434 }, { "epoch": 32.87, "grad_norm": 1.6264740228652954, "learning_rate": 2e-05, "loss": 0.04587853, "step": 16435 }, { "epoch": 32.872, "grad_norm": 1.2280687093734741, "learning_rate": 2e-05, "loss": 0.05692969, "step": 16436 }, { "epoch": 32.874, "grad_norm": 1.1896427869796753, "learning_rate": 2e-05, "loss": 0.05105351, "step": 16437 }, { "epoch": 32.876, "grad_norm": 1.1344528198242188, "learning_rate": 2e-05, "loss": 0.05192492, "step": 16438 }, { "epoch": 32.878, "grad_norm": 2.921778678894043, "learning_rate": 2e-05, "loss": 0.06294307, "step": 16439 }, { "epoch": 32.88, "grad_norm": 1.4751189947128296, "learning_rate": 2e-05, "loss": 0.04908253, "step": 16440 }, { "epoch": 32.882, "grad_norm": 1.8473886251449585, "learning_rate": 2e-05, "loss": 0.05383722, "step": 16441 }, { "epoch": 32.884, "grad_norm": 1.7660833597183228, "learning_rate": 2e-05, "loss": 0.03391756, "step": 16442 }, { "epoch": 32.886, "grad_norm": 1.4730424880981445, "learning_rate": 2e-05, "loss": 0.04530013, "step": 16443 }, { "epoch": 32.888, "grad_norm": 0.9206303954124451, "learning_rate": 2e-05, "loss": 0.03124309, "step": 16444 }, { "epoch": 32.89, "grad_norm": 0.8322934508323669, "learning_rate": 2e-05, "loss": 0.03473114, "step": 16445 }, { "epoch": 32.892, "grad_norm": 1.1998991966247559, "learning_rate": 2e-05, "loss": 0.05920854, "step": 16446 }, { "epoch": 32.894, "grad_norm": 1.0595715045928955, "learning_rate": 2e-05, "loss": 0.0335474, "step": 16447 }, { "epoch": 32.896, "grad_norm": 1.1446808576583862, "learning_rate": 2e-05, "loss": 0.04144682, "step": 16448 }, { "epoch": 32.898, "grad_norm": 0.9624278545379639, "learning_rate": 2e-05, "loss": 0.04155927, "step": 16449 }, { "epoch": 32.9, "grad_norm": 0.9550600647926331, "learning_rate": 2e-05, "loss": 0.0359564, "step": 16450 }, { "epoch": 32.902, "grad_norm": 1.3171793222427368, "learning_rate": 2e-05, "loss": 0.04967549, "step": 16451 }, { "epoch": 32.904, "grad_norm": 1.2323284149169922, "learning_rate": 2e-05, "loss": 0.04720514, "step": 16452 }, { "epoch": 32.906, "grad_norm": 0.8751348257064819, "learning_rate": 2e-05, "loss": 0.03478843, "step": 16453 }, { "epoch": 32.908, "grad_norm": 0.9509841203689575, "learning_rate": 2e-05, "loss": 0.02845589, "step": 16454 }, { "epoch": 32.91, "grad_norm": 0.9622454047203064, "learning_rate": 2e-05, "loss": 0.03686379, "step": 16455 }, { "epoch": 32.912, "grad_norm": 1.2011555433273315, "learning_rate": 2e-05, "loss": 0.05269288, "step": 16456 }, { "epoch": 32.914, "grad_norm": 1.1490558385849, "learning_rate": 2e-05, "loss": 0.04453009, "step": 16457 }, { "epoch": 32.916, "grad_norm": 1.0042850971221924, "learning_rate": 2e-05, "loss": 0.04883328, "step": 16458 }, { "epoch": 32.918, "grad_norm": 1.234962821006775, "learning_rate": 2e-05, "loss": 0.04242648, "step": 16459 }, { "epoch": 32.92, "grad_norm": 1.0605660676956177, "learning_rate": 2e-05, "loss": 0.04353252, "step": 16460 }, { "epoch": 32.922, "grad_norm": 1.0785400867462158, "learning_rate": 2e-05, "loss": 0.04832813, "step": 16461 }, { "epoch": 32.924, "grad_norm": 1.1029609441757202, "learning_rate": 2e-05, "loss": 0.02630161, "step": 16462 }, { "epoch": 32.926, "grad_norm": 1.1941431760787964, "learning_rate": 2e-05, "loss": 0.05544811, "step": 16463 }, { "epoch": 32.928, "grad_norm": 2.294684648513794, "learning_rate": 2e-05, "loss": 0.05932409, "step": 16464 }, { "epoch": 32.93, "grad_norm": 2.2714035511016846, "learning_rate": 2e-05, "loss": 0.08028505, "step": 16465 }, { "epoch": 32.932, "grad_norm": 1.1306442022323608, "learning_rate": 2e-05, "loss": 0.04428364, "step": 16466 }, { "epoch": 32.934, "grad_norm": 2.8830254077911377, "learning_rate": 2e-05, "loss": 0.06098013, "step": 16467 }, { "epoch": 32.936, "grad_norm": 1.1790919303894043, "learning_rate": 2e-05, "loss": 0.03695724, "step": 16468 }, { "epoch": 32.938, "grad_norm": 1.8275067806243896, "learning_rate": 2e-05, "loss": 0.04843022, "step": 16469 }, { "epoch": 32.94, "grad_norm": 1.1185084581375122, "learning_rate": 2e-05, "loss": 0.04931872, "step": 16470 }, { "epoch": 32.942, "grad_norm": 2.2257957458496094, "learning_rate": 2e-05, "loss": 0.0590531, "step": 16471 }, { "epoch": 32.944, "grad_norm": 1.2542955875396729, "learning_rate": 2e-05, "loss": 0.04654231, "step": 16472 }, { "epoch": 32.946, "grad_norm": 1.2641464471817017, "learning_rate": 2e-05, "loss": 0.04473643, "step": 16473 }, { "epoch": 32.948, "grad_norm": 1.1596637964248657, "learning_rate": 2e-05, "loss": 0.04330778, "step": 16474 }, { "epoch": 32.95, "grad_norm": 1.2354049682617188, "learning_rate": 2e-05, "loss": 0.0426512, "step": 16475 }, { "epoch": 32.952, "grad_norm": 2.066526174545288, "learning_rate": 2e-05, "loss": 0.07357596, "step": 16476 }, { "epoch": 32.954, "grad_norm": 0.9862046837806702, "learning_rate": 2e-05, "loss": 0.03118996, "step": 16477 }, { "epoch": 32.956, "grad_norm": 1.1311702728271484, "learning_rate": 2e-05, "loss": 0.04690422, "step": 16478 }, { "epoch": 32.958, "grad_norm": 1.133905053138733, "learning_rate": 2e-05, "loss": 0.04565517, "step": 16479 }, { "epoch": 32.96, "grad_norm": 3.211580991744995, "learning_rate": 2e-05, "loss": 0.0586502, "step": 16480 }, { "epoch": 32.962, "grad_norm": 1.1017708778381348, "learning_rate": 2e-05, "loss": 0.05424349, "step": 16481 }, { "epoch": 32.964, "grad_norm": 1.0407196283340454, "learning_rate": 2e-05, "loss": 0.04288088, "step": 16482 }, { "epoch": 32.966, "grad_norm": 0.888372004032135, "learning_rate": 2e-05, "loss": 0.0414963, "step": 16483 }, { "epoch": 32.968, "grad_norm": 0.9764722585678101, "learning_rate": 2e-05, "loss": 0.0456218, "step": 16484 }, { "epoch": 32.97, "grad_norm": 1.389668345451355, "learning_rate": 2e-05, "loss": 0.03466986, "step": 16485 }, { "epoch": 32.972, "grad_norm": 0.9910828471183777, "learning_rate": 2e-05, "loss": 0.03918536, "step": 16486 }, { "epoch": 32.974, "grad_norm": 1.133689522743225, "learning_rate": 2e-05, "loss": 0.042367, "step": 16487 }, { "epoch": 32.976, "grad_norm": 0.8173225522041321, "learning_rate": 2e-05, "loss": 0.02765619, "step": 16488 }, { "epoch": 32.978, "grad_norm": 1.2284605503082275, "learning_rate": 2e-05, "loss": 0.0416344, "step": 16489 }, { "epoch": 32.98, "grad_norm": 0.9596043825149536, "learning_rate": 2e-05, "loss": 0.02729161, "step": 16490 }, { "epoch": 32.982, "grad_norm": 1.0862085819244385, "learning_rate": 2e-05, "loss": 0.03662115, "step": 16491 }, { "epoch": 32.984, "grad_norm": 0.823144257068634, "learning_rate": 2e-05, "loss": 0.031934, "step": 16492 }, { "epoch": 32.986, "grad_norm": 2.0727217197418213, "learning_rate": 2e-05, "loss": 0.05489798, "step": 16493 }, { "epoch": 32.988, "grad_norm": 1.0378632545471191, "learning_rate": 2e-05, "loss": 0.0315949, "step": 16494 }, { "epoch": 32.99, "grad_norm": 1.0488449335098267, "learning_rate": 2e-05, "loss": 0.03921867, "step": 16495 }, { "epoch": 32.992, "grad_norm": 0.9913254976272583, "learning_rate": 2e-05, "loss": 0.026169, "step": 16496 }, { "epoch": 32.994, "grad_norm": 1.112544059753418, "learning_rate": 2e-05, "loss": 0.04853779, "step": 16497 }, { "epoch": 32.996, "grad_norm": 1.4042452573776245, "learning_rate": 2e-05, "loss": 0.04820094, "step": 16498 }, { "epoch": 32.998, "grad_norm": 1.0718038082122803, "learning_rate": 2e-05, "loss": 0.05348171, "step": 16499 }, { "epoch": 33.0, "grad_norm": 1.8665814399719238, "learning_rate": 2e-05, "loss": 0.04839181, "step": 16500 }, { "epoch": 33.0, "eval_performance": { "AngleClassification_1": 1.0, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9880239520958084, "Equal_1": 0.99, "Equal_2": 0.9740518962075848, "Equal_3": 0.9800399201596807, "LineComparison_1": 1.0, "LineComparison_2": 0.998003992015968, "LineComparison_3": 0.9960079840319361, "Parallel_1": 0.9899799599198397, "Parallel_2": 1.0, "Parallel_3": 0.992, "Perpendicular_1": 0.994, "Perpendicular_2": 0.996, "Perpendicular_3": 0.8847695390781564, "PointLiesOnCircle_1": 1.0, "PointLiesOnCircle_2": 0.996, "PointLiesOnCircle_3": 0.992, "PointLiesOnLine_1": 0.9919839679358717, "PointLiesOnLine_2": 0.9959919839679359, "PointLiesOnLine_3": 0.9760479041916168 }, "eval_runtime": 225.9094, "eval_samples_per_second": 46.479, "eval_steps_per_second": 0.93, "step": 16500 }, { "epoch": 33.002, "grad_norm": 1.8742752075195312, "learning_rate": 2e-05, "loss": 0.04390191, "step": 16501 }, { "epoch": 33.004, "grad_norm": 0.9478954076766968, "learning_rate": 2e-05, "loss": 0.03457246, "step": 16502 }, { "epoch": 33.006, "grad_norm": 1.1291429996490479, "learning_rate": 2e-05, "loss": 0.04588809, "step": 16503 }, { "epoch": 33.008, "grad_norm": 1.4424923658370972, "learning_rate": 2e-05, "loss": 0.06802455, "step": 16504 }, { "epoch": 33.01, "grad_norm": 1.3401007652282715, "learning_rate": 2e-05, "loss": 0.05080446, "step": 16505 }, { "epoch": 33.012, "grad_norm": 1.1646857261657715, "learning_rate": 2e-05, "loss": 0.0431009, "step": 16506 }, { "epoch": 33.014, "grad_norm": 1.1292438507080078, "learning_rate": 2e-05, "loss": 0.04401367, "step": 16507 }, { "epoch": 33.016, "grad_norm": 1.8923307657241821, "learning_rate": 2e-05, "loss": 0.05041666, "step": 16508 }, { "epoch": 33.018, "grad_norm": 1.3575987815856934, "learning_rate": 2e-05, "loss": 0.05090225, "step": 16509 }, { "epoch": 33.02, "grad_norm": 1.1571112871170044, "learning_rate": 2e-05, "loss": 0.03775828, "step": 16510 }, { "epoch": 33.022, "grad_norm": 3.411186695098877, "learning_rate": 2e-05, "loss": 0.06999505, "step": 16511 }, { "epoch": 33.024, "grad_norm": 0.9754483103752136, "learning_rate": 2e-05, "loss": 0.04458144, "step": 16512 }, { "epoch": 33.026, "grad_norm": 1.591568946838379, "learning_rate": 2e-05, "loss": 0.04625469, "step": 16513 }, { "epoch": 33.028, "grad_norm": 1.3043265342712402, "learning_rate": 2e-05, "loss": 0.04571303, "step": 16514 }, { "epoch": 33.03, "grad_norm": 0.8962936401367188, "learning_rate": 2e-05, "loss": 0.03678203, "step": 16515 }, { "epoch": 33.032, "grad_norm": 2.0390634536743164, "learning_rate": 2e-05, "loss": 0.05122909, "step": 16516 }, { "epoch": 33.034, "grad_norm": 1.0965821743011475, "learning_rate": 2e-05, "loss": 0.04838735, "step": 16517 }, { "epoch": 33.036, "grad_norm": 1.1577503681182861, "learning_rate": 2e-05, "loss": 0.04358473, "step": 16518 }, { "epoch": 33.038, "grad_norm": 1.0234256982803345, "learning_rate": 2e-05, "loss": 0.04380442, "step": 16519 }, { "epoch": 33.04, "grad_norm": 1.189461350440979, "learning_rate": 2e-05, "loss": 0.05848221, "step": 16520 }, { "epoch": 33.042, "grad_norm": 1.9547271728515625, "learning_rate": 2e-05, "loss": 0.06269915, "step": 16521 }, { "epoch": 33.044, "grad_norm": 1.0230276584625244, "learning_rate": 2e-05, "loss": 0.03680551, "step": 16522 }, { "epoch": 33.046, "grad_norm": 1.380255103111267, "learning_rate": 2e-05, "loss": 0.04792426, "step": 16523 }, { "epoch": 33.048, "grad_norm": 1.5860072374343872, "learning_rate": 2e-05, "loss": 0.03340201, "step": 16524 }, { "epoch": 33.05, "grad_norm": 1.172709584236145, "learning_rate": 2e-05, "loss": 0.04130891, "step": 16525 }, { "epoch": 33.052, "grad_norm": 1.0512524843215942, "learning_rate": 2e-05, "loss": 0.0336128, "step": 16526 }, { "epoch": 33.054, "grad_norm": 1.2180253267288208, "learning_rate": 2e-05, "loss": 0.03535387, "step": 16527 }, { "epoch": 33.056, "grad_norm": 0.8937636017799377, "learning_rate": 2e-05, "loss": 0.03394864, "step": 16528 }, { "epoch": 33.058, "grad_norm": 3.7773680686950684, "learning_rate": 2e-05, "loss": 0.05020875, "step": 16529 }, { "epoch": 33.06, "grad_norm": 1.1028627157211304, "learning_rate": 2e-05, "loss": 0.05043498, "step": 16530 }, { "epoch": 33.062, "grad_norm": 0.9813693165779114, "learning_rate": 2e-05, "loss": 0.03821371, "step": 16531 }, { "epoch": 33.064, "grad_norm": 1.3395925760269165, "learning_rate": 2e-05, "loss": 0.05423224, "step": 16532 }, { "epoch": 33.066, "grad_norm": 1.0174565315246582, "learning_rate": 2e-05, "loss": 0.04584997, "step": 16533 }, { "epoch": 33.068, "grad_norm": 1.1119120121002197, "learning_rate": 2e-05, "loss": 0.04364266, "step": 16534 }, { "epoch": 33.07, "grad_norm": 1.0562788248062134, "learning_rate": 2e-05, "loss": 0.04389023, "step": 16535 }, { "epoch": 33.072, "grad_norm": 1.6769914627075195, "learning_rate": 2e-05, "loss": 0.05619061, "step": 16536 }, { "epoch": 33.074, "grad_norm": 0.9680246114730835, "learning_rate": 2e-05, "loss": 0.03963096, "step": 16537 }, { "epoch": 33.076, "grad_norm": 1.1671905517578125, "learning_rate": 2e-05, "loss": 0.04525262, "step": 16538 }, { "epoch": 33.078, "grad_norm": 1.1376272439956665, "learning_rate": 2e-05, "loss": 0.04593732, "step": 16539 }, { "epoch": 33.08, "grad_norm": 1.2598377466201782, "learning_rate": 2e-05, "loss": 0.05377841, "step": 16540 }, { "epoch": 33.082, "grad_norm": 1.14108145236969, "learning_rate": 2e-05, "loss": 0.04623436, "step": 16541 }, { "epoch": 33.084, "grad_norm": 0.9887399077415466, "learning_rate": 2e-05, "loss": 0.03961337, "step": 16542 }, { "epoch": 33.086, "grad_norm": 1.023150086402893, "learning_rate": 2e-05, "loss": 0.03559041, "step": 16543 }, { "epoch": 33.088, "grad_norm": 3.148284912109375, "learning_rate": 2e-05, "loss": 0.03908374, "step": 16544 }, { "epoch": 33.09, "grad_norm": 1.2066078186035156, "learning_rate": 2e-05, "loss": 0.03368102, "step": 16545 }, { "epoch": 33.092, "grad_norm": 1.0730541944503784, "learning_rate": 2e-05, "loss": 0.04434441, "step": 16546 }, { "epoch": 33.094, "grad_norm": 1.3829246759414673, "learning_rate": 2e-05, "loss": 0.04834133, "step": 16547 }, { "epoch": 33.096, "grad_norm": 1.3140910863876343, "learning_rate": 2e-05, "loss": 0.04902386, "step": 16548 }, { "epoch": 33.098, "grad_norm": 1.1118944883346558, "learning_rate": 2e-05, "loss": 0.0415615, "step": 16549 }, { "epoch": 33.1, "grad_norm": 1.2704483270645142, "learning_rate": 2e-05, "loss": 0.04404637, "step": 16550 }, { "epoch": 33.102, "grad_norm": 1.07719087600708, "learning_rate": 2e-05, "loss": 0.0312663, "step": 16551 }, { "epoch": 33.104, "grad_norm": 1.3883740901947021, "learning_rate": 2e-05, "loss": 0.05217436, "step": 16552 }, { "epoch": 33.106, "grad_norm": 1.2143936157226562, "learning_rate": 2e-05, "loss": 0.03088974, "step": 16553 }, { "epoch": 33.108, "grad_norm": 2.3619611263275146, "learning_rate": 2e-05, "loss": 0.05280104, "step": 16554 }, { "epoch": 33.11, "grad_norm": 1.3736426830291748, "learning_rate": 2e-05, "loss": 0.04681594, "step": 16555 }, { "epoch": 33.112, "grad_norm": 0.8139145970344543, "learning_rate": 2e-05, "loss": 0.02581437, "step": 16556 }, { "epoch": 33.114, "grad_norm": 1.0787487030029297, "learning_rate": 2e-05, "loss": 0.04514161, "step": 16557 }, { "epoch": 33.116, "grad_norm": 1.1590672731399536, "learning_rate": 2e-05, "loss": 0.04985914, "step": 16558 }, { "epoch": 33.118, "grad_norm": 1.130553960800171, "learning_rate": 2e-05, "loss": 0.04621843, "step": 16559 }, { "epoch": 33.12, "grad_norm": 1.2325562238693237, "learning_rate": 2e-05, "loss": 0.04397321, "step": 16560 }, { "epoch": 33.122, "grad_norm": 1.0951335430145264, "learning_rate": 2e-05, "loss": 0.04922796, "step": 16561 }, { "epoch": 33.124, "grad_norm": 1.2015124559402466, "learning_rate": 2e-05, "loss": 0.05954171, "step": 16562 }, { "epoch": 33.126, "grad_norm": 1.2625983953475952, "learning_rate": 2e-05, "loss": 0.04511356, "step": 16563 }, { "epoch": 33.128, "grad_norm": 0.9667055010795593, "learning_rate": 2e-05, "loss": 0.03358893, "step": 16564 }, { "epoch": 33.13, "grad_norm": 1.0958563089370728, "learning_rate": 2e-05, "loss": 0.03611242, "step": 16565 }, { "epoch": 33.132, "grad_norm": 0.9025627374649048, "learning_rate": 2e-05, "loss": 0.03051174, "step": 16566 }, { "epoch": 33.134, "grad_norm": 1.5583475828170776, "learning_rate": 2e-05, "loss": 0.05807483, "step": 16567 }, { "epoch": 33.136, "grad_norm": 1.0448895692825317, "learning_rate": 2e-05, "loss": 0.0458197, "step": 16568 }, { "epoch": 33.138, "grad_norm": 1.1707614660263062, "learning_rate": 2e-05, "loss": 0.04597853, "step": 16569 }, { "epoch": 33.14, "grad_norm": 9.085439682006836, "learning_rate": 2e-05, "loss": 0.04125863, "step": 16570 }, { "epoch": 33.142, "grad_norm": 1.642804741859436, "learning_rate": 2e-05, "loss": 0.05959935, "step": 16571 }, { "epoch": 33.144, "grad_norm": 1.0741275548934937, "learning_rate": 2e-05, "loss": 0.0415042, "step": 16572 }, { "epoch": 33.146, "grad_norm": 1.205912470817566, "learning_rate": 2e-05, "loss": 0.03962626, "step": 16573 }, { "epoch": 33.148, "grad_norm": 1.1372909545898438, "learning_rate": 2e-05, "loss": 0.05803122, "step": 16574 }, { "epoch": 33.15, "grad_norm": 1.0143489837646484, "learning_rate": 2e-05, "loss": 0.03611202, "step": 16575 }, { "epoch": 33.152, "grad_norm": 1.2386409044265747, "learning_rate": 2e-05, "loss": 0.0359178, "step": 16576 }, { "epoch": 33.154, "grad_norm": 1.91444993019104, "learning_rate": 2e-05, "loss": 0.04019357, "step": 16577 }, { "epoch": 33.156, "grad_norm": 1.215999722480774, "learning_rate": 2e-05, "loss": 0.04444807, "step": 16578 }, { "epoch": 33.158, "grad_norm": 1.5167213678359985, "learning_rate": 2e-05, "loss": 0.04090632, "step": 16579 }, { "epoch": 33.16, "grad_norm": 1.8708481788635254, "learning_rate": 2e-05, "loss": 0.04187147, "step": 16580 }, { "epoch": 33.162, "grad_norm": 1.7757595777511597, "learning_rate": 2e-05, "loss": 0.0404147, "step": 16581 }, { "epoch": 33.164, "grad_norm": 1.1033940315246582, "learning_rate": 2e-05, "loss": 0.05227657, "step": 16582 }, { "epoch": 33.166, "grad_norm": 1.4014803171157837, "learning_rate": 2e-05, "loss": 0.04744992, "step": 16583 }, { "epoch": 33.168, "grad_norm": 1.4021376371383667, "learning_rate": 2e-05, "loss": 0.0454725, "step": 16584 }, { "epoch": 33.17, "grad_norm": 1.2496156692504883, "learning_rate": 2e-05, "loss": 0.03417306, "step": 16585 }, { "epoch": 33.172, "grad_norm": 1.2188053131103516, "learning_rate": 2e-05, "loss": 0.05218752, "step": 16586 }, { "epoch": 33.174, "grad_norm": 1.0117905139923096, "learning_rate": 2e-05, "loss": 0.03470121, "step": 16587 }, { "epoch": 33.176, "grad_norm": 1.0313796997070312, "learning_rate": 2e-05, "loss": 0.04641647, "step": 16588 }, { "epoch": 33.178, "grad_norm": 0.9282832741737366, "learning_rate": 2e-05, "loss": 0.03444399, "step": 16589 }, { "epoch": 33.18, "grad_norm": 1.147687554359436, "learning_rate": 2e-05, "loss": 0.03979536, "step": 16590 }, { "epoch": 33.182, "grad_norm": 1.4381738901138306, "learning_rate": 2e-05, "loss": 0.04562946, "step": 16591 }, { "epoch": 33.184, "grad_norm": 1.9809186458587646, "learning_rate": 2e-05, "loss": 0.06956667, "step": 16592 }, { "epoch": 33.186, "grad_norm": 1.195204734802246, "learning_rate": 2e-05, "loss": 0.05627143, "step": 16593 }, { "epoch": 33.188, "grad_norm": 0.8577668070793152, "learning_rate": 2e-05, "loss": 0.02719451, "step": 16594 }, { "epoch": 33.19, "grad_norm": 1.3461871147155762, "learning_rate": 2e-05, "loss": 0.04568033, "step": 16595 }, { "epoch": 33.192, "grad_norm": 1.3425030708312988, "learning_rate": 2e-05, "loss": 0.04505402, "step": 16596 }, { "epoch": 33.194, "grad_norm": 1.674403190612793, "learning_rate": 2e-05, "loss": 0.04483566, "step": 16597 }, { "epoch": 33.196, "grad_norm": 0.9830165505409241, "learning_rate": 2e-05, "loss": 0.03588854, "step": 16598 }, { "epoch": 33.198, "grad_norm": 1.3202444314956665, "learning_rate": 2e-05, "loss": 0.06022623, "step": 16599 }, { "epoch": 33.2, "grad_norm": 0.9969018697738647, "learning_rate": 2e-05, "loss": 0.04533515, "step": 16600 }, { "epoch": 33.202, "grad_norm": 1.1821666955947876, "learning_rate": 2e-05, "loss": 0.05297372, "step": 16601 }, { "epoch": 33.204, "grad_norm": 4.628532886505127, "learning_rate": 2e-05, "loss": 0.05697471, "step": 16602 }, { "epoch": 33.206, "grad_norm": 3.326324462890625, "learning_rate": 2e-05, "loss": 0.0421439, "step": 16603 }, { "epoch": 33.208, "grad_norm": 1.9806350469589233, "learning_rate": 2e-05, "loss": 0.04390667, "step": 16604 }, { "epoch": 33.21, "grad_norm": 0.9768386483192444, "learning_rate": 2e-05, "loss": 0.03737723, "step": 16605 }, { "epoch": 33.212, "grad_norm": 0.8743588328361511, "learning_rate": 2e-05, "loss": 0.03034067, "step": 16606 }, { "epoch": 33.214, "grad_norm": 1.0335402488708496, "learning_rate": 2e-05, "loss": 0.02764076, "step": 16607 }, { "epoch": 33.216, "grad_norm": 1.0602924823760986, "learning_rate": 2e-05, "loss": 0.0448831, "step": 16608 }, { "epoch": 33.218, "grad_norm": 1.21599280834198, "learning_rate": 2e-05, "loss": 0.04380748, "step": 16609 }, { "epoch": 33.22, "grad_norm": 1.0447802543640137, "learning_rate": 2e-05, "loss": 0.04234251, "step": 16610 }, { "epoch": 33.222, "grad_norm": 1.3472880125045776, "learning_rate": 2e-05, "loss": 0.06390414, "step": 16611 }, { "epoch": 33.224, "grad_norm": 1.4157096147537231, "learning_rate": 2e-05, "loss": 0.03427018, "step": 16612 }, { "epoch": 33.226, "grad_norm": 1.4838067293167114, "learning_rate": 2e-05, "loss": 0.04150025, "step": 16613 }, { "epoch": 33.228, "grad_norm": 1.7404870986938477, "learning_rate": 2e-05, "loss": 0.0486716, "step": 16614 }, { "epoch": 33.23, "grad_norm": 1.1128737926483154, "learning_rate": 2e-05, "loss": 0.03301398, "step": 16615 }, { "epoch": 33.232, "grad_norm": 1.4341853857040405, "learning_rate": 2e-05, "loss": 0.05830026, "step": 16616 }, { "epoch": 33.234, "grad_norm": 1.0585178136825562, "learning_rate": 2e-05, "loss": 0.04020492, "step": 16617 }, { "epoch": 33.236, "grad_norm": 1.4341096878051758, "learning_rate": 2e-05, "loss": 0.03099304, "step": 16618 }, { "epoch": 33.238, "grad_norm": 1.035534381866455, "learning_rate": 2e-05, "loss": 0.03006751, "step": 16619 }, { "epoch": 33.24, "grad_norm": 0.992446780204773, "learning_rate": 2e-05, "loss": 0.03938267, "step": 16620 }, { "epoch": 33.242, "grad_norm": 1.5021088123321533, "learning_rate": 2e-05, "loss": 0.05418485, "step": 16621 }, { "epoch": 33.244, "grad_norm": 1.0584254264831543, "learning_rate": 2e-05, "loss": 0.03767761, "step": 16622 }, { "epoch": 33.246, "grad_norm": 1.6923965215682983, "learning_rate": 2e-05, "loss": 0.03513008, "step": 16623 }, { "epoch": 33.248, "grad_norm": 1.5270103216171265, "learning_rate": 2e-05, "loss": 0.05263095, "step": 16624 }, { "epoch": 33.25, "grad_norm": 1.45656156539917, "learning_rate": 2e-05, "loss": 0.04110594, "step": 16625 }, { "epoch": 33.252, "grad_norm": 1.524317979812622, "learning_rate": 2e-05, "loss": 0.05280022, "step": 16626 }, { "epoch": 33.254, "grad_norm": 1.229456901550293, "learning_rate": 2e-05, "loss": 0.05098088, "step": 16627 }, { "epoch": 33.256, "grad_norm": 1.7588896751403809, "learning_rate": 2e-05, "loss": 0.03914933, "step": 16628 }, { "epoch": 33.258, "grad_norm": 1.1047731637954712, "learning_rate": 2e-05, "loss": 0.03240821, "step": 16629 }, { "epoch": 33.26, "grad_norm": 1.0595215559005737, "learning_rate": 2e-05, "loss": 0.05026794, "step": 16630 }, { "epoch": 33.262, "grad_norm": 1.132789134979248, "learning_rate": 2e-05, "loss": 0.04580799, "step": 16631 }, { "epoch": 33.264, "grad_norm": 1.1134626865386963, "learning_rate": 2e-05, "loss": 0.05286353, "step": 16632 }, { "epoch": 33.266, "grad_norm": 1.0350605249404907, "learning_rate": 2e-05, "loss": 0.03354338, "step": 16633 }, { "epoch": 33.268, "grad_norm": 1.4341222047805786, "learning_rate": 2e-05, "loss": 0.06025121, "step": 16634 }, { "epoch": 33.27, "grad_norm": 1.0463708639144897, "learning_rate": 2e-05, "loss": 0.04659346, "step": 16635 }, { "epoch": 33.272, "grad_norm": 2.316408157348633, "learning_rate": 2e-05, "loss": 0.05661595, "step": 16636 }, { "epoch": 33.274, "grad_norm": 1.2525486946105957, "learning_rate": 2e-05, "loss": 0.04642862, "step": 16637 }, { "epoch": 33.276, "grad_norm": 1.3507734537124634, "learning_rate": 2e-05, "loss": 0.05744727, "step": 16638 }, { "epoch": 33.278, "grad_norm": 1.0149567127227783, "learning_rate": 2e-05, "loss": 0.04692563, "step": 16639 }, { "epoch": 33.28, "grad_norm": 1.5544465780258179, "learning_rate": 2e-05, "loss": 0.03992948, "step": 16640 }, { "epoch": 33.282, "grad_norm": 0.9250316023826599, "learning_rate": 2e-05, "loss": 0.03643877, "step": 16641 }, { "epoch": 33.284, "grad_norm": 1.341622233390808, "learning_rate": 2e-05, "loss": 0.05067304, "step": 16642 }, { "epoch": 33.286, "grad_norm": 1.1168239116668701, "learning_rate": 2e-05, "loss": 0.04282977, "step": 16643 }, { "epoch": 33.288, "grad_norm": 1.1053242683410645, "learning_rate": 2e-05, "loss": 0.03903052, "step": 16644 }, { "epoch": 33.29, "grad_norm": 0.9920464158058167, "learning_rate": 2e-05, "loss": 0.03518555, "step": 16645 }, { "epoch": 33.292, "grad_norm": 2.067941188812256, "learning_rate": 2e-05, "loss": 0.04913537, "step": 16646 }, { "epoch": 33.294, "grad_norm": 0.8730197548866272, "learning_rate": 2e-05, "loss": 0.02313932, "step": 16647 }, { "epoch": 33.296, "grad_norm": 1.109843134880066, "learning_rate": 2e-05, "loss": 0.04147884, "step": 16648 }, { "epoch": 33.298, "grad_norm": 1.2914462089538574, "learning_rate": 2e-05, "loss": 0.03932636, "step": 16649 }, { "epoch": 33.3, "grad_norm": 1.1206598281860352, "learning_rate": 2e-05, "loss": 0.04091083, "step": 16650 }, { "epoch": 33.302, "grad_norm": 2.085839033126831, "learning_rate": 2e-05, "loss": 0.03803088, "step": 16651 }, { "epoch": 33.304, "grad_norm": 1.384078025817871, "learning_rate": 2e-05, "loss": 0.048898, "step": 16652 }, { "epoch": 33.306, "grad_norm": 1.4028631448745728, "learning_rate": 2e-05, "loss": 0.04014731, "step": 16653 }, { "epoch": 33.308, "grad_norm": 3.990025281906128, "learning_rate": 2e-05, "loss": 0.07288264, "step": 16654 }, { "epoch": 33.31, "grad_norm": 2.089921712875366, "learning_rate": 2e-05, "loss": 0.04600559, "step": 16655 }, { "epoch": 33.312, "grad_norm": 1.1702724695205688, "learning_rate": 2e-05, "loss": 0.04183608, "step": 16656 }, { "epoch": 33.314, "grad_norm": 1.2754509449005127, "learning_rate": 2e-05, "loss": 0.05778325, "step": 16657 }, { "epoch": 33.316, "grad_norm": 1.223655104637146, "learning_rate": 2e-05, "loss": 0.03841271, "step": 16658 }, { "epoch": 33.318, "grad_norm": 1.3654208183288574, "learning_rate": 2e-05, "loss": 0.04274459, "step": 16659 }, { "epoch": 33.32, "grad_norm": 1.2712701559066772, "learning_rate": 2e-05, "loss": 0.06252027, "step": 16660 }, { "epoch": 33.322, "grad_norm": 1.2660847902297974, "learning_rate": 2e-05, "loss": 0.05174091, "step": 16661 }, { "epoch": 33.324, "grad_norm": 1.0497486591339111, "learning_rate": 2e-05, "loss": 0.03145991, "step": 16662 }, { "epoch": 33.326, "grad_norm": 1.1801789999008179, "learning_rate": 2e-05, "loss": 0.06312978, "step": 16663 }, { "epoch": 33.328, "grad_norm": 1.006455898284912, "learning_rate": 2e-05, "loss": 0.03321014, "step": 16664 }, { "epoch": 33.33, "grad_norm": 1.230626106262207, "learning_rate": 2e-05, "loss": 0.05381472, "step": 16665 }, { "epoch": 33.332, "grad_norm": 1.1948356628417969, "learning_rate": 2e-05, "loss": 0.05477214, "step": 16666 }, { "epoch": 33.334, "grad_norm": 0.9755367636680603, "learning_rate": 2e-05, "loss": 0.0340113, "step": 16667 }, { "epoch": 33.336, "grad_norm": 1.2817784547805786, "learning_rate": 2e-05, "loss": 0.04299092, "step": 16668 }, { "epoch": 33.338, "grad_norm": 1.8252346515655518, "learning_rate": 2e-05, "loss": 0.04350387, "step": 16669 }, { "epoch": 33.34, "grad_norm": 1.4566718339920044, "learning_rate": 2e-05, "loss": 0.07068452, "step": 16670 }, { "epoch": 33.342, "grad_norm": 1.1186827421188354, "learning_rate": 2e-05, "loss": 0.04574951, "step": 16671 }, { "epoch": 33.344, "grad_norm": 1.4827148914337158, "learning_rate": 2e-05, "loss": 0.05055145, "step": 16672 }, { "epoch": 33.346, "grad_norm": 1.0429630279541016, "learning_rate": 2e-05, "loss": 0.05453166, "step": 16673 }, { "epoch": 33.348, "grad_norm": 1.028663158416748, "learning_rate": 2e-05, "loss": 0.03832456, "step": 16674 }, { "epoch": 33.35, "grad_norm": 1.093003749847412, "learning_rate": 2e-05, "loss": 0.02938829, "step": 16675 }, { "epoch": 33.352, "grad_norm": 1.1109856367111206, "learning_rate": 2e-05, "loss": 0.0470498, "step": 16676 }, { "epoch": 33.354, "grad_norm": 1.8134148120880127, "learning_rate": 2e-05, "loss": 0.04907217, "step": 16677 }, { "epoch": 33.356, "grad_norm": 1.8511101007461548, "learning_rate": 2e-05, "loss": 0.06509379, "step": 16678 }, { "epoch": 33.358, "grad_norm": 1.9142557382583618, "learning_rate": 2e-05, "loss": 0.05846536, "step": 16679 }, { "epoch": 33.36, "grad_norm": 1.758450984954834, "learning_rate": 2e-05, "loss": 0.0506797, "step": 16680 }, { "epoch": 33.362, "grad_norm": 1.0345455408096313, "learning_rate": 2e-05, "loss": 0.02182858, "step": 16681 }, { "epoch": 33.364, "grad_norm": 0.8818876147270203, "learning_rate": 2e-05, "loss": 0.03428523, "step": 16682 }, { "epoch": 33.366, "grad_norm": 1.1541608572006226, "learning_rate": 2e-05, "loss": 0.04221281, "step": 16683 }, { "epoch": 33.368, "grad_norm": 2.0910086631774902, "learning_rate": 2e-05, "loss": 0.05484948, "step": 16684 }, { "epoch": 33.37, "grad_norm": 1.5280766487121582, "learning_rate": 2e-05, "loss": 0.04033177, "step": 16685 }, { "epoch": 33.372, "grad_norm": 1.5336265563964844, "learning_rate": 2e-05, "loss": 0.07201979, "step": 16686 }, { "epoch": 33.374, "grad_norm": 1.0325452089309692, "learning_rate": 2e-05, "loss": 0.04460095, "step": 16687 }, { "epoch": 33.376, "grad_norm": 1.1667121648788452, "learning_rate": 2e-05, "loss": 0.03729772, "step": 16688 }, { "epoch": 33.378, "grad_norm": 1.0049197673797607, "learning_rate": 2e-05, "loss": 0.03926473, "step": 16689 }, { "epoch": 33.38, "grad_norm": 1.0190849304199219, "learning_rate": 2e-05, "loss": 0.04429299, "step": 16690 }, { "epoch": 33.382, "grad_norm": 1.6374855041503906, "learning_rate": 2e-05, "loss": 0.0402529, "step": 16691 }, { "epoch": 33.384, "grad_norm": 1.9335492849349976, "learning_rate": 2e-05, "loss": 0.04418702, "step": 16692 }, { "epoch": 33.386, "grad_norm": 1.2775547504425049, "learning_rate": 2e-05, "loss": 0.06136301, "step": 16693 }, { "epoch": 33.388, "grad_norm": 1.6006691455841064, "learning_rate": 2e-05, "loss": 0.0314602, "step": 16694 }, { "epoch": 33.39, "grad_norm": 1.0697267055511475, "learning_rate": 2e-05, "loss": 0.0418988, "step": 16695 }, { "epoch": 33.392, "grad_norm": 0.9716179966926575, "learning_rate": 2e-05, "loss": 0.0483218, "step": 16696 }, { "epoch": 33.394, "grad_norm": 1.067556619644165, "learning_rate": 2e-05, "loss": 0.03973559, "step": 16697 }, { "epoch": 33.396, "grad_norm": 0.8622788190841675, "learning_rate": 2e-05, "loss": 0.03342386, "step": 16698 }, { "epoch": 33.398, "grad_norm": 0.8553065061569214, "learning_rate": 2e-05, "loss": 0.0209633, "step": 16699 }, { "epoch": 33.4, "grad_norm": 2.0682711601257324, "learning_rate": 2e-05, "loss": 0.04893482, "step": 16700 }, { "epoch": 33.402, "grad_norm": 1.1931493282318115, "learning_rate": 2e-05, "loss": 0.03984997, "step": 16701 }, { "epoch": 33.404, "grad_norm": 0.9387798309326172, "learning_rate": 2e-05, "loss": 0.04061756, "step": 16702 }, { "epoch": 33.406, "grad_norm": 0.978615403175354, "learning_rate": 2e-05, "loss": 0.04662415, "step": 16703 }, { "epoch": 33.408, "grad_norm": 1.0079689025878906, "learning_rate": 2e-05, "loss": 0.0481458, "step": 16704 }, { "epoch": 33.41, "grad_norm": 1.3838824033737183, "learning_rate": 2e-05, "loss": 0.0476383, "step": 16705 }, { "epoch": 33.412, "grad_norm": 6.977351188659668, "learning_rate": 2e-05, "loss": 0.055416, "step": 16706 }, { "epoch": 33.414, "grad_norm": 1.4241523742675781, "learning_rate": 2e-05, "loss": 0.05415621, "step": 16707 }, { "epoch": 33.416, "grad_norm": 1.1639354228973389, "learning_rate": 2e-05, "loss": 0.03803099, "step": 16708 }, { "epoch": 33.418, "grad_norm": 2.0808916091918945, "learning_rate": 2e-05, "loss": 0.05247238, "step": 16709 }, { "epoch": 33.42, "grad_norm": 1.085188627243042, "learning_rate": 2e-05, "loss": 0.0429457, "step": 16710 }, { "epoch": 33.422, "grad_norm": 1.10118567943573, "learning_rate": 2e-05, "loss": 0.02933662, "step": 16711 }, { "epoch": 33.424, "grad_norm": 0.9968042373657227, "learning_rate": 2e-05, "loss": 0.04994863, "step": 16712 }, { "epoch": 33.426, "grad_norm": 1.062031865119934, "learning_rate": 2e-05, "loss": 0.04247552, "step": 16713 }, { "epoch": 33.428, "grad_norm": 1.0574841499328613, "learning_rate": 2e-05, "loss": 0.03673478, "step": 16714 }, { "epoch": 33.43, "grad_norm": 2.7069244384765625, "learning_rate": 2e-05, "loss": 0.05789174, "step": 16715 }, { "epoch": 33.432, "grad_norm": 1.0931012630462646, "learning_rate": 2e-05, "loss": 0.04048319, "step": 16716 }, { "epoch": 33.434, "grad_norm": 1.2773255109786987, "learning_rate": 2e-05, "loss": 0.04112638, "step": 16717 }, { "epoch": 33.436, "grad_norm": 1.443752646446228, "learning_rate": 2e-05, "loss": 0.05293462, "step": 16718 }, { "epoch": 33.438, "grad_norm": 1.4061460494995117, "learning_rate": 2e-05, "loss": 0.04364372, "step": 16719 }, { "epoch": 33.44, "grad_norm": 1.7667838335037231, "learning_rate": 2e-05, "loss": 0.05355511, "step": 16720 }, { "epoch": 33.442, "grad_norm": 1.6574691534042358, "learning_rate": 2e-05, "loss": 0.05231334, "step": 16721 }, { "epoch": 33.444, "grad_norm": 1.1224724054336548, "learning_rate": 2e-05, "loss": 0.05360379, "step": 16722 }, { "epoch": 33.446, "grad_norm": 1.7101939916610718, "learning_rate": 2e-05, "loss": 0.03765354, "step": 16723 }, { "epoch": 33.448, "grad_norm": 1.159286379814148, "learning_rate": 2e-05, "loss": 0.0486055, "step": 16724 }, { "epoch": 33.45, "grad_norm": 1.726418137550354, "learning_rate": 2e-05, "loss": 0.04625576, "step": 16725 }, { "epoch": 33.452, "grad_norm": 1.9837815761566162, "learning_rate": 2e-05, "loss": 0.03466798, "step": 16726 }, { "epoch": 33.454, "grad_norm": 1.2571098804473877, "learning_rate": 2e-05, "loss": 0.0438376, "step": 16727 }, { "epoch": 33.456, "grad_norm": 1.7722887992858887, "learning_rate": 2e-05, "loss": 0.04552971, "step": 16728 }, { "epoch": 33.458, "grad_norm": 1.3634889125823975, "learning_rate": 2e-05, "loss": 0.05723719, "step": 16729 }, { "epoch": 33.46, "grad_norm": 1.541359782218933, "learning_rate": 2e-05, "loss": 0.04866739, "step": 16730 }, { "epoch": 33.462, "grad_norm": 1.0869359970092773, "learning_rate": 2e-05, "loss": 0.05120804, "step": 16731 }, { "epoch": 33.464, "grad_norm": 0.8874366283416748, "learning_rate": 2e-05, "loss": 0.03272026, "step": 16732 }, { "epoch": 33.466, "grad_norm": 2.1792409420013428, "learning_rate": 2e-05, "loss": 0.04636176, "step": 16733 }, { "epoch": 33.468, "grad_norm": 0.9029547572135925, "learning_rate": 2e-05, "loss": 0.03116245, "step": 16734 }, { "epoch": 33.47, "grad_norm": 0.8333254456520081, "learning_rate": 2e-05, "loss": 0.03217118, "step": 16735 }, { "epoch": 33.472, "grad_norm": 1.0044220685958862, "learning_rate": 2e-05, "loss": 0.04300441, "step": 16736 }, { "epoch": 33.474, "grad_norm": 1.1030489206314087, "learning_rate": 2e-05, "loss": 0.05166584, "step": 16737 }, { "epoch": 33.476, "grad_norm": 1.6261696815490723, "learning_rate": 2e-05, "loss": 0.06001935, "step": 16738 }, { "epoch": 33.478, "grad_norm": 1.111883282661438, "learning_rate": 2e-05, "loss": 0.06290857, "step": 16739 }, { "epoch": 33.48, "grad_norm": 1.878312349319458, "learning_rate": 2e-05, "loss": 0.04951257, "step": 16740 }, { "epoch": 33.482, "grad_norm": 1.1377012729644775, "learning_rate": 2e-05, "loss": 0.04261282, "step": 16741 }, { "epoch": 33.484, "grad_norm": 1.09139084815979, "learning_rate": 2e-05, "loss": 0.04241245, "step": 16742 }, { "epoch": 33.486, "grad_norm": 0.96734219789505, "learning_rate": 2e-05, "loss": 0.03415666, "step": 16743 }, { "epoch": 33.488, "grad_norm": 1.3798837661743164, "learning_rate": 2e-05, "loss": 0.04269566, "step": 16744 }, { "epoch": 33.49, "grad_norm": 1.590126633644104, "learning_rate": 2e-05, "loss": 0.05497338, "step": 16745 }, { "epoch": 33.492, "grad_norm": 1.3253952264785767, "learning_rate": 2e-05, "loss": 0.0640524, "step": 16746 }, { "epoch": 33.494, "grad_norm": 1.1076223850250244, "learning_rate": 2e-05, "loss": 0.03531731, "step": 16747 }, { "epoch": 33.496, "grad_norm": 1.294302225112915, "learning_rate": 2e-05, "loss": 0.05923747, "step": 16748 }, { "epoch": 33.498, "grad_norm": 0.8775267004966736, "learning_rate": 2e-05, "loss": 0.02593322, "step": 16749 }, { "epoch": 33.5, "grad_norm": 1.5242544412612915, "learning_rate": 2e-05, "loss": 0.05082632, "step": 16750 }, { "epoch": 33.502, "grad_norm": 1.1137971878051758, "learning_rate": 2e-05, "loss": 0.03520195, "step": 16751 }, { "epoch": 33.504, "grad_norm": 0.9322889447212219, "learning_rate": 2e-05, "loss": 0.02726999, "step": 16752 }, { "epoch": 33.506, "grad_norm": 1.2001891136169434, "learning_rate": 2e-05, "loss": 0.05200133, "step": 16753 }, { "epoch": 33.508, "grad_norm": 2.1276655197143555, "learning_rate": 2e-05, "loss": 0.052678, "step": 16754 }, { "epoch": 33.51, "grad_norm": 1.0112789869308472, "learning_rate": 2e-05, "loss": 0.03859164, "step": 16755 }, { "epoch": 33.512, "grad_norm": 1.2927403450012207, "learning_rate": 2e-05, "loss": 0.03665455, "step": 16756 }, { "epoch": 33.514, "grad_norm": 1.0401158332824707, "learning_rate": 2e-05, "loss": 0.03105365, "step": 16757 }, { "epoch": 33.516, "grad_norm": 1.1500132083892822, "learning_rate": 2e-05, "loss": 0.04472232, "step": 16758 }, { "epoch": 33.518, "grad_norm": 1.1022112369537354, "learning_rate": 2e-05, "loss": 0.04599692, "step": 16759 }, { "epoch": 33.52, "grad_norm": 1.0614567995071411, "learning_rate": 2e-05, "loss": 0.02671903, "step": 16760 }, { "epoch": 33.522, "grad_norm": 1.0899406671524048, "learning_rate": 2e-05, "loss": 0.04487581, "step": 16761 }, { "epoch": 33.524, "grad_norm": 4.4351420402526855, "learning_rate": 2e-05, "loss": 0.07421529, "step": 16762 }, { "epoch": 33.526, "grad_norm": 1.7236042022705078, "learning_rate": 2e-05, "loss": 0.05017624, "step": 16763 }, { "epoch": 33.528, "grad_norm": 2.098457098007202, "learning_rate": 2e-05, "loss": 0.06011597, "step": 16764 }, { "epoch": 33.53, "grad_norm": 1.382616639137268, "learning_rate": 2e-05, "loss": 0.04970013, "step": 16765 }, { "epoch": 33.532, "grad_norm": 1.0313087701797485, "learning_rate": 2e-05, "loss": 0.03498481, "step": 16766 }, { "epoch": 33.534, "grad_norm": 1.3942406177520752, "learning_rate": 2e-05, "loss": 0.06320328, "step": 16767 }, { "epoch": 33.536, "grad_norm": 1.248572587966919, "learning_rate": 2e-05, "loss": 0.04761049, "step": 16768 }, { "epoch": 33.538, "grad_norm": 1.1801948547363281, "learning_rate": 2e-05, "loss": 0.06184831, "step": 16769 }, { "epoch": 33.54, "grad_norm": 1.5862772464752197, "learning_rate": 2e-05, "loss": 0.0500682, "step": 16770 }, { "epoch": 33.542, "grad_norm": 1.1764925718307495, "learning_rate": 2e-05, "loss": 0.0524102, "step": 16771 }, { "epoch": 33.544, "grad_norm": 1.1759260892868042, "learning_rate": 2e-05, "loss": 0.06182307, "step": 16772 }, { "epoch": 33.546, "grad_norm": 1.9686436653137207, "learning_rate": 2e-05, "loss": 0.05447841, "step": 16773 }, { "epoch": 33.548, "grad_norm": 1.160290002822876, "learning_rate": 2e-05, "loss": 0.04832529, "step": 16774 }, { "epoch": 33.55, "grad_norm": 1.377897024154663, "learning_rate": 2e-05, "loss": 0.05795332, "step": 16775 }, { "epoch": 33.552, "grad_norm": 1.2845187187194824, "learning_rate": 2e-05, "loss": 0.04327268, "step": 16776 }, { "epoch": 33.554, "grad_norm": 1.2175860404968262, "learning_rate": 2e-05, "loss": 0.05182181, "step": 16777 }, { "epoch": 33.556, "grad_norm": 1.108862280845642, "learning_rate": 2e-05, "loss": 0.05602034, "step": 16778 }, { "epoch": 33.558, "grad_norm": 1.0894269943237305, "learning_rate": 2e-05, "loss": 0.0467835, "step": 16779 }, { "epoch": 33.56, "grad_norm": 1.2937209606170654, "learning_rate": 2e-05, "loss": 0.03083139, "step": 16780 }, { "epoch": 33.562, "grad_norm": 1.3319506645202637, "learning_rate": 2e-05, "loss": 0.0571822, "step": 16781 }, { "epoch": 33.564, "grad_norm": 1.2457597255706787, "learning_rate": 2e-05, "loss": 0.0526475, "step": 16782 }, { "epoch": 33.566, "grad_norm": 1.3428682088851929, "learning_rate": 2e-05, "loss": 0.06089408, "step": 16783 }, { "epoch": 33.568, "grad_norm": 1.1830236911773682, "learning_rate": 2e-05, "loss": 0.04613323, "step": 16784 }, { "epoch": 33.57, "grad_norm": 2.0973520278930664, "learning_rate": 2e-05, "loss": 0.04750288, "step": 16785 }, { "epoch": 33.572, "grad_norm": 1.1191425323486328, "learning_rate": 2e-05, "loss": 0.04366719, "step": 16786 }, { "epoch": 33.574, "grad_norm": 2.478419303894043, "learning_rate": 2e-05, "loss": 0.04870261, "step": 16787 }, { "epoch": 33.576, "grad_norm": 1.238233208656311, "learning_rate": 2e-05, "loss": 0.05543873, "step": 16788 }, { "epoch": 33.578, "grad_norm": 0.9477821588516235, "learning_rate": 2e-05, "loss": 0.04037328, "step": 16789 }, { "epoch": 33.58, "grad_norm": 1.0569583177566528, "learning_rate": 2e-05, "loss": 0.04398725, "step": 16790 }, { "epoch": 33.582, "grad_norm": 1.1038230657577515, "learning_rate": 2e-05, "loss": 0.04375236, "step": 16791 }, { "epoch": 33.584, "grad_norm": 2.425037145614624, "learning_rate": 2e-05, "loss": 0.0592554, "step": 16792 }, { "epoch": 33.586, "grad_norm": 1.1464440822601318, "learning_rate": 2e-05, "loss": 0.0462886, "step": 16793 }, { "epoch": 33.588, "grad_norm": 1.2883245944976807, "learning_rate": 2e-05, "loss": 0.04868394, "step": 16794 }, { "epoch": 33.59, "grad_norm": 1.1177384853363037, "learning_rate": 2e-05, "loss": 0.03865092, "step": 16795 }, { "epoch": 33.592, "grad_norm": 0.9656432271003723, "learning_rate": 2e-05, "loss": 0.04531487, "step": 16796 }, { "epoch": 33.594, "grad_norm": 1.1266521215438843, "learning_rate": 2e-05, "loss": 0.06155606, "step": 16797 }, { "epoch": 33.596, "grad_norm": 0.8288472890853882, "learning_rate": 2e-05, "loss": 0.034019, "step": 16798 }, { "epoch": 33.598, "grad_norm": 1.667454481124878, "learning_rate": 2e-05, "loss": 0.0538353, "step": 16799 }, { "epoch": 33.6, "grad_norm": 0.9877506494522095, "learning_rate": 2e-05, "loss": 0.03559139, "step": 16800 }, { "epoch": 33.602, "grad_norm": 1.1813974380493164, "learning_rate": 2e-05, "loss": 0.03930661, "step": 16801 }, { "epoch": 33.604, "grad_norm": 1.3372219800949097, "learning_rate": 2e-05, "loss": 0.04294479, "step": 16802 }, { "epoch": 33.606, "grad_norm": 1.194955587387085, "learning_rate": 2e-05, "loss": 0.05057523, "step": 16803 }, { "epoch": 33.608, "grad_norm": 1.4349327087402344, "learning_rate": 2e-05, "loss": 0.03331355, "step": 16804 }, { "epoch": 33.61, "grad_norm": 1.0969152450561523, "learning_rate": 2e-05, "loss": 0.04242614, "step": 16805 }, { "epoch": 33.612, "grad_norm": 1.0320405960083008, "learning_rate": 2e-05, "loss": 0.03203707, "step": 16806 }, { "epoch": 33.614, "grad_norm": 0.9669294953346252, "learning_rate": 2e-05, "loss": 0.04317578, "step": 16807 }, { "epoch": 33.616, "grad_norm": 1.8110538721084595, "learning_rate": 2e-05, "loss": 0.03592813, "step": 16808 }, { "epoch": 33.618, "grad_norm": 1.2686281204223633, "learning_rate": 2e-05, "loss": 0.03418735, "step": 16809 }, { "epoch": 33.62, "grad_norm": 1.1134467124938965, "learning_rate": 2e-05, "loss": 0.04533934, "step": 16810 }, { "epoch": 33.622, "grad_norm": 0.9419783353805542, "learning_rate": 2e-05, "loss": 0.03797448, "step": 16811 }, { "epoch": 33.624, "grad_norm": 1.224125623703003, "learning_rate": 2e-05, "loss": 0.05355991, "step": 16812 }, { "epoch": 33.626, "grad_norm": 1.371258020401001, "learning_rate": 2e-05, "loss": 0.04905194, "step": 16813 }, { "epoch": 33.628, "grad_norm": 0.901395857334137, "learning_rate": 2e-05, "loss": 0.02979318, "step": 16814 }, { "epoch": 33.63, "grad_norm": 1.3686851263046265, "learning_rate": 2e-05, "loss": 0.04690338, "step": 16815 }, { "epoch": 33.632, "grad_norm": 1.1568562984466553, "learning_rate": 2e-05, "loss": 0.04374412, "step": 16816 }, { "epoch": 33.634, "grad_norm": 1.0261744260787964, "learning_rate": 2e-05, "loss": 0.04495176, "step": 16817 }, { "epoch": 33.636, "grad_norm": 1.3978925943374634, "learning_rate": 2e-05, "loss": 0.04324072, "step": 16818 }, { "epoch": 33.638, "grad_norm": 1.0221015214920044, "learning_rate": 2e-05, "loss": 0.05086507, "step": 16819 }, { "epoch": 33.64, "grad_norm": 1.7645115852355957, "learning_rate": 2e-05, "loss": 0.05064385, "step": 16820 }, { "epoch": 33.642, "grad_norm": 1.6214035749435425, "learning_rate": 2e-05, "loss": 0.05282311, "step": 16821 }, { "epoch": 33.644, "grad_norm": 1.2322940826416016, "learning_rate": 2e-05, "loss": 0.04383982, "step": 16822 }, { "epoch": 33.646, "grad_norm": 1.1515724658966064, "learning_rate": 2e-05, "loss": 0.03066201, "step": 16823 }, { "epoch": 33.648, "grad_norm": 1.2402650117874146, "learning_rate": 2e-05, "loss": 0.04856699, "step": 16824 }, { "epoch": 33.65, "grad_norm": 0.8939598798751831, "learning_rate": 2e-05, "loss": 0.03899084, "step": 16825 }, { "epoch": 33.652, "grad_norm": 1.0902009010314941, "learning_rate": 2e-05, "loss": 0.0382041, "step": 16826 }, { "epoch": 33.654, "grad_norm": 0.9257661700248718, "learning_rate": 2e-05, "loss": 0.03412707, "step": 16827 }, { "epoch": 33.656, "grad_norm": 1.7373106479644775, "learning_rate": 2e-05, "loss": 0.06343647, "step": 16828 }, { "epoch": 33.658, "grad_norm": 1.1711173057556152, "learning_rate": 2e-05, "loss": 0.04290133, "step": 16829 }, { "epoch": 33.66, "grad_norm": 0.9491081237792969, "learning_rate": 2e-05, "loss": 0.03879829, "step": 16830 }, { "epoch": 33.662, "grad_norm": 1.0450215339660645, "learning_rate": 2e-05, "loss": 0.03610145, "step": 16831 }, { "epoch": 33.664, "grad_norm": 1.03641676902771, "learning_rate": 2e-05, "loss": 0.0412204, "step": 16832 }, { "epoch": 33.666, "grad_norm": 1.0499662160873413, "learning_rate": 2e-05, "loss": 0.03340659, "step": 16833 }, { "epoch": 33.668, "grad_norm": 2.071040630340576, "learning_rate": 2e-05, "loss": 0.05954115, "step": 16834 }, { "epoch": 33.67, "grad_norm": 1.0578644275665283, "learning_rate": 2e-05, "loss": 0.04512309, "step": 16835 }, { "epoch": 33.672, "grad_norm": 1.6728229522705078, "learning_rate": 2e-05, "loss": 0.05285092, "step": 16836 }, { "epoch": 33.674, "grad_norm": 1.0980989933013916, "learning_rate": 2e-05, "loss": 0.04752784, "step": 16837 }, { "epoch": 33.676, "grad_norm": 1.3729256391525269, "learning_rate": 2e-05, "loss": 0.04440143, "step": 16838 }, { "epoch": 33.678, "grad_norm": 1.3357291221618652, "learning_rate": 2e-05, "loss": 0.03731443, "step": 16839 }, { "epoch": 33.68, "grad_norm": 1.1958260536193848, "learning_rate": 2e-05, "loss": 0.0593696, "step": 16840 }, { "epoch": 33.682, "grad_norm": 1.0951577425003052, "learning_rate": 2e-05, "loss": 0.03997927, "step": 16841 }, { "epoch": 33.684, "grad_norm": 1.183790683746338, "learning_rate": 2e-05, "loss": 0.04655997, "step": 16842 }, { "epoch": 33.686, "grad_norm": 2.192628860473633, "learning_rate": 2e-05, "loss": 0.04453117, "step": 16843 }, { "epoch": 33.688, "grad_norm": 0.8896576762199402, "learning_rate": 2e-05, "loss": 0.03379082, "step": 16844 }, { "epoch": 33.69, "grad_norm": 1.2212423086166382, "learning_rate": 2e-05, "loss": 0.06039101, "step": 16845 }, { "epoch": 33.692, "grad_norm": 1.2938156127929688, "learning_rate": 2e-05, "loss": 0.0421749, "step": 16846 }, { "epoch": 33.694, "grad_norm": 0.9321994781494141, "learning_rate": 2e-05, "loss": 0.03197181, "step": 16847 }, { "epoch": 33.696, "grad_norm": 3.002802610397339, "learning_rate": 2e-05, "loss": 0.06231321, "step": 16848 }, { "epoch": 33.698, "grad_norm": 0.9160134792327881, "learning_rate": 2e-05, "loss": 0.03138274, "step": 16849 }, { "epoch": 33.7, "grad_norm": 0.976723313331604, "learning_rate": 2e-05, "loss": 0.02938232, "step": 16850 }, { "epoch": 33.702, "grad_norm": 2.152543306350708, "learning_rate": 2e-05, "loss": 0.05210479, "step": 16851 }, { "epoch": 33.704, "grad_norm": 1.4991562366485596, "learning_rate": 2e-05, "loss": 0.05715077, "step": 16852 }, { "epoch": 33.706, "grad_norm": 1.0149827003479004, "learning_rate": 2e-05, "loss": 0.03790408, "step": 16853 }, { "epoch": 33.708, "grad_norm": 1.6808401346206665, "learning_rate": 2e-05, "loss": 0.04934963, "step": 16854 }, { "epoch": 33.71, "grad_norm": 0.8762242197990417, "learning_rate": 2e-05, "loss": 0.02679922, "step": 16855 }, { "epoch": 33.712, "grad_norm": 1.0297927856445312, "learning_rate": 2e-05, "loss": 0.04202634, "step": 16856 }, { "epoch": 33.714, "grad_norm": 0.966117262840271, "learning_rate": 2e-05, "loss": 0.04612506, "step": 16857 }, { "epoch": 33.716, "grad_norm": 1.438841700553894, "learning_rate": 2e-05, "loss": 0.05706517, "step": 16858 }, { "epoch": 33.718, "grad_norm": 2.031083345413208, "learning_rate": 2e-05, "loss": 0.05738904, "step": 16859 }, { "epoch": 33.72, "grad_norm": 1.143618106842041, "learning_rate": 2e-05, "loss": 0.04492192, "step": 16860 }, { "epoch": 33.722, "grad_norm": 1.8136661052703857, "learning_rate": 2e-05, "loss": 0.03936419, "step": 16861 }, { "epoch": 33.724, "grad_norm": 0.9590321183204651, "learning_rate": 2e-05, "loss": 0.03326879, "step": 16862 }, { "epoch": 33.726, "grad_norm": 1.0741894245147705, "learning_rate": 2e-05, "loss": 0.04013322, "step": 16863 }, { "epoch": 33.728, "grad_norm": 2.253657817840576, "learning_rate": 2e-05, "loss": 0.05492151, "step": 16864 }, { "epoch": 33.73, "grad_norm": 1.2383841276168823, "learning_rate": 2e-05, "loss": 0.04975536, "step": 16865 }, { "epoch": 33.732, "grad_norm": 1.8037011623382568, "learning_rate": 2e-05, "loss": 0.05335984, "step": 16866 }, { "epoch": 33.734, "grad_norm": 1.0084309577941895, "learning_rate": 2e-05, "loss": 0.03202467, "step": 16867 }, { "epoch": 33.736, "grad_norm": 1.8690217733383179, "learning_rate": 2e-05, "loss": 0.04777067, "step": 16868 }, { "epoch": 33.738, "grad_norm": 1.0595221519470215, "learning_rate": 2e-05, "loss": 0.04209323, "step": 16869 }, { "epoch": 33.74, "grad_norm": 1.240251898765564, "learning_rate": 2e-05, "loss": 0.04407933, "step": 16870 }, { "epoch": 33.742, "grad_norm": 1.4290738105773926, "learning_rate": 2e-05, "loss": 0.05446694, "step": 16871 }, { "epoch": 33.744, "grad_norm": 1.563338041305542, "learning_rate": 2e-05, "loss": 0.04583538, "step": 16872 }, { "epoch": 33.746, "grad_norm": 1.0854274034500122, "learning_rate": 2e-05, "loss": 0.05009954, "step": 16873 }, { "epoch": 33.748, "grad_norm": 0.9773155450820923, "learning_rate": 2e-05, "loss": 0.02649113, "step": 16874 }, { "epoch": 33.75, "grad_norm": 1.1755632162094116, "learning_rate": 2e-05, "loss": 0.03560774, "step": 16875 }, { "epoch": 33.752, "grad_norm": 1.314285397529602, "learning_rate": 2e-05, "loss": 0.03981863, "step": 16876 }, { "epoch": 33.754, "grad_norm": 1.185733675956726, "learning_rate": 2e-05, "loss": 0.05086366, "step": 16877 }, { "epoch": 33.756, "grad_norm": 1.2062492370605469, "learning_rate": 2e-05, "loss": 0.05108063, "step": 16878 }, { "epoch": 33.758, "grad_norm": 1.1473814249038696, "learning_rate": 2e-05, "loss": 0.04527102, "step": 16879 }, { "epoch": 33.76, "grad_norm": 1.4825745820999146, "learning_rate": 2e-05, "loss": 0.03744708, "step": 16880 }, { "epoch": 33.762, "grad_norm": 1.0927659273147583, "learning_rate": 2e-05, "loss": 0.03676077, "step": 16881 }, { "epoch": 33.764, "grad_norm": 1.0006119012832642, "learning_rate": 2e-05, "loss": 0.04569704, "step": 16882 }, { "epoch": 33.766, "grad_norm": 1.4109697341918945, "learning_rate": 2e-05, "loss": 0.05128131, "step": 16883 }, { "epoch": 33.768, "grad_norm": 1.3863987922668457, "learning_rate": 2e-05, "loss": 0.04939978, "step": 16884 }, { "epoch": 33.77, "grad_norm": 1.5298664569854736, "learning_rate": 2e-05, "loss": 0.04874972, "step": 16885 }, { "epoch": 33.772, "grad_norm": 1.2842392921447754, "learning_rate": 2e-05, "loss": 0.04929227, "step": 16886 }, { "epoch": 33.774, "grad_norm": 1.0562992095947266, "learning_rate": 2e-05, "loss": 0.05435775, "step": 16887 }, { "epoch": 33.776, "grad_norm": 1.0382344722747803, "learning_rate": 2e-05, "loss": 0.03989664, "step": 16888 }, { "epoch": 33.778, "grad_norm": 1.3712424039840698, "learning_rate": 2e-05, "loss": 0.04753745, "step": 16889 }, { "epoch": 33.78, "grad_norm": 1.1194196939468384, "learning_rate": 2e-05, "loss": 0.04232106, "step": 16890 }, { "epoch": 33.782, "grad_norm": 0.8917251229286194, "learning_rate": 2e-05, "loss": 0.01889046, "step": 16891 }, { "epoch": 33.784, "grad_norm": 1.0603173971176147, "learning_rate": 2e-05, "loss": 0.03573737, "step": 16892 }, { "epoch": 33.786, "grad_norm": 1.00141441822052, "learning_rate": 2e-05, "loss": 0.03627339, "step": 16893 }, { "epoch": 33.788, "grad_norm": 2.2138540744781494, "learning_rate": 2e-05, "loss": 0.05433334, "step": 16894 }, { "epoch": 33.79, "grad_norm": 1.7396957874298096, "learning_rate": 2e-05, "loss": 0.05223975, "step": 16895 }, { "epoch": 33.792, "grad_norm": 2.3335800170898438, "learning_rate": 2e-05, "loss": 0.05424222, "step": 16896 }, { "epoch": 33.794, "grad_norm": 1.3231230974197388, "learning_rate": 2e-05, "loss": 0.04395835, "step": 16897 }, { "epoch": 33.796, "grad_norm": 1.0993092060089111, "learning_rate": 2e-05, "loss": 0.04490322, "step": 16898 }, { "epoch": 33.798, "grad_norm": 1.267382025718689, "learning_rate": 2e-05, "loss": 0.04597627, "step": 16899 }, { "epoch": 33.8, "grad_norm": 1.0230637788772583, "learning_rate": 2e-05, "loss": 0.04359692, "step": 16900 }, { "epoch": 33.802, "grad_norm": 1.1223065853118896, "learning_rate": 2e-05, "loss": 0.05198922, "step": 16901 }, { "epoch": 33.804, "grad_norm": 0.8443502187728882, "learning_rate": 2e-05, "loss": 0.02532313, "step": 16902 }, { "epoch": 33.806, "grad_norm": 0.9305548071861267, "learning_rate": 2e-05, "loss": 0.03588405, "step": 16903 }, { "epoch": 33.808, "grad_norm": 1.7526792287826538, "learning_rate": 2e-05, "loss": 0.05734141, "step": 16904 }, { "epoch": 33.81, "grad_norm": 1.0317646265029907, "learning_rate": 2e-05, "loss": 0.0394004, "step": 16905 }, { "epoch": 33.812, "grad_norm": 1.6118589639663696, "learning_rate": 2e-05, "loss": 0.04723007, "step": 16906 }, { "epoch": 33.814, "grad_norm": 1.0433012247085571, "learning_rate": 2e-05, "loss": 0.03369412, "step": 16907 }, { "epoch": 33.816, "grad_norm": 1.395377516746521, "learning_rate": 2e-05, "loss": 0.05028417, "step": 16908 }, { "epoch": 33.818, "grad_norm": 1.0370322465896606, "learning_rate": 2e-05, "loss": 0.03931456, "step": 16909 }, { "epoch": 33.82, "grad_norm": 0.9001449346542358, "learning_rate": 2e-05, "loss": 0.04309556, "step": 16910 }, { "epoch": 33.822, "grad_norm": 1.033655047416687, "learning_rate": 2e-05, "loss": 0.04509419, "step": 16911 }, { "epoch": 33.824, "grad_norm": 1.142377257347107, "learning_rate": 2e-05, "loss": 0.04231268, "step": 16912 }, { "epoch": 33.826, "grad_norm": 1.1938881874084473, "learning_rate": 2e-05, "loss": 0.04529939, "step": 16913 }, { "epoch": 33.828, "grad_norm": 1.1064121723175049, "learning_rate": 2e-05, "loss": 0.04935718, "step": 16914 }, { "epoch": 33.83, "grad_norm": 1.0276626348495483, "learning_rate": 2e-05, "loss": 0.04359814, "step": 16915 }, { "epoch": 33.832, "grad_norm": 2.030622959136963, "learning_rate": 2e-05, "loss": 0.04051562, "step": 16916 }, { "epoch": 33.834, "grad_norm": 1.0597505569458008, "learning_rate": 2e-05, "loss": 0.04974733, "step": 16917 }, { "epoch": 33.836, "grad_norm": 1.1788995265960693, "learning_rate": 2e-05, "loss": 0.04477675, "step": 16918 }, { "epoch": 33.838, "grad_norm": 1.919738531112671, "learning_rate": 2e-05, "loss": 0.05079059, "step": 16919 }, { "epoch": 33.84, "grad_norm": 2.4908084869384766, "learning_rate": 2e-05, "loss": 0.05442949, "step": 16920 }, { "epoch": 33.842, "grad_norm": 1.098594307899475, "learning_rate": 2e-05, "loss": 0.04451133, "step": 16921 }, { "epoch": 33.844, "grad_norm": 1.0580613613128662, "learning_rate": 2e-05, "loss": 0.04540512, "step": 16922 }, { "epoch": 33.846, "grad_norm": 1.1829973459243774, "learning_rate": 2e-05, "loss": 0.04305437, "step": 16923 }, { "epoch": 33.848, "grad_norm": 3.6124298572540283, "learning_rate": 2e-05, "loss": 0.04639693, "step": 16924 }, { "epoch": 33.85, "grad_norm": 1.1299645900726318, "learning_rate": 2e-05, "loss": 0.03338608, "step": 16925 }, { "epoch": 33.852, "grad_norm": 1.1065938472747803, "learning_rate": 2e-05, "loss": 0.0466036, "step": 16926 }, { "epoch": 33.854, "grad_norm": 1.0841280221939087, "learning_rate": 2e-05, "loss": 0.04306466, "step": 16927 }, { "epoch": 33.856, "grad_norm": 3.342581033706665, "learning_rate": 2e-05, "loss": 0.05247286, "step": 16928 }, { "epoch": 33.858, "grad_norm": 1.7423268556594849, "learning_rate": 2e-05, "loss": 0.07976319, "step": 16929 }, { "epoch": 33.86, "grad_norm": 0.9743460416793823, "learning_rate": 2e-05, "loss": 0.03803458, "step": 16930 }, { "epoch": 33.862, "grad_norm": 0.9892178177833557, "learning_rate": 2e-05, "loss": 0.0505831, "step": 16931 }, { "epoch": 33.864, "grad_norm": 1.1504428386688232, "learning_rate": 2e-05, "loss": 0.04546354, "step": 16932 }, { "epoch": 33.866, "grad_norm": 1.1111781597137451, "learning_rate": 2e-05, "loss": 0.04097139, "step": 16933 }, { "epoch": 33.868, "grad_norm": 1.0759490728378296, "learning_rate": 2e-05, "loss": 0.0341738, "step": 16934 }, { "epoch": 33.87, "grad_norm": 0.8595969080924988, "learning_rate": 2e-05, "loss": 0.03205074, "step": 16935 }, { "epoch": 33.872, "grad_norm": 0.9404729604721069, "learning_rate": 2e-05, "loss": 0.03432872, "step": 16936 }, { "epoch": 33.874, "grad_norm": 1.0249055624008179, "learning_rate": 2e-05, "loss": 0.0409686, "step": 16937 }, { "epoch": 33.876, "grad_norm": 1.2847564220428467, "learning_rate": 2e-05, "loss": 0.04386246, "step": 16938 }, { "epoch": 33.878, "grad_norm": 0.9297494292259216, "learning_rate": 2e-05, "loss": 0.03813757, "step": 16939 }, { "epoch": 33.88, "grad_norm": 1.110254168510437, "learning_rate": 2e-05, "loss": 0.03905823, "step": 16940 }, { "epoch": 33.882, "grad_norm": 1.7914894819259644, "learning_rate": 2e-05, "loss": 0.03943986, "step": 16941 }, { "epoch": 33.884, "grad_norm": 1.8473254442214966, "learning_rate": 2e-05, "loss": 0.05671333, "step": 16942 }, { "epoch": 33.886, "grad_norm": 1.9011701345443726, "learning_rate": 2e-05, "loss": 0.06308839, "step": 16943 }, { "epoch": 33.888, "grad_norm": 1.2795780897140503, "learning_rate": 2e-05, "loss": 0.05465973, "step": 16944 }, { "epoch": 33.89, "grad_norm": 0.9979121088981628, "learning_rate": 2e-05, "loss": 0.03175057, "step": 16945 }, { "epoch": 33.892, "grad_norm": 0.9960652589797974, "learning_rate": 2e-05, "loss": 0.03760148, "step": 16946 }, { "epoch": 33.894, "grad_norm": 1.0979626178741455, "learning_rate": 2e-05, "loss": 0.04225897, "step": 16947 }, { "epoch": 33.896, "grad_norm": 1.9761242866516113, "learning_rate": 2e-05, "loss": 0.06114539, "step": 16948 }, { "epoch": 33.898, "grad_norm": 1.7545679807662964, "learning_rate": 2e-05, "loss": 0.05525258, "step": 16949 }, { "epoch": 33.9, "grad_norm": 2.1747097969055176, "learning_rate": 2e-05, "loss": 0.06167426, "step": 16950 }, { "epoch": 33.902, "grad_norm": 1.2962758541107178, "learning_rate": 2e-05, "loss": 0.04283836, "step": 16951 }, { "epoch": 33.904, "grad_norm": 1.1787168979644775, "learning_rate": 2e-05, "loss": 0.03638425, "step": 16952 }, { "epoch": 33.906, "grad_norm": 1.0658072233200073, "learning_rate": 2e-05, "loss": 0.03799438, "step": 16953 }, { "epoch": 33.908, "grad_norm": 1.0449610948562622, "learning_rate": 2e-05, "loss": 0.04288002, "step": 16954 }, { "epoch": 33.91, "grad_norm": 0.9410510063171387, "learning_rate": 2e-05, "loss": 0.03957549, "step": 16955 }, { "epoch": 33.912, "grad_norm": 0.8891138434410095, "learning_rate": 2e-05, "loss": 0.03369577, "step": 16956 }, { "epoch": 33.914, "grad_norm": 1.6849256753921509, "learning_rate": 2e-05, "loss": 0.04120582, "step": 16957 }, { "epoch": 33.916, "grad_norm": 1.3625929355621338, "learning_rate": 2e-05, "loss": 0.03724352, "step": 16958 }, { "epoch": 33.918, "grad_norm": 1.1941505670547485, "learning_rate": 2e-05, "loss": 0.03642944, "step": 16959 }, { "epoch": 33.92, "grad_norm": 1.2956150770187378, "learning_rate": 2e-05, "loss": 0.03858388, "step": 16960 }, { "epoch": 33.922, "grad_norm": 1.2962173223495483, "learning_rate": 2e-05, "loss": 0.03574048, "step": 16961 }, { "epoch": 33.924, "grad_norm": 1.6249195337295532, "learning_rate": 2e-05, "loss": 0.05512132, "step": 16962 }, { "epoch": 33.926, "grad_norm": 1.317758321762085, "learning_rate": 2e-05, "loss": 0.03322713, "step": 16963 }, { "epoch": 33.928, "grad_norm": 1.0346006155014038, "learning_rate": 2e-05, "loss": 0.05115522, "step": 16964 }, { "epoch": 33.93, "grad_norm": 2.8760225772857666, "learning_rate": 2e-05, "loss": 0.02724821, "step": 16965 }, { "epoch": 33.932, "grad_norm": 1.096021294593811, "learning_rate": 2e-05, "loss": 0.04182556, "step": 16966 }, { "epoch": 33.934, "grad_norm": 1.1291561126708984, "learning_rate": 2e-05, "loss": 0.04082775, "step": 16967 }, { "epoch": 33.936, "grad_norm": 0.971754252910614, "learning_rate": 2e-05, "loss": 0.04104783, "step": 16968 }, { "epoch": 33.938, "grad_norm": 1.338118076324463, "learning_rate": 2e-05, "loss": 0.0264123, "step": 16969 }, { "epoch": 33.94, "grad_norm": 1.0293909311294556, "learning_rate": 2e-05, "loss": 0.04426341, "step": 16970 }, { "epoch": 33.942, "grad_norm": 1.1127252578735352, "learning_rate": 2e-05, "loss": 0.03996481, "step": 16971 }, { "epoch": 33.944, "grad_norm": 1.0948736667633057, "learning_rate": 2e-05, "loss": 0.04865056, "step": 16972 }, { "epoch": 33.946, "grad_norm": 1.1186541318893433, "learning_rate": 2e-05, "loss": 0.04746978, "step": 16973 }, { "epoch": 33.948, "grad_norm": 0.9960750341415405, "learning_rate": 2e-05, "loss": 0.03804594, "step": 16974 }, { "epoch": 33.95, "grad_norm": 1.2927501201629639, "learning_rate": 2e-05, "loss": 0.05450273, "step": 16975 }, { "epoch": 33.952, "grad_norm": 1.4125611782073975, "learning_rate": 2e-05, "loss": 0.05608493, "step": 16976 }, { "epoch": 33.954, "grad_norm": 0.9154422879219055, "learning_rate": 2e-05, "loss": 0.04654478, "step": 16977 }, { "epoch": 33.956, "grad_norm": 0.8705936670303345, "learning_rate": 2e-05, "loss": 0.02712861, "step": 16978 }, { "epoch": 33.958, "grad_norm": 1.2532387971878052, "learning_rate": 2e-05, "loss": 0.05182055, "step": 16979 }, { "epoch": 33.96, "grad_norm": 1.1744275093078613, "learning_rate": 2e-05, "loss": 0.04112976, "step": 16980 }, { "epoch": 33.962, "grad_norm": 1.4567869901657104, "learning_rate": 2e-05, "loss": 0.0405011, "step": 16981 }, { "epoch": 33.964, "grad_norm": 1.130476713180542, "learning_rate": 2e-05, "loss": 0.03737068, "step": 16982 }, { "epoch": 33.966, "grad_norm": 1.2590807676315308, "learning_rate": 2e-05, "loss": 0.04335234, "step": 16983 }, { "epoch": 33.968, "grad_norm": 1.120628833770752, "learning_rate": 2e-05, "loss": 0.04267873, "step": 16984 }, { "epoch": 33.97, "grad_norm": 1.123035192489624, "learning_rate": 2e-05, "loss": 0.050589, "step": 16985 }, { "epoch": 33.972, "grad_norm": 1.0445022583007812, "learning_rate": 2e-05, "loss": 0.03830654, "step": 16986 }, { "epoch": 33.974, "grad_norm": 1.1337977647781372, "learning_rate": 2e-05, "loss": 0.04412445, "step": 16987 }, { "epoch": 33.976, "grad_norm": 1.0844800472259521, "learning_rate": 2e-05, "loss": 0.03503221, "step": 16988 }, { "epoch": 33.978, "grad_norm": 1.723754644393921, "learning_rate": 2e-05, "loss": 0.04388619, "step": 16989 }, { "epoch": 33.98, "grad_norm": 1.293534755706787, "learning_rate": 2e-05, "loss": 0.05169663, "step": 16990 }, { "epoch": 33.982, "grad_norm": 0.9156273007392883, "learning_rate": 2e-05, "loss": 0.0327623, "step": 16991 }, { "epoch": 33.984, "grad_norm": 1.0908451080322266, "learning_rate": 2e-05, "loss": 0.05295175, "step": 16992 }, { "epoch": 33.986, "grad_norm": 1.2871785163879395, "learning_rate": 2e-05, "loss": 0.0453411, "step": 16993 }, { "epoch": 33.988, "grad_norm": 0.9044437408447266, "learning_rate": 2e-05, "loss": 0.02947322, "step": 16994 }, { "epoch": 33.99, "grad_norm": 1.4121615886688232, "learning_rate": 2e-05, "loss": 0.0451433, "step": 16995 }, { "epoch": 33.992, "grad_norm": 1.1805915832519531, "learning_rate": 2e-05, "loss": 0.0479506, "step": 16996 }, { "epoch": 33.994, "grad_norm": 1.3413439989089966, "learning_rate": 2e-05, "loss": 0.04659811, "step": 16997 }, { "epoch": 33.996, "grad_norm": 1.2541238069534302, "learning_rate": 2e-05, "loss": 0.0541524, "step": 16998 }, { "epoch": 33.998, "grad_norm": 0.9893317818641663, "learning_rate": 2e-05, "loss": 0.03523817, "step": 16999 }, { "epoch": 34.0, "grad_norm": 1.0588018894195557, "learning_rate": 2e-05, "loss": 0.02352007, "step": 17000 }, { "epoch": 34.0, "eval_performance": { "AngleClassification_1": 1.0, "AngleClassification_2": 0.994, "AngleClassification_3": 0.9700598802395209, "Equal_1": 0.996, "Equal_2": 0.9800399201596807, "Equal_3": 0.9780439121756487, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.9940119760479041, "Parallel_1": 0.9899799599198397, "Parallel_2": 0.9979959919839679, "Parallel_3": 0.992, "Perpendicular_1": 0.996, "Perpendicular_2": 0.994, "Perpendicular_3": 0.8957915831663327, "PointLiesOnCircle_1": 1.0, "PointLiesOnCircle_2": 0.998, "PointLiesOnCircle_3": 0.9932000000000001, "PointLiesOnLine_1": 0.9879759519038076, "PointLiesOnLine_2": 0.9959919839679359, "PointLiesOnLine_3": 0.9860279441117764 }, "eval_runtime": 224.5247, "eval_samples_per_second": 46.765, "eval_steps_per_second": 0.935, "step": 17000 }, { "epoch": 34.002, "grad_norm": 1.0813182592391968, "learning_rate": 2e-05, "loss": 0.04775403, "step": 17001 }, { "epoch": 34.004, "grad_norm": 1.79885995388031, "learning_rate": 2e-05, "loss": 0.06196499, "step": 17002 }, { "epoch": 34.006, "grad_norm": 9.993346214294434, "learning_rate": 2e-05, "loss": 0.04487408, "step": 17003 }, { "epoch": 34.008, "grad_norm": 1.0250211954116821, "learning_rate": 2e-05, "loss": 0.03856594, "step": 17004 }, { "epoch": 34.01, "grad_norm": 1.3865070343017578, "learning_rate": 2e-05, "loss": 0.05671635, "step": 17005 }, { "epoch": 34.012, "grad_norm": 0.9823437333106995, "learning_rate": 2e-05, "loss": 0.02985733, "step": 17006 }, { "epoch": 34.014, "grad_norm": 1.0553736686706543, "learning_rate": 2e-05, "loss": 0.04916045, "step": 17007 }, { "epoch": 34.016, "grad_norm": 0.9354760050773621, "learning_rate": 2e-05, "loss": 0.04175593, "step": 17008 }, { "epoch": 34.018, "grad_norm": 1.2979862689971924, "learning_rate": 2e-05, "loss": 0.0550199, "step": 17009 }, { "epoch": 34.02, "grad_norm": 0.82009357213974, "learning_rate": 2e-05, "loss": 0.03363721, "step": 17010 }, { "epoch": 34.022, "grad_norm": 1.0758121013641357, "learning_rate": 2e-05, "loss": 0.04446154, "step": 17011 }, { "epoch": 34.024, "grad_norm": 1.2119770050048828, "learning_rate": 2e-05, "loss": 0.04576187, "step": 17012 }, { "epoch": 34.026, "grad_norm": 1.0190367698669434, "learning_rate": 2e-05, "loss": 0.04084958, "step": 17013 }, { "epoch": 34.028, "grad_norm": 1.7401471138000488, "learning_rate": 2e-05, "loss": 0.0433848, "step": 17014 }, { "epoch": 34.03, "grad_norm": 1.412846326828003, "learning_rate": 2e-05, "loss": 0.06149884, "step": 17015 }, { "epoch": 34.032, "grad_norm": 1.255092740058899, "learning_rate": 2e-05, "loss": 0.05171508, "step": 17016 }, { "epoch": 34.034, "grad_norm": 1.0410019159317017, "learning_rate": 2e-05, "loss": 0.03645308, "step": 17017 }, { "epoch": 34.036, "grad_norm": 1.1988375186920166, "learning_rate": 2e-05, "loss": 0.04271626, "step": 17018 }, { "epoch": 34.038, "grad_norm": 1.0811110734939575, "learning_rate": 2e-05, "loss": 0.04431312, "step": 17019 }, { "epoch": 34.04, "grad_norm": 1.3588173389434814, "learning_rate": 2e-05, "loss": 0.05141532, "step": 17020 }, { "epoch": 34.042, "grad_norm": 1.0977833271026611, "learning_rate": 2e-05, "loss": 0.0505279, "step": 17021 }, { "epoch": 34.044, "grad_norm": 0.9698043465614319, "learning_rate": 2e-05, "loss": 0.04233631, "step": 17022 }, { "epoch": 34.046, "grad_norm": 1.0102258920669556, "learning_rate": 2e-05, "loss": 0.04521672, "step": 17023 }, { "epoch": 34.048, "grad_norm": 1.1435089111328125, "learning_rate": 2e-05, "loss": 0.04352826, "step": 17024 }, { "epoch": 34.05, "grad_norm": 1.225576400756836, "learning_rate": 2e-05, "loss": 0.0584626, "step": 17025 }, { "epoch": 34.052, "grad_norm": 1.345522165298462, "learning_rate": 2e-05, "loss": 0.06320105, "step": 17026 }, { "epoch": 34.054, "grad_norm": 1.4009912014007568, "learning_rate": 2e-05, "loss": 0.04466003, "step": 17027 }, { "epoch": 34.056, "grad_norm": 1.2241253852844238, "learning_rate": 2e-05, "loss": 0.04635874, "step": 17028 }, { "epoch": 34.058, "grad_norm": 0.8747177124023438, "learning_rate": 2e-05, "loss": 0.02473179, "step": 17029 }, { "epoch": 34.06, "grad_norm": 1.1722195148468018, "learning_rate": 2e-05, "loss": 0.0596193, "step": 17030 }, { "epoch": 34.062, "grad_norm": 1.0516356229782104, "learning_rate": 2e-05, "loss": 0.04335109, "step": 17031 }, { "epoch": 34.064, "grad_norm": 0.9199697971343994, "learning_rate": 2e-05, "loss": 0.04237282, "step": 17032 }, { "epoch": 34.066, "grad_norm": 1.2853758335113525, "learning_rate": 2e-05, "loss": 0.05734375, "step": 17033 }, { "epoch": 34.068, "grad_norm": 0.9952258467674255, "learning_rate": 2e-05, "loss": 0.04137858, "step": 17034 }, { "epoch": 34.07, "grad_norm": 0.9764089584350586, "learning_rate": 2e-05, "loss": 0.04493326, "step": 17035 }, { "epoch": 34.072, "grad_norm": 1.0238741636276245, "learning_rate": 2e-05, "loss": 0.02735502, "step": 17036 }, { "epoch": 34.074, "grad_norm": 1.0455256700515747, "learning_rate": 2e-05, "loss": 0.04233541, "step": 17037 }, { "epoch": 34.076, "grad_norm": 2.609733819961548, "learning_rate": 2e-05, "loss": 0.0593502, "step": 17038 }, { "epoch": 34.078, "grad_norm": 0.9262585639953613, "learning_rate": 2e-05, "loss": 0.04030884, "step": 17039 }, { "epoch": 34.08, "grad_norm": 1.331033706665039, "learning_rate": 2e-05, "loss": 0.03557055, "step": 17040 }, { "epoch": 34.082, "grad_norm": 1.259609341621399, "learning_rate": 2e-05, "loss": 0.04652098, "step": 17041 }, { "epoch": 34.084, "grad_norm": 1.8722888231277466, "learning_rate": 2e-05, "loss": 0.04107783, "step": 17042 }, { "epoch": 34.086, "grad_norm": 1.1356465816497803, "learning_rate": 2e-05, "loss": 0.05214574, "step": 17043 }, { "epoch": 34.088, "grad_norm": 1.11617910861969, "learning_rate": 2e-05, "loss": 0.04913379, "step": 17044 }, { "epoch": 34.09, "grad_norm": 1.4429476261138916, "learning_rate": 2e-05, "loss": 0.06344774, "step": 17045 }, { "epoch": 34.092, "grad_norm": 1.139707088470459, "learning_rate": 2e-05, "loss": 0.03199575, "step": 17046 }, { "epoch": 34.094, "grad_norm": 1.6412746906280518, "learning_rate": 2e-05, "loss": 0.04055404, "step": 17047 }, { "epoch": 34.096, "grad_norm": 1.0526760816574097, "learning_rate": 2e-05, "loss": 0.04357124, "step": 17048 }, { "epoch": 34.098, "grad_norm": 2.819547653198242, "learning_rate": 2e-05, "loss": 0.07075398, "step": 17049 }, { "epoch": 34.1, "grad_norm": 0.9028600454330444, "learning_rate": 2e-05, "loss": 0.03504702, "step": 17050 }, { "epoch": 34.102, "grad_norm": 2.2238333225250244, "learning_rate": 2e-05, "loss": 0.07137491, "step": 17051 }, { "epoch": 34.104, "grad_norm": 0.6745779514312744, "learning_rate": 2e-05, "loss": 0.01565693, "step": 17052 }, { "epoch": 34.106, "grad_norm": 1.0407601594924927, "learning_rate": 2e-05, "loss": 0.03353124, "step": 17053 }, { "epoch": 34.108, "grad_norm": 1.653902530670166, "learning_rate": 2e-05, "loss": 0.05800138, "step": 17054 }, { "epoch": 34.11, "grad_norm": 1.451026201248169, "learning_rate": 2e-05, "loss": 0.0496862, "step": 17055 }, { "epoch": 34.112, "grad_norm": 1.47234046459198, "learning_rate": 2e-05, "loss": 0.05115438, "step": 17056 }, { "epoch": 34.114, "grad_norm": 4.454383850097656, "learning_rate": 2e-05, "loss": 0.05116962, "step": 17057 }, { "epoch": 34.116, "grad_norm": 1.4397034645080566, "learning_rate": 2e-05, "loss": 0.03358687, "step": 17058 }, { "epoch": 34.118, "grad_norm": 1.5460132360458374, "learning_rate": 2e-05, "loss": 0.05168049, "step": 17059 }, { "epoch": 34.12, "grad_norm": 1.117152452468872, "learning_rate": 2e-05, "loss": 0.04947547, "step": 17060 }, { "epoch": 34.122, "grad_norm": 1.1949907541275024, "learning_rate": 2e-05, "loss": 0.0322676, "step": 17061 }, { "epoch": 34.124, "grad_norm": 1.4172089099884033, "learning_rate": 2e-05, "loss": 0.05783451, "step": 17062 }, { "epoch": 34.126, "grad_norm": 1.0522807836532593, "learning_rate": 2e-05, "loss": 0.0373672, "step": 17063 }, { "epoch": 34.128, "grad_norm": 1.039962649345398, "learning_rate": 2e-05, "loss": 0.05063032, "step": 17064 }, { "epoch": 34.13, "grad_norm": 1.226788878440857, "learning_rate": 2e-05, "loss": 0.04751919, "step": 17065 }, { "epoch": 34.132, "grad_norm": 2.3012855052948, "learning_rate": 2e-05, "loss": 0.05077946, "step": 17066 }, { "epoch": 34.134, "grad_norm": 0.9739378690719604, "learning_rate": 2e-05, "loss": 0.03525701, "step": 17067 }, { "epoch": 34.136, "grad_norm": 1.1725682020187378, "learning_rate": 2e-05, "loss": 0.05755719, "step": 17068 }, { "epoch": 34.138, "grad_norm": 1.0152853727340698, "learning_rate": 2e-05, "loss": 0.03319298, "step": 17069 }, { "epoch": 34.14, "grad_norm": 0.8973001837730408, "learning_rate": 2e-05, "loss": 0.03391767, "step": 17070 }, { "epoch": 34.142, "grad_norm": 1.3244316577911377, "learning_rate": 2e-05, "loss": 0.06148648, "step": 17071 }, { "epoch": 34.144, "grad_norm": 1.3709831237792969, "learning_rate": 2e-05, "loss": 0.05361862, "step": 17072 }, { "epoch": 34.146, "grad_norm": 1.0121313333511353, "learning_rate": 2e-05, "loss": 0.05141288, "step": 17073 }, { "epoch": 34.148, "grad_norm": 1.0701656341552734, "learning_rate": 2e-05, "loss": 0.05238366, "step": 17074 }, { "epoch": 34.15, "grad_norm": 0.9535888433456421, "learning_rate": 2e-05, "loss": 0.03243857, "step": 17075 }, { "epoch": 34.152, "grad_norm": 2.0076236724853516, "learning_rate": 2e-05, "loss": 0.06148713, "step": 17076 }, { "epoch": 34.154, "grad_norm": 1.4813634157180786, "learning_rate": 2e-05, "loss": 0.03409386, "step": 17077 }, { "epoch": 34.156, "grad_norm": 1.0397002696990967, "learning_rate": 2e-05, "loss": 0.04693798, "step": 17078 }, { "epoch": 34.158, "grad_norm": 1.2457900047302246, "learning_rate": 2e-05, "loss": 0.05045411, "step": 17079 }, { "epoch": 34.16, "grad_norm": 1.300113558769226, "learning_rate": 2e-05, "loss": 0.03491394, "step": 17080 }, { "epoch": 34.162, "grad_norm": 1.0805597305297852, "learning_rate": 2e-05, "loss": 0.04618974, "step": 17081 }, { "epoch": 34.164, "grad_norm": 0.9743577837944031, "learning_rate": 2e-05, "loss": 0.04028146, "step": 17082 }, { "epoch": 34.166, "grad_norm": 1.0597054958343506, "learning_rate": 2e-05, "loss": 0.04748031, "step": 17083 }, { "epoch": 34.168, "grad_norm": 0.8433523178100586, "learning_rate": 2e-05, "loss": 0.02639165, "step": 17084 }, { "epoch": 34.17, "grad_norm": 1.0904850959777832, "learning_rate": 2e-05, "loss": 0.04953726, "step": 17085 }, { "epoch": 34.172, "grad_norm": 1.2517693042755127, "learning_rate": 2e-05, "loss": 0.03420606, "step": 17086 }, { "epoch": 34.174, "grad_norm": 1.3932130336761475, "learning_rate": 2e-05, "loss": 0.03660133, "step": 17087 }, { "epoch": 34.176, "grad_norm": 1.8332695960998535, "learning_rate": 2e-05, "loss": 0.05497397, "step": 17088 }, { "epoch": 34.178, "grad_norm": 1.2887295484542847, "learning_rate": 2e-05, "loss": 0.06870924, "step": 17089 }, { "epoch": 34.18, "grad_norm": 1.1736273765563965, "learning_rate": 2e-05, "loss": 0.05270137, "step": 17090 }, { "epoch": 34.182, "grad_norm": 1.1702964305877686, "learning_rate": 2e-05, "loss": 0.04671011, "step": 17091 }, { "epoch": 34.184, "grad_norm": 1.3048673868179321, "learning_rate": 2e-05, "loss": 0.03825036, "step": 17092 }, { "epoch": 34.186, "grad_norm": 1.6871826648712158, "learning_rate": 2e-05, "loss": 0.05655403, "step": 17093 }, { "epoch": 34.188, "grad_norm": 1.0636309385299683, "learning_rate": 2e-05, "loss": 0.04244737, "step": 17094 }, { "epoch": 34.19, "grad_norm": 1.0260772705078125, "learning_rate": 2e-05, "loss": 0.03519968, "step": 17095 }, { "epoch": 34.192, "grad_norm": 1.0678563117980957, "learning_rate": 2e-05, "loss": 0.04407102, "step": 17096 }, { "epoch": 34.194, "grad_norm": 1.4780380725860596, "learning_rate": 2e-05, "loss": 0.05176915, "step": 17097 }, { "epoch": 34.196, "grad_norm": 1.1720207929611206, "learning_rate": 2e-05, "loss": 0.05116225, "step": 17098 }, { "epoch": 34.198, "grad_norm": 1.1889187097549438, "learning_rate": 2e-05, "loss": 0.05367809, "step": 17099 }, { "epoch": 34.2, "grad_norm": 0.9148311018943787, "learning_rate": 2e-05, "loss": 0.04013107, "step": 17100 }, { "epoch": 34.202, "grad_norm": 3.55346941947937, "learning_rate": 2e-05, "loss": 0.0505816, "step": 17101 }, { "epoch": 34.204, "grad_norm": 1.5328108072280884, "learning_rate": 2e-05, "loss": 0.0514542, "step": 17102 }, { "epoch": 34.206, "grad_norm": 0.9438298344612122, "learning_rate": 2e-05, "loss": 0.03244053, "step": 17103 }, { "epoch": 34.208, "grad_norm": 1.0209169387817383, "learning_rate": 2e-05, "loss": 0.03285018, "step": 17104 }, { "epoch": 34.21, "grad_norm": 1.2115740776062012, "learning_rate": 2e-05, "loss": 0.06700765, "step": 17105 }, { "epoch": 34.212, "grad_norm": 1.540995478630066, "learning_rate": 2e-05, "loss": 0.05693542, "step": 17106 }, { "epoch": 34.214, "grad_norm": 1.1210746765136719, "learning_rate": 2e-05, "loss": 0.03737418, "step": 17107 }, { "epoch": 34.216, "grad_norm": 2.2489943504333496, "learning_rate": 2e-05, "loss": 0.04224968, "step": 17108 }, { "epoch": 34.218, "grad_norm": 1.154567003250122, "learning_rate": 2e-05, "loss": 0.05113642, "step": 17109 }, { "epoch": 34.22, "grad_norm": 0.8803784847259521, "learning_rate": 2e-05, "loss": 0.03221222, "step": 17110 }, { "epoch": 34.222, "grad_norm": 1.251283884048462, "learning_rate": 2e-05, "loss": 0.06108729, "step": 17111 }, { "epoch": 34.224, "grad_norm": 1.1169072389602661, "learning_rate": 2e-05, "loss": 0.04225818, "step": 17112 }, { "epoch": 34.226, "grad_norm": 1.3303180932998657, "learning_rate": 2e-05, "loss": 0.03801825, "step": 17113 }, { "epoch": 34.228, "grad_norm": 1.6138463020324707, "learning_rate": 2e-05, "loss": 0.05414141, "step": 17114 }, { "epoch": 34.23, "grad_norm": 0.9638254046440125, "learning_rate": 2e-05, "loss": 0.04354844, "step": 17115 }, { "epoch": 34.232, "grad_norm": 2.520341396331787, "learning_rate": 2e-05, "loss": 0.05455326, "step": 17116 }, { "epoch": 34.234, "grad_norm": 0.9619042277336121, "learning_rate": 2e-05, "loss": 0.02963806, "step": 17117 }, { "epoch": 34.236, "grad_norm": 1.1978199481964111, "learning_rate": 2e-05, "loss": 0.043663, "step": 17118 }, { "epoch": 34.238, "grad_norm": 1.0521676540374756, "learning_rate": 2e-05, "loss": 0.04142124, "step": 17119 }, { "epoch": 34.24, "grad_norm": 0.9165552258491516, "learning_rate": 2e-05, "loss": 0.04432365, "step": 17120 }, { "epoch": 34.242, "grad_norm": 0.9963357448577881, "learning_rate": 2e-05, "loss": 0.05390811, "step": 17121 }, { "epoch": 34.244, "grad_norm": 1.2486971616744995, "learning_rate": 2e-05, "loss": 0.04316922, "step": 17122 }, { "epoch": 34.246, "grad_norm": 0.8598348498344421, "learning_rate": 2e-05, "loss": 0.02887846, "step": 17123 }, { "epoch": 34.248, "grad_norm": 1.636795997619629, "learning_rate": 2e-05, "loss": 0.04200181, "step": 17124 }, { "epoch": 34.25, "grad_norm": 2.1948068141937256, "learning_rate": 2e-05, "loss": 0.05976803, "step": 17125 }, { "epoch": 34.252, "grad_norm": 1.3988673686981201, "learning_rate": 2e-05, "loss": 0.06815295, "step": 17126 }, { "epoch": 34.254, "grad_norm": 1.8134987354278564, "learning_rate": 2e-05, "loss": 0.05416913, "step": 17127 }, { "epoch": 34.256, "grad_norm": 1.218163251876831, "learning_rate": 2e-05, "loss": 0.05030614, "step": 17128 }, { "epoch": 34.258, "grad_norm": 1.3112636804580688, "learning_rate": 2e-05, "loss": 0.04937346, "step": 17129 }, { "epoch": 34.26, "grad_norm": 1.0734925270080566, "learning_rate": 2e-05, "loss": 0.04259723, "step": 17130 }, { "epoch": 34.262, "grad_norm": 2.0758414268493652, "learning_rate": 2e-05, "loss": 0.03459087, "step": 17131 }, { "epoch": 34.264, "grad_norm": 1.2600023746490479, "learning_rate": 2e-05, "loss": 0.04453474, "step": 17132 }, { "epoch": 34.266, "grad_norm": 6.241335391998291, "learning_rate": 2e-05, "loss": 0.07125821, "step": 17133 }, { "epoch": 34.268, "grad_norm": 1.1067910194396973, "learning_rate": 2e-05, "loss": 0.04146163, "step": 17134 }, { "epoch": 34.27, "grad_norm": 1.4377071857452393, "learning_rate": 2e-05, "loss": 0.05019745, "step": 17135 }, { "epoch": 34.272, "grad_norm": 0.8701745867729187, "learning_rate": 2e-05, "loss": 0.02947097, "step": 17136 }, { "epoch": 34.274, "grad_norm": 1.793082594871521, "learning_rate": 2e-05, "loss": 0.04656201, "step": 17137 }, { "epoch": 34.276, "grad_norm": 1.42355477809906, "learning_rate": 2e-05, "loss": 0.04446046, "step": 17138 }, { "epoch": 34.278, "grad_norm": 1.2346354722976685, "learning_rate": 2e-05, "loss": 0.05122849, "step": 17139 }, { "epoch": 34.28, "grad_norm": 1.2974516153335571, "learning_rate": 2e-05, "loss": 0.04853995, "step": 17140 }, { "epoch": 34.282, "grad_norm": 5.939311981201172, "learning_rate": 2e-05, "loss": 0.0425652, "step": 17141 }, { "epoch": 34.284, "grad_norm": 1.5456867218017578, "learning_rate": 2e-05, "loss": 0.04539612, "step": 17142 }, { "epoch": 34.286, "grad_norm": 1.3790383338928223, "learning_rate": 2e-05, "loss": 0.0601688, "step": 17143 }, { "epoch": 34.288, "grad_norm": 1.6992055177688599, "learning_rate": 2e-05, "loss": 0.03951364, "step": 17144 }, { "epoch": 34.29, "grad_norm": 1.1658952236175537, "learning_rate": 2e-05, "loss": 0.06133916, "step": 17145 }, { "epoch": 34.292, "grad_norm": 1.222755789756775, "learning_rate": 2e-05, "loss": 0.04160428, "step": 17146 }, { "epoch": 34.294, "grad_norm": 1.9384597539901733, "learning_rate": 2e-05, "loss": 0.05730987, "step": 17147 }, { "epoch": 34.296, "grad_norm": 1.0469677448272705, "learning_rate": 2e-05, "loss": 0.0340088, "step": 17148 }, { "epoch": 34.298, "grad_norm": 4.811890602111816, "learning_rate": 2e-05, "loss": 0.06405757, "step": 17149 }, { "epoch": 34.3, "grad_norm": 1.0627214908599854, "learning_rate": 2e-05, "loss": 0.0434816, "step": 17150 }, { "epoch": 34.302, "grad_norm": 1.1335147619247437, "learning_rate": 2e-05, "loss": 0.04534355, "step": 17151 }, { "epoch": 34.304, "grad_norm": 1.0481853485107422, "learning_rate": 2e-05, "loss": 0.04281599, "step": 17152 }, { "epoch": 34.306, "grad_norm": 0.9987990856170654, "learning_rate": 2e-05, "loss": 0.04687274, "step": 17153 }, { "epoch": 34.308, "grad_norm": 0.8880828022956848, "learning_rate": 2e-05, "loss": 0.03379878, "step": 17154 }, { "epoch": 34.31, "grad_norm": 3.4225730895996094, "learning_rate": 2e-05, "loss": 0.04249279, "step": 17155 }, { "epoch": 34.312, "grad_norm": 0.9572854042053223, "learning_rate": 2e-05, "loss": 0.03737252, "step": 17156 }, { "epoch": 34.314, "grad_norm": 1.0833467245101929, "learning_rate": 2e-05, "loss": 0.0478721, "step": 17157 }, { "epoch": 34.316, "grad_norm": 1.6381075382232666, "learning_rate": 2e-05, "loss": 0.05687353, "step": 17158 }, { "epoch": 34.318, "grad_norm": 1.0394964218139648, "learning_rate": 2e-05, "loss": 0.04196269, "step": 17159 }, { "epoch": 34.32, "grad_norm": 1.2679455280303955, "learning_rate": 2e-05, "loss": 0.03954285, "step": 17160 }, { "epoch": 34.322, "grad_norm": 3.54329776763916, "learning_rate": 2e-05, "loss": 0.05063166, "step": 17161 }, { "epoch": 34.324, "grad_norm": 1.1054974794387817, "learning_rate": 2e-05, "loss": 0.04165071, "step": 17162 }, { "epoch": 34.326, "grad_norm": 1.2040807008743286, "learning_rate": 2e-05, "loss": 0.03694673, "step": 17163 }, { "epoch": 34.328, "grad_norm": 0.9723226428031921, "learning_rate": 2e-05, "loss": 0.0387465, "step": 17164 }, { "epoch": 34.33, "grad_norm": 1.7822750806808472, "learning_rate": 2e-05, "loss": 0.06982434, "step": 17165 }, { "epoch": 34.332, "grad_norm": 0.8794524073600769, "learning_rate": 2e-05, "loss": 0.03231228, "step": 17166 }, { "epoch": 34.334, "grad_norm": 1.2693003416061401, "learning_rate": 2e-05, "loss": 0.04900923, "step": 17167 }, { "epoch": 34.336, "grad_norm": 1.255731463432312, "learning_rate": 2e-05, "loss": 0.04806737, "step": 17168 }, { "epoch": 34.338, "grad_norm": 1.0378990173339844, "learning_rate": 2e-05, "loss": 0.04312072, "step": 17169 }, { "epoch": 34.34, "grad_norm": 1.7802931070327759, "learning_rate": 2e-05, "loss": 0.03918452, "step": 17170 }, { "epoch": 34.342, "grad_norm": 0.8944633603096008, "learning_rate": 2e-05, "loss": 0.03537128, "step": 17171 }, { "epoch": 34.344, "grad_norm": 1.1033025979995728, "learning_rate": 2e-05, "loss": 0.04002212, "step": 17172 }, { "epoch": 34.346, "grad_norm": 2.037821054458618, "learning_rate": 2e-05, "loss": 0.03672659, "step": 17173 }, { "epoch": 34.348, "grad_norm": 1.2263180017471313, "learning_rate": 2e-05, "loss": 0.03930723, "step": 17174 }, { "epoch": 34.35, "grad_norm": 0.9324464201927185, "learning_rate": 2e-05, "loss": 0.0384024, "step": 17175 }, { "epoch": 34.352, "grad_norm": 1.9529005289077759, "learning_rate": 2e-05, "loss": 0.07437837, "step": 17176 }, { "epoch": 34.354, "grad_norm": 1.056804895401001, "learning_rate": 2e-05, "loss": 0.04414742, "step": 17177 }, { "epoch": 34.356, "grad_norm": 1.6486568450927734, "learning_rate": 2e-05, "loss": 0.05605358, "step": 17178 }, { "epoch": 34.358, "grad_norm": 1.8184200525283813, "learning_rate": 2e-05, "loss": 0.07752745, "step": 17179 }, { "epoch": 34.36, "grad_norm": 1.3310307264328003, "learning_rate": 2e-05, "loss": 0.06570064, "step": 17180 }, { "epoch": 34.362, "grad_norm": 1.0654106140136719, "learning_rate": 2e-05, "loss": 0.04631539, "step": 17181 }, { "epoch": 34.364, "grad_norm": 1.4559305906295776, "learning_rate": 2e-05, "loss": 0.02549034, "step": 17182 }, { "epoch": 34.366, "grad_norm": 1.3885945081710815, "learning_rate": 2e-05, "loss": 0.06019825, "step": 17183 }, { "epoch": 34.368, "grad_norm": 1.0742771625518799, "learning_rate": 2e-05, "loss": 0.04950587, "step": 17184 }, { "epoch": 34.37, "grad_norm": 1.0170137882232666, "learning_rate": 2e-05, "loss": 0.04402692, "step": 17185 }, { "epoch": 34.372, "grad_norm": 1.1808441877365112, "learning_rate": 2e-05, "loss": 0.05725706, "step": 17186 }, { "epoch": 34.374, "grad_norm": 1.2643585205078125, "learning_rate": 2e-05, "loss": 0.04692946, "step": 17187 }, { "epoch": 34.376, "grad_norm": 1.8227148056030273, "learning_rate": 2e-05, "loss": 0.06398594, "step": 17188 }, { "epoch": 34.378, "grad_norm": 1.179686188697815, "learning_rate": 2e-05, "loss": 0.04844396, "step": 17189 }, { "epoch": 34.38, "grad_norm": 1.1651767492294312, "learning_rate": 2e-05, "loss": 0.03253875, "step": 17190 }, { "epoch": 34.382, "grad_norm": 1.1371257305145264, "learning_rate": 2e-05, "loss": 0.03852693, "step": 17191 }, { "epoch": 34.384, "grad_norm": 1.3502544164657593, "learning_rate": 2e-05, "loss": 0.04762468, "step": 17192 }, { "epoch": 34.386, "grad_norm": 1.142296552658081, "learning_rate": 2e-05, "loss": 0.04313561, "step": 17193 }, { "epoch": 34.388, "grad_norm": 1.7440104484558105, "learning_rate": 2e-05, "loss": 0.05508664, "step": 17194 }, { "epoch": 34.39, "grad_norm": 1.344459056854248, "learning_rate": 2e-05, "loss": 0.0568133, "step": 17195 }, { "epoch": 34.392, "grad_norm": 1.323464274406433, "learning_rate": 2e-05, "loss": 0.06226826, "step": 17196 }, { "epoch": 34.394, "grad_norm": 0.9054190516471863, "learning_rate": 2e-05, "loss": 0.03479025, "step": 17197 }, { "epoch": 34.396, "grad_norm": 1.7127602100372314, "learning_rate": 2e-05, "loss": 0.05586579, "step": 17198 }, { "epoch": 34.398, "grad_norm": 1.196912169456482, "learning_rate": 2e-05, "loss": 0.04376396, "step": 17199 }, { "epoch": 34.4, "grad_norm": 2.4722952842712402, "learning_rate": 2e-05, "loss": 0.04456051, "step": 17200 }, { "epoch": 34.402, "grad_norm": 1.064764380455017, "learning_rate": 2e-05, "loss": 0.03922116, "step": 17201 }, { "epoch": 34.404, "grad_norm": 1.3010603189468384, "learning_rate": 2e-05, "loss": 0.03888344, "step": 17202 }, { "epoch": 34.406, "grad_norm": 1.2522550821304321, "learning_rate": 2e-05, "loss": 0.05896273, "step": 17203 }, { "epoch": 34.408, "grad_norm": 0.9243654012680054, "learning_rate": 2e-05, "loss": 0.03974604, "step": 17204 }, { "epoch": 34.41, "grad_norm": 1.7199246883392334, "learning_rate": 2e-05, "loss": 0.04469179, "step": 17205 }, { "epoch": 34.412, "grad_norm": 0.9649671316146851, "learning_rate": 2e-05, "loss": 0.02917315, "step": 17206 }, { "epoch": 34.414, "grad_norm": 1.1990541219711304, "learning_rate": 2e-05, "loss": 0.04986425, "step": 17207 }, { "epoch": 34.416, "grad_norm": 0.9723196029663086, "learning_rate": 2e-05, "loss": 0.03854433, "step": 17208 }, { "epoch": 34.418, "grad_norm": 1.0894447565078735, "learning_rate": 2e-05, "loss": 0.04604191, "step": 17209 }, { "epoch": 34.42, "grad_norm": 0.9879698753356934, "learning_rate": 2e-05, "loss": 0.03904828, "step": 17210 }, { "epoch": 34.422, "grad_norm": 1.0254758596420288, "learning_rate": 2e-05, "loss": 0.03631346, "step": 17211 }, { "epoch": 34.424, "grad_norm": 1.7050204277038574, "learning_rate": 2e-05, "loss": 0.04208519, "step": 17212 }, { "epoch": 34.426, "grad_norm": 6.833503246307373, "learning_rate": 2e-05, "loss": 0.04807124, "step": 17213 }, { "epoch": 34.428, "grad_norm": 2.2164909839630127, "learning_rate": 2e-05, "loss": 0.05099214, "step": 17214 }, { "epoch": 34.43, "grad_norm": 1.1032756567001343, "learning_rate": 2e-05, "loss": 0.04240026, "step": 17215 }, { "epoch": 34.432, "grad_norm": 1.570851445198059, "learning_rate": 2e-05, "loss": 0.04978696, "step": 17216 }, { "epoch": 34.434, "grad_norm": 1.141745924949646, "learning_rate": 2e-05, "loss": 0.04013623, "step": 17217 }, { "epoch": 34.436, "grad_norm": 1.0062901973724365, "learning_rate": 2e-05, "loss": 0.03431761, "step": 17218 }, { "epoch": 34.438, "grad_norm": 1.2855632305145264, "learning_rate": 2e-05, "loss": 0.05700184, "step": 17219 }, { "epoch": 34.44, "grad_norm": 0.9581730365753174, "learning_rate": 2e-05, "loss": 0.03853309, "step": 17220 }, { "epoch": 34.442, "grad_norm": 0.9563824534416199, "learning_rate": 2e-05, "loss": 0.03054016, "step": 17221 }, { "epoch": 34.444, "grad_norm": 1.1309500932693481, "learning_rate": 2e-05, "loss": 0.04658124, "step": 17222 }, { "epoch": 34.446, "grad_norm": 1.0278185606002808, "learning_rate": 2e-05, "loss": 0.03786182, "step": 17223 }, { "epoch": 34.448, "grad_norm": 1.2145733833312988, "learning_rate": 2e-05, "loss": 0.04492437, "step": 17224 }, { "epoch": 34.45, "grad_norm": 1.0846318006515503, "learning_rate": 2e-05, "loss": 0.03960449, "step": 17225 }, { "epoch": 34.452, "grad_norm": 1.0192991495132446, "learning_rate": 2e-05, "loss": 0.04288488, "step": 17226 }, { "epoch": 34.454, "grad_norm": 1.144559621810913, "learning_rate": 2e-05, "loss": 0.05009259, "step": 17227 }, { "epoch": 34.456, "grad_norm": 1.1344889402389526, "learning_rate": 2e-05, "loss": 0.04760866, "step": 17228 }, { "epoch": 34.458, "grad_norm": 1.0482244491577148, "learning_rate": 2e-05, "loss": 0.04170975, "step": 17229 }, { "epoch": 34.46, "grad_norm": 1.0298725366592407, "learning_rate": 2e-05, "loss": 0.03907231, "step": 17230 }, { "epoch": 34.462, "grad_norm": 1.0794728994369507, "learning_rate": 2e-05, "loss": 0.05371661, "step": 17231 }, { "epoch": 34.464, "grad_norm": 2.2302026748657227, "learning_rate": 2e-05, "loss": 0.07158783, "step": 17232 }, { "epoch": 34.466, "grad_norm": 0.9466357827186584, "learning_rate": 2e-05, "loss": 0.03780546, "step": 17233 }, { "epoch": 34.468, "grad_norm": 0.9313338994979858, "learning_rate": 2e-05, "loss": 0.03165032, "step": 17234 }, { "epoch": 34.47, "grad_norm": 1.813313364982605, "learning_rate": 2e-05, "loss": 0.06320791, "step": 17235 }, { "epoch": 34.472, "grad_norm": 1.2306967973709106, "learning_rate": 2e-05, "loss": 0.04111777, "step": 17236 }, { "epoch": 34.474, "grad_norm": 1.2754193544387817, "learning_rate": 2e-05, "loss": 0.05048329, "step": 17237 }, { "epoch": 34.476, "grad_norm": 1.0783175230026245, "learning_rate": 2e-05, "loss": 0.02934643, "step": 17238 }, { "epoch": 34.478, "grad_norm": 1.0188827514648438, "learning_rate": 2e-05, "loss": 0.04093478, "step": 17239 }, { "epoch": 34.48, "grad_norm": 0.9061264991760254, "learning_rate": 2e-05, "loss": 0.03135334, "step": 17240 }, { "epoch": 34.482, "grad_norm": 1.0545909404754639, "learning_rate": 2e-05, "loss": 0.04299493, "step": 17241 }, { "epoch": 34.484, "grad_norm": 1.0597087144851685, "learning_rate": 2e-05, "loss": 0.05160675, "step": 17242 }, { "epoch": 34.486, "grad_norm": 1.4301875829696655, "learning_rate": 2e-05, "loss": 0.05078311, "step": 17243 }, { "epoch": 34.488, "grad_norm": 2.4146711826324463, "learning_rate": 2e-05, "loss": 0.05218497, "step": 17244 }, { "epoch": 34.49, "grad_norm": 1.0220338106155396, "learning_rate": 2e-05, "loss": 0.04061006, "step": 17245 }, { "epoch": 34.492, "grad_norm": 1.4312618970870972, "learning_rate": 2e-05, "loss": 0.05962217, "step": 17246 }, { "epoch": 34.494, "grad_norm": 1.2545678615570068, "learning_rate": 2e-05, "loss": 0.05518552, "step": 17247 }, { "epoch": 34.496, "grad_norm": 1.155526876449585, "learning_rate": 2e-05, "loss": 0.0355588, "step": 17248 }, { "epoch": 34.498, "grad_norm": 0.9511224031448364, "learning_rate": 2e-05, "loss": 0.02615048, "step": 17249 }, { "epoch": 34.5, "grad_norm": 1.1427955627441406, "learning_rate": 2e-05, "loss": 0.05111597, "step": 17250 }, { "epoch": 34.502, "grad_norm": 0.9777476191520691, "learning_rate": 2e-05, "loss": 0.02932965, "step": 17251 }, { "epoch": 34.504, "grad_norm": 1.2976810932159424, "learning_rate": 2e-05, "loss": 0.0463884, "step": 17252 }, { "epoch": 34.506, "grad_norm": 0.8444451689720154, "learning_rate": 2e-05, "loss": 0.02426597, "step": 17253 }, { "epoch": 34.508, "grad_norm": 1.2487907409667969, "learning_rate": 2e-05, "loss": 0.0419631, "step": 17254 }, { "epoch": 34.51, "grad_norm": 0.9758622646331787, "learning_rate": 2e-05, "loss": 0.03309517, "step": 17255 }, { "epoch": 34.512, "grad_norm": 2.119077682495117, "learning_rate": 2e-05, "loss": 0.04612582, "step": 17256 }, { "epoch": 34.514, "grad_norm": 0.9678782820701599, "learning_rate": 2e-05, "loss": 0.03152514, "step": 17257 }, { "epoch": 34.516, "grad_norm": 1.374767780303955, "learning_rate": 2e-05, "loss": 0.03712862, "step": 17258 }, { "epoch": 34.518, "grad_norm": 1.1364487409591675, "learning_rate": 2e-05, "loss": 0.04112991, "step": 17259 }, { "epoch": 34.52, "grad_norm": 1.001591444015503, "learning_rate": 2e-05, "loss": 0.03927646, "step": 17260 }, { "epoch": 34.522, "grad_norm": 1.076517939567566, "learning_rate": 2e-05, "loss": 0.03902861, "step": 17261 }, { "epoch": 34.524, "grad_norm": 1.4721672534942627, "learning_rate": 2e-05, "loss": 0.03493863, "step": 17262 }, { "epoch": 34.526, "grad_norm": 0.958672046661377, "learning_rate": 2e-05, "loss": 0.03901869, "step": 17263 }, { "epoch": 34.528, "grad_norm": 0.947724461555481, "learning_rate": 2e-05, "loss": 0.04129472, "step": 17264 }, { "epoch": 34.53, "grad_norm": 1.4992616176605225, "learning_rate": 2e-05, "loss": 0.05480441, "step": 17265 }, { "epoch": 34.532, "grad_norm": 1.4268319606781006, "learning_rate": 2e-05, "loss": 0.05849118, "step": 17266 }, { "epoch": 34.534, "grad_norm": 1.2194082736968994, "learning_rate": 2e-05, "loss": 0.04569989, "step": 17267 }, { "epoch": 34.536, "grad_norm": 1.1879225969314575, "learning_rate": 2e-05, "loss": 0.05354061, "step": 17268 }, { "epoch": 34.538, "grad_norm": 1.1701525449752808, "learning_rate": 2e-05, "loss": 0.04698183, "step": 17269 }, { "epoch": 34.54, "grad_norm": 1.1337878704071045, "learning_rate": 2e-05, "loss": 0.03659814, "step": 17270 }, { "epoch": 34.542, "grad_norm": 0.9269596934318542, "learning_rate": 2e-05, "loss": 0.03073806, "step": 17271 }, { "epoch": 34.544, "grad_norm": 1.2493841648101807, "learning_rate": 2e-05, "loss": 0.06172349, "step": 17272 }, { "epoch": 34.546, "grad_norm": 1.491633415222168, "learning_rate": 2e-05, "loss": 0.05550639, "step": 17273 }, { "epoch": 34.548, "grad_norm": 1.888570785522461, "learning_rate": 2e-05, "loss": 0.06070855, "step": 17274 }, { "epoch": 34.55, "grad_norm": 1.3882776498794556, "learning_rate": 2e-05, "loss": 0.03783091, "step": 17275 }, { "epoch": 34.552, "grad_norm": 1.1110395193099976, "learning_rate": 2e-05, "loss": 0.04781444, "step": 17276 }, { "epoch": 34.554, "grad_norm": 0.9404047131538391, "learning_rate": 2e-05, "loss": 0.03459203, "step": 17277 }, { "epoch": 34.556, "grad_norm": 1.4040842056274414, "learning_rate": 2e-05, "loss": 0.0376682, "step": 17278 }, { "epoch": 34.558, "grad_norm": 0.9887487292289734, "learning_rate": 2e-05, "loss": 0.03730477, "step": 17279 }, { "epoch": 34.56, "grad_norm": 1.3203293085098267, "learning_rate": 2e-05, "loss": 0.05541973, "step": 17280 }, { "epoch": 34.562, "grad_norm": 3.0385563373565674, "learning_rate": 2e-05, "loss": 0.05273008, "step": 17281 }, { "epoch": 34.564, "grad_norm": 1.02402663230896, "learning_rate": 2e-05, "loss": 0.03825521, "step": 17282 }, { "epoch": 34.566, "grad_norm": 1.4072762727737427, "learning_rate": 2e-05, "loss": 0.05669183, "step": 17283 }, { "epoch": 34.568, "grad_norm": 0.94670170545578, "learning_rate": 2e-05, "loss": 0.030951, "step": 17284 }, { "epoch": 34.57, "grad_norm": 0.9878659844398499, "learning_rate": 2e-05, "loss": 0.04278901, "step": 17285 }, { "epoch": 34.572, "grad_norm": 0.9426774382591248, "learning_rate": 2e-05, "loss": 0.03506008, "step": 17286 }, { "epoch": 34.574, "grad_norm": 1.2874869108200073, "learning_rate": 2e-05, "loss": 0.05332602, "step": 17287 }, { "epoch": 34.576, "grad_norm": 0.9953237771987915, "learning_rate": 2e-05, "loss": 0.03367607, "step": 17288 }, { "epoch": 34.578, "grad_norm": 1.1052265167236328, "learning_rate": 2e-05, "loss": 0.04065955, "step": 17289 }, { "epoch": 34.58, "grad_norm": 1.302668571472168, "learning_rate": 2e-05, "loss": 0.03839137, "step": 17290 }, { "epoch": 34.582, "grad_norm": 0.9161083698272705, "learning_rate": 2e-05, "loss": 0.03425333, "step": 17291 }, { "epoch": 34.584, "grad_norm": 0.8974297046661377, "learning_rate": 2e-05, "loss": 0.02879751, "step": 17292 }, { "epoch": 34.586, "grad_norm": 0.8633301854133606, "learning_rate": 2e-05, "loss": 0.03415179, "step": 17293 }, { "epoch": 34.588, "grad_norm": 1.370519757270813, "learning_rate": 2e-05, "loss": 0.05036892, "step": 17294 }, { "epoch": 34.59, "grad_norm": 1.408907175064087, "learning_rate": 2e-05, "loss": 0.04020501, "step": 17295 }, { "epoch": 34.592, "grad_norm": 3.699692726135254, "learning_rate": 2e-05, "loss": 0.0602615, "step": 17296 }, { "epoch": 34.594, "grad_norm": 1.2987639904022217, "learning_rate": 2e-05, "loss": 0.04135382, "step": 17297 }, { "epoch": 34.596, "grad_norm": 1.006319522857666, "learning_rate": 2e-05, "loss": 0.04302901, "step": 17298 }, { "epoch": 34.598, "grad_norm": 4.096602439880371, "learning_rate": 2e-05, "loss": 0.05378917, "step": 17299 }, { "epoch": 34.6, "grad_norm": 1.7802878618240356, "learning_rate": 2e-05, "loss": 0.06264147, "step": 17300 }, { "epoch": 34.602, "grad_norm": 1.0398523807525635, "learning_rate": 2e-05, "loss": 0.02535326, "step": 17301 }, { "epoch": 34.604, "grad_norm": 1.003915548324585, "learning_rate": 2e-05, "loss": 0.03914617, "step": 17302 }, { "epoch": 34.606, "grad_norm": 1.01604425907135, "learning_rate": 2e-05, "loss": 0.0440813, "step": 17303 }, { "epoch": 34.608, "grad_norm": 1.1370083093643188, "learning_rate": 2e-05, "loss": 0.04044215, "step": 17304 }, { "epoch": 34.61, "grad_norm": 0.8037679195404053, "learning_rate": 2e-05, "loss": 0.02355415, "step": 17305 }, { "epoch": 34.612, "grad_norm": 1.607362151145935, "learning_rate": 2e-05, "loss": 0.04869013, "step": 17306 }, { "epoch": 34.614, "grad_norm": 1.4024226665496826, "learning_rate": 2e-05, "loss": 0.03503479, "step": 17307 }, { "epoch": 34.616, "grad_norm": 1.7453526258468628, "learning_rate": 2e-05, "loss": 0.03976201, "step": 17308 }, { "epoch": 34.618, "grad_norm": 1.2523118257522583, "learning_rate": 2e-05, "loss": 0.05295001, "step": 17309 }, { "epoch": 34.62, "grad_norm": 1.2281060218811035, "learning_rate": 2e-05, "loss": 0.05975485, "step": 17310 }, { "epoch": 34.622, "grad_norm": 1.2202659845352173, "learning_rate": 2e-05, "loss": 0.043715, "step": 17311 }, { "epoch": 34.624, "grad_norm": 2.5343337059020996, "learning_rate": 2e-05, "loss": 0.05755136, "step": 17312 }, { "epoch": 34.626, "grad_norm": 1.4939541816711426, "learning_rate": 2e-05, "loss": 0.0324131, "step": 17313 }, { "epoch": 34.628, "grad_norm": 0.9299662113189697, "learning_rate": 2e-05, "loss": 0.03801748, "step": 17314 }, { "epoch": 34.63, "grad_norm": 1.4129657745361328, "learning_rate": 2e-05, "loss": 0.04810465, "step": 17315 }, { "epoch": 34.632, "grad_norm": 2.3096113204956055, "learning_rate": 2e-05, "loss": 0.03955406, "step": 17316 }, { "epoch": 34.634, "grad_norm": 1.2234951257705688, "learning_rate": 2e-05, "loss": 0.03399597, "step": 17317 }, { "epoch": 34.636, "grad_norm": 0.9615769386291504, "learning_rate": 2e-05, "loss": 0.03469737, "step": 17318 }, { "epoch": 34.638, "grad_norm": 3.7953057289123535, "learning_rate": 2e-05, "loss": 0.0582743, "step": 17319 }, { "epoch": 34.64, "grad_norm": 1.0247529745101929, "learning_rate": 2e-05, "loss": 0.03982718, "step": 17320 }, { "epoch": 34.642, "grad_norm": 1.153660774230957, "learning_rate": 2e-05, "loss": 0.04286103, "step": 17321 }, { "epoch": 34.644, "grad_norm": 2.755707025527954, "learning_rate": 2e-05, "loss": 0.05077897, "step": 17322 }, { "epoch": 34.646, "grad_norm": 1.1930323839187622, "learning_rate": 2e-05, "loss": 0.04944639, "step": 17323 }, { "epoch": 34.648, "grad_norm": 1.179976224899292, "learning_rate": 2e-05, "loss": 0.03271005, "step": 17324 }, { "epoch": 34.65, "grad_norm": 1.1533175706863403, "learning_rate": 2e-05, "loss": 0.03806565, "step": 17325 }, { "epoch": 34.652, "grad_norm": 1.3322744369506836, "learning_rate": 2e-05, "loss": 0.04510138, "step": 17326 }, { "epoch": 34.654, "grad_norm": 1.2426185607910156, "learning_rate": 2e-05, "loss": 0.0325065, "step": 17327 }, { "epoch": 34.656, "grad_norm": 1.201393485069275, "learning_rate": 2e-05, "loss": 0.0500529, "step": 17328 }, { "epoch": 34.658, "grad_norm": 1.1238892078399658, "learning_rate": 2e-05, "loss": 0.04271996, "step": 17329 }, { "epoch": 34.66, "grad_norm": 1.175925612449646, "learning_rate": 2e-05, "loss": 0.0367612, "step": 17330 }, { "epoch": 34.662, "grad_norm": 1.164878249168396, "learning_rate": 2e-05, "loss": 0.03411833, "step": 17331 }, { "epoch": 34.664, "grad_norm": 2.097522497177124, "learning_rate": 2e-05, "loss": 0.0542042, "step": 17332 }, { "epoch": 34.666, "grad_norm": 1.0560789108276367, "learning_rate": 2e-05, "loss": 0.04442062, "step": 17333 }, { "epoch": 34.668, "grad_norm": 1.082340955734253, "learning_rate": 2e-05, "loss": 0.03951849, "step": 17334 }, { "epoch": 34.67, "grad_norm": 1.2747083902359009, "learning_rate": 2e-05, "loss": 0.04362657, "step": 17335 }, { "epoch": 34.672, "grad_norm": 1.2096962928771973, "learning_rate": 2e-05, "loss": 0.04659062, "step": 17336 }, { "epoch": 34.674, "grad_norm": 1.096097707748413, "learning_rate": 2e-05, "loss": 0.04575984, "step": 17337 }, { "epoch": 34.676, "grad_norm": 1.6780627965927124, "learning_rate": 2e-05, "loss": 0.05064568, "step": 17338 }, { "epoch": 34.678, "grad_norm": 2.1115145683288574, "learning_rate": 2e-05, "loss": 0.05828436, "step": 17339 }, { "epoch": 34.68, "grad_norm": 1.3405977487564087, "learning_rate": 2e-05, "loss": 0.0530773, "step": 17340 }, { "epoch": 34.682, "grad_norm": 1.1422169208526611, "learning_rate": 2e-05, "loss": 0.02775931, "step": 17341 }, { "epoch": 34.684, "grad_norm": 0.9781467318534851, "learning_rate": 2e-05, "loss": 0.0334505, "step": 17342 }, { "epoch": 34.686, "grad_norm": 1.3836034536361694, "learning_rate": 2e-05, "loss": 0.04607526, "step": 17343 }, { "epoch": 34.688, "grad_norm": 1.6095694303512573, "learning_rate": 2e-05, "loss": 0.03260849, "step": 17344 }, { "epoch": 34.69, "grad_norm": 0.9684262275695801, "learning_rate": 2e-05, "loss": 0.03473915, "step": 17345 }, { "epoch": 34.692, "grad_norm": 1.0258201360702515, "learning_rate": 2e-05, "loss": 0.03583578, "step": 17346 }, { "epoch": 34.694, "grad_norm": 0.9890029430389404, "learning_rate": 2e-05, "loss": 0.02697755, "step": 17347 }, { "epoch": 34.696, "grad_norm": 1.7557021379470825, "learning_rate": 2e-05, "loss": 0.04590514, "step": 17348 }, { "epoch": 34.698, "grad_norm": 0.9564387202262878, "learning_rate": 2e-05, "loss": 0.03249798, "step": 17349 }, { "epoch": 34.7, "grad_norm": 1.119845986366272, "learning_rate": 2e-05, "loss": 0.03657874, "step": 17350 }, { "epoch": 34.702, "grad_norm": 1.4675902128219604, "learning_rate": 2e-05, "loss": 0.04621536, "step": 17351 }, { "epoch": 34.704, "grad_norm": 1.0927159786224365, "learning_rate": 2e-05, "loss": 0.04045237, "step": 17352 }, { "epoch": 34.706, "grad_norm": 1.2731096744537354, "learning_rate": 2e-05, "loss": 0.05830338, "step": 17353 }, { "epoch": 34.708, "grad_norm": 3.94342041015625, "learning_rate": 2e-05, "loss": 0.05567334, "step": 17354 }, { "epoch": 34.71, "grad_norm": 1.829927921295166, "learning_rate": 2e-05, "loss": 0.04827532, "step": 17355 }, { "epoch": 34.712, "grad_norm": 1.1922739744186401, "learning_rate": 2e-05, "loss": 0.04752831, "step": 17356 }, { "epoch": 34.714, "grad_norm": 1.1289398670196533, "learning_rate": 2e-05, "loss": 0.05670695, "step": 17357 }, { "epoch": 34.716, "grad_norm": 1.7289245128631592, "learning_rate": 2e-05, "loss": 0.04594773, "step": 17358 }, { "epoch": 34.718, "grad_norm": 1.0295947790145874, "learning_rate": 2e-05, "loss": 0.04748521, "step": 17359 }, { "epoch": 34.72, "grad_norm": 1.3546435832977295, "learning_rate": 2e-05, "loss": 0.05076949, "step": 17360 }, { "epoch": 34.722, "grad_norm": 1.1636558771133423, "learning_rate": 2e-05, "loss": 0.05013094, "step": 17361 }, { "epoch": 34.724, "grad_norm": 1.2177501916885376, "learning_rate": 2e-05, "loss": 0.05669533, "step": 17362 }, { "epoch": 34.726, "grad_norm": 1.3747395277023315, "learning_rate": 2e-05, "loss": 0.03559513, "step": 17363 }, { "epoch": 34.728, "grad_norm": 1.0851410627365112, "learning_rate": 2e-05, "loss": 0.0377779, "step": 17364 }, { "epoch": 34.73, "grad_norm": 1.7162500619888306, "learning_rate": 2e-05, "loss": 0.05712839, "step": 17365 }, { "epoch": 34.732, "grad_norm": 1.8854362964630127, "learning_rate": 2e-05, "loss": 0.05300543, "step": 17366 }, { "epoch": 34.734, "grad_norm": 1.4041353464126587, "learning_rate": 2e-05, "loss": 0.0517244, "step": 17367 }, { "epoch": 34.736, "grad_norm": 1.104217529296875, "learning_rate": 2e-05, "loss": 0.0403502, "step": 17368 }, { "epoch": 34.738, "grad_norm": 3.5156617164611816, "learning_rate": 2e-05, "loss": 0.04651166, "step": 17369 }, { "epoch": 34.74, "grad_norm": 1.0277109146118164, "learning_rate": 2e-05, "loss": 0.03690248, "step": 17370 }, { "epoch": 34.742, "grad_norm": 1.3331364393234253, "learning_rate": 2e-05, "loss": 0.04669707, "step": 17371 }, { "epoch": 34.744, "grad_norm": 1.085112452507019, "learning_rate": 2e-05, "loss": 0.03929215, "step": 17372 }, { "epoch": 34.746, "grad_norm": 0.8301331400871277, "learning_rate": 2e-05, "loss": 0.02301613, "step": 17373 }, { "epoch": 34.748, "grad_norm": 1.3663872480392456, "learning_rate": 2e-05, "loss": 0.05277127, "step": 17374 }, { "epoch": 34.75, "grad_norm": 0.9460889101028442, "learning_rate": 2e-05, "loss": 0.03504899, "step": 17375 }, { "epoch": 34.752, "grad_norm": 0.9396779537200928, "learning_rate": 2e-05, "loss": 0.03774077, "step": 17376 }, { "epoch": 34.754, "grad_norm": 1.2399020195007324, "learning_rate": 2e-05, "loss": 0.05220488, "step": 17377 }, { "epoch": 34.756, "grad_norm": 1.065956711769104, "learning_rate": 2e-05, "loss": 0.03877483, "step": 17378 }, { "epoch": 34.758, "grad_norm": 1.0520079135894775, "learning_rate": 2e-05, "loss": 0.03968885, "step": 17379 }, { "epoch": 34.76, "grad_norm": 0.9007804989814758, "learning_rate": 2e-05, "loss": 0.03319176, "step": 17380 }, { "epoch": 34.762, "grad_norm": 1.1114633083343506, "learning_rate": 2e-05, "loss": 0.04269458, "step": 17381 }, { "epoch": 34.764, "grad_norm": 1.1199363470077515, "learning_rate": 2e-05, "loss": 0.03674524, "step": 17382 }, { "epoch": 34.766, "grad_norm": 1.0618280172348022, "learning_rate": 2e-05, "loss": 0.03060028, "step": 17383 }, { "epoch": 34.768, "grad_norm": 1.5965828895568848, "learning_rate": 2e-05, "loss": 0.05331475, "step": 17384 }, { "epoch": 34.77, "grad_norm": 0.9755058288574219, "learning_rate": 2e-05, "loss": 0.03655314, "step": 17385 }, { "epoch": 34.772, "grad_norm": 1.0321615934371948, "learning_rate": 2e-05, "loss": 0.03229146, "step": 17386 }, { "epoch": 34.774, "grad_norm": 0.9792773723602295, "learning_rate": 2e-05, "loss": 0.04507142, "step": 17387 }, { "epoch": 34.776, "grad_norm": 2.4639945030212402, "learning_rate": 2e-05, "loss": 0.073479, "step": 17388 }, { "epoch": 34.778, "grad_norm": 2.794674873352051, "learning_rate": 2e-05, "loss": 0.05546476, "step": 17389 }, { "epoch": 34.78, "grad_norm": 2.544067859649658, "learning_rate": 2e-05, "loss": 0.04424439, "step": 17390 }, { "epoch": 34.782, "grad_norm": 1.1105905771255493, "learning_rate": 2e-05, "loss": 0.05513006, "step": 17391 }, { "epoch": 34.784, "grad_norm": 1.1320130825042725, "learning_rate": 2e-05, "loss": 0.05860891, "step": 17392 }, { "epoch": 34.786, "grad_norm": 0.8131487369537354, "learning_rate": 2e-05, "loss": 0.02320004, "step": 17393 }, { "epoch": 34.788, "grad_norm": 0.9977858066558838, "learning_rate": 2e-05, "loss": 0.0381477, "step": 17394 }, { "epoch": 34.79, "grad_norm": 1.118066430091858, "learning_rate": 2e-05, "loss": 0.04671922, "step": 17395 }, { "epoch": 34.792, "grad_norm": 1.282545566558838, "learning_rate": 2e-05, "loss": 0.05404096, "step": 17396 }, { "epoch": 34.794, "grad_norm": 1.3975576162338257, "learning_rate": 2e-05, "loss": 0.06878631, "step": 17397 }, { "epoch": 34.796, "grad_norm": 1.360386610031128, "learning_rate": 2e-05, "loss": 0.06892541, "step": 17398 }, { "epoch": 34.798, "grad_norm": 1.45335853099823, "learning_rate": 2e-05, "loss": 0.05018577, "step": 17399 }, { "epoch": 34.8, "grad_norm": 1.6314353942871094, "learning_rate": 2e-05, "loss": 0.05542941, "step": 17400 }, { "epoch": 34.802, "grad_norm": 1.476161003112793, "learning_rate": 2e-05, "loss": 0.04480565, "step": 17401 }, { "epoch": 34.804, "grad_norm": 1.0519765615463257, "learning_rate": 2e-05, "loss": 0.0289671, "step": 17402 }, { "epoch": 34.806, "grad_norm": 1.3698680400848389, "learning_rate": 2e-05, "loss": 0.03819375, "step": 17403 }, { "epoch": 34.808, "grad_norm": 0.9910497665405273, "learning_rate": 2e-05, "loss": 0.04323296, "step": 17404 }, { "epoch": 34.81, "grad_norm": 1.0575556755065918, "learning_rate": 2e-05, "loss": 0.03465891, "step": 17405 }, { "epoch": 34.812, "grad_norm": 0.9736021161079407, "learning_rate": 2e-05, "loss": 0.03983516, "step": 17406 }, { "epoch": 34.814, "grad_norm": 1.180785059928894, "learning_rate": 2e-05, "loss": 0.04701866, "step": 17407 }, { "epoch": 34.816, "grad_norm": 1.2198784351348877, "learning_rate": 2e-05, "loss": 0.06252494, "step": 17408 }, { "epoch": 34.818, "grad_norm": 1.338168740272522, "learning_rate": 2e-05, "loss": 0.05533778, "step": 17409 }, { "epoch": 34.82, "grad_norm": 0.8965152502059937, "learning_rate": 2e-05, "loss": 0.03544335, "step": 17410 }, { "epoch": 34.822, "grad_norm": 1.0662354230880737, "learning_rate": 2e-05, "loss": 0.04395933, "step": 17411 }, { "epoch": 34.824, "grad_norm": 1.2522810697555542, "learning_rate": 2e-05, "loss": 0.02941951, "step": 17412 }, { "epoch": 34.826, "grad_norm": 1.1798815727233887, "learning_rate": 2e-05, "loss": 0.05084882, "step": 17413 }, { "epoch": 34.828, "grad_norm": 1.2098127603530884, "learning_rate": 2e-05, "loss": 0.05086075, "step": 17414 }, { "epoch": 34.83, "grad_norm": 1.1224452257156372, "learning_rate": 2e-05, "loss": 0.03716393, "step": 17415 }, { "epoch": 34.832, "grad_norm": 0.9916290044784546, "learning_rate": 2e-05, "loss": 0.03262686, "step": 17416 }, { "epoch": 34.834, "grad_norm": 1.0005525350570679, "learning_rate": 2e-05, "loss": 0.04301282, "step": 17417 }, { "epoch": 34.836, "grad_norm": 1.0677385330200195, "learning_rate": 2e-05, "loss": 0.0463312, "step": 17418 }, { "epoch": 34.838, "grad_norm": 0.9889830350875854, "learning_rate": 2e-05, "loss": 0.0281502, "step": 17419 }, { "epoch": 34.84, "grad_norm": 0.8128519654273987, "learning_rate": 2e-05, "loss": 0.02465032, "step": 17420 }, { "epoch": 34.842, "grad_norm": 1.15475332736969, "learning_rate": 2e-05, "loss": 0.049545, "step": 17421 }, { "epoch": 34.844, "grad_norm": 1.3552649021148682, "learning_rate": 2e-05, "loss": 0.05437973, "step": 17422 }, { "epoch": 34.846, "grad_norm": 1.1397968530654907, "learning_rate": 2e-05, "loss": 0.04250487, "step": 17423 }, { "epoch": 34.848, "grad_norm": 1.2482610940933228, "learning_rate": 2e-05, "loss": 0.03756886, "step": 17424 }, { "epoch": 34.85, "grad_norm": 1.508941411972046, "learning_rate": 2e-05, "loss": 0.04109079, "step": 17425 }, { "epoch": 34.852, "grad_norm": 1.1376099586486816, "learning_rate": 2e-05, "loss": 0.04516655, "step": 17426 }, { "epoch": 34.854, "grad_norm": 1.350716471672058, "learning_rate": 2e-05, "loss": 0.03932498, "step": 17427 }, { "epoch": 34.856, "grad_norm": 1.1709167957305908, "learning_rate": 2e-05, "loss": 0.03975515, "step": 17428 }, { "epoch": 34.858, "grad_norm": 1.7732312679290771, "learning_rate": 2e-05, "loss": 0.03694304, "step": 17429 }, { "epoch": 34.86, "grad_norm": 1.0338153839111328, "learning_rate": 2e-05, "loss": 0.05116407, "step": 17430 }, { "epoch": 34.862, "grad_norm": 1.0339096784591675, "learning_rate": 2e-05, "loss": 0.04853801, "step": 17431 }, { "epoch": 34.864, "grad_norm": 1.1931415796279907, "learning_rate": 2e-05, "loss": 0.05871486, "step": 17432 }, { "epoch": 34.866, "grad_norm": 0.9298121333122253, "learning_rate": 2e-05, "loss": 0.02905935, "step": 17433 }, { "epoch": 34.868, "grad_norm": 1.1926735639572144, "learning_rate": 2e-05, "loss": 0.05302522, "step": 17434 }, { "epoch": 34.87, "grad_norm": 0.8261333703994751, "learning_rate": 2e-05, "loss": 0.02749117, "step": 17435 }, { "epoch": 34.872, "grad_norm": 1.1333870887756348, "learning_rate": 2e-05, "loss": 0.04432245, "step": 17436 }, { "epoch": 34.874, "grad_norm": 3.3269906044006348, "learning_rate": 2e-05, "loss": 0.03778184, "step": 17437 }, { "epoch": 34.876, "grad_norm": 0.9553112983703613, "learning_rate": 2e-05, "loss": 0.04338606, "step": 17438 }, { "epoch": 34.878, "grad_norm": 1.1333266496658325, "learning_rate": 2e-05, "loss": 0.04250003, "step": 17439 }, { "epoch": 34.88, "grad_norm": 1.249476432800293, "learning_rate": 2e-05, "loss": 0.05652367, "step": 17440 }, { "epoch": 34.882, "grad_norm": 1.452951192855835, "learning_rate": 2e-05, "loss": 0.05415319, "step": 17441 }, { "epoch": 34.884, "grad_norm": 2.504230260848999, "learning_rate": 2e-05, "loss": 0.04133745, "step": 17442 }, { "epoch": 34.886, "grad_norm": 1.095824122428894, "learning_rate": 2e-05, "loss": 0.03693581, "step": 17443 }, { "epoch": 34.888, "grad_norm": 1.001352071762085, "learning_rate": 2e-05, "loss": 0.04378172, "step": 17444 }, { "epoch": 34.89, "grad_norm": 1.4028725624084473, "learning_rate": 2e-05, "loss": 0.04782668, "step": 17445 }, { "epoch": 34.892, "grad_norm": 1.0283355712890625, "learning_rate": 2e-05, "loss": 0.04526603, "step": 17446 }, { "epoch": 34.894, "grad_norm": 0.8928292989730835, "learning_rate": 2e-05, "loss": 0.03074392, "step": 17447 }, { "epoch": 34.896, "grad_norm": 0.9636794328689575, "learning_rate": 2e-05, "loss": 0.04015881, "step": 17448 }, { "epoch": 34.898, "grad_norm": 0.9662042856216431, "learning_rate": 2e-05, "loss": 0.03868334, "step": 17449 }, { "epoch": 34.9, "grad_norm": 0.7631124258041382, "learning_rate": 2e-05, "loss": 0.0177309, "step": 17450 }, { "epoch": 34.902, "grad_norm": 1.013450264930725, "learning_rate": 2e-05, "loss": 0.0424903, "step": 17451 }, { "epoch": 34.904, "grad_norm": 1.1101561784744263, "learning_rate": 2e-05, "loss": 0.04629656, "step": 17452 }, { "epoch": 34.906, "grad_norm": 1.099001169204712, "learning_rate": 2e-05, "loss": 0.04971351, "step": 17453 }, { "epoch": 34.908, "grad_norm": 1.1630494594573975, "learning_rate": 2e-05, "loss": 0.04590615, "step": 17454 }, { "epoch": 34.91, "grad_norm": 2.4016902446746826, "learning_rate": 2e-05, "loss": 0.03962929, "step": 17455 }, { "epoch": 34.912, "grad_norm": 1.5115602016448975, "learning_rate": 2e-05, "loss": 0.04877982, "step": 17456 }, { "epoch": 34.914, "grad_norm": 1.5647093057632446, "learning_rate": 2e-05, "loss": 0.04914349, "step": 17457 }, { "epoch": 34.916, "grad_norm": 0.8313471078872681, "learning_rate": 2e-05, "loss": 0.03280953, "step": 17458 }, { "epoch": 34.918, "grad_norm": 1.0135865211486816, "learning_rate": 2e-05, "loss": 0.03129869, "step": 17459 }, { "epoch": 34.92, "grad_norm": 1.5138708353042603, "learning_rate": 2e-05, "loss": 0.06413849, "step": 17460 }, { "epoch": 34.922, "grad_norm": 0.8478360772132874, "learning_rate": 2e-05, "loss": 0.03446319, "step": 17461 }, { "epoch": 34.924, "grad_norm": 1.1181224584579468, "learning_rate": 2e-05, "loss": 0.03928749, "step": 17462 }, { "epoch": 34.926, "grad_norm": 1.2706669569015503, "learning_rate": 2e-05, "loss": 0.05042338, "step": 17463 }, { "epoch": 34.928, "grad_norm": 1.7319210767745972, "learning_rate": 2e-05, "loss": 0.04715657, "step": 17464 }, { "epoch": 34.93, "grad_norm": 1.0087729692459106, "learning_rate": 2e-05, "loss": 0.04004742, "step": 17465 }, { "epoch": 34.932, "grad_norm": 3.010061025619507, "learning_rate": 2e-05, "loss": 0.0449556, "step": 17466 }, { "epoch": 34.934, "grad_norm": 0.9879313111305237, "learning_rate": 2e-05, "loss": 0.04271141, "step": 17467 }, { "epoch": 34.936, "grad_norm": 1.2568382024765015, "learning_rate": 2e-05, "loss": 0.04353319, "step": 17468 }, { "epoch": 34.938, "grad_norm": 1.0577069520950317, "learning_rate": 2e-05, "loss": 0.03563489, "step": 17469 }, { "epoch": 34.94, "grad_norm": 1.1462761163711548, "learning_rate": 2e-05, "loss": 0.04594369, "step": 17470 }, { "epoch": 34.942, "grad_norm": 1.3913559913635254, "learning_rate": 2e-05, "loss": 0.05015879, "step": 17471 }, { "epoch": 34.944, "grad_norm": 1.3430120944976807, "learning_rate": 2e-05, "loss": 0.0499382, "step": 17472 }, { "epoch": 34.946, "grad_norm": 1.3897830247879028, "learning_rate": 2e-05, "loss": 0.06003839, "step": 17473 }, { "epoch": 34.948, "grad_norm": 2.9135901927948, "learning_rate": 2e-05, "loss": 0.05451025, "step": 17474 }, { "epoch": 34.95, "grad_norm": 1.6587684154510498, "learning_rate": 2e-05, "loss": 0.04405323, "step": 17475 }, { "epoch": 34.952, "grad_norm": 16.431175231933594, "learning_rate": 2e-05, "loss": 0.06101349, "step": 17476 }, { "epoch": 34.954, "grad_norm": 1.6508738994598389, "learning_rate": 2e-05, "loss": 0.05426374, "step": 17477 }, { "epoch": 34.956, "grad_norm": 1.0128573179244995, "learning_rate": 2e-05, "loss": 0.04134047, "step": 17478 }, { "epoch": 34.958, "grad_norm": 1.038770318031311, "learning_rate": 2e-05, "loss": 0.04339874, "step": 17479 }, { "epoch": 34.96, "grad_norm": 0.9866028428077698, "learning_rate": 2e-05, "loss": 0.03807293, "step": 17480 }, { "epoch": 34.962, "grad_norm": 0.7313461899757385, "learning_rate": 2e-05, "loss": 0.02388552, "step": 17481 }, { "epoch": 34.964, "grad_norm": 1.3967745304107666, "learning_rate": 2e-05, "loss": 0.05446218, "step": 17482 }, { "epoch": 34.966, "grad_norm": 2.6369152069091797, "learning_rate": 2e-05, "loss": 0.05669343, "step": 17483 }, { "epoch": 34.968, "grad_norm": 1.2083170413970947, "learning_rate": 2e-05, "loss": 0.0498525, "step": 17484 }, { "epoch": 34.97, "grad_norm": 1.1737569570541382, "learning_rate": 2e-05, "loss": 0.0361472, "step": 17485 }, { "epoch": 34.972, "grad_norm": 1.307645559310913, "learning_rate": 2e-05, "loss": 0.04401141, "step": 17486 }, { "epoch": 34.974, "grad_norm": 1.0356417894363403, "learning_rate": 2e-05, "loss": 0.04182448, "step": 17487 }, { "epoch": 34.976, "grad_norm": 1.1776436567306519, "learning_rate": 2e-05, "loss": 0.05049908, "step": 17488 }, { "epoch": 34.978, "grad_norm": 0.943390429019928, "learning_rate": 2e-05, "loss": 0.03511337, "step": 17489 }, { "epoch": 34.98, "grad_norm": 1.3408476114273071, "learning_rate": 2e-05, "loss": 0.04584191, "step": 17490 }, { "epoch": 34.982, "grad_norm": 1.5149743556976318, "learning_rate": 2e-05, "loss": 0.05642883, "step": 17491 }, { "epoch": 34.984, "grad_norm": 1.3052691221237183, "learning_rate": 2e-05, "loss": 0.04319417, "step": 17492 }, { "epoch": 34.986, "grad_norm": 1.2136170864105225, "learning_rate": 2e-05, "loss": 0.04910697, "step": 17493 }, { "epoch": 34.988, "grad_norm": 0.9490352272987366, "learning_rate": 2e-05, "loss": 0.03300906, "step": 17494 }, { "epoch": 34.99, "grad_norm": 1.1584285497665405, "learning_rate": 2e-05, "loss": 0.0407235, "step": 17495 }, { "epoch": 34.992, "grad_norm": 1.501214861869812, "learning_rate": 2e-05, "loss": 0.04469844, "step": 17496 }, { "epoch": 34.994, "grad_norm": 1.1645262241363525, "learning_rate": 2e-05, "loss": 0.03266477, "step": 17497 }, { "epoch": 34.996, "grad_norm": 1.0962436199188232, "learning_rate": 2e-05, "loss": 0.04484088, "step": 17498 }, { "epoch": 34.998, "grad_norm": 0.8866491317749023, "learning_rate": 2e-05, "loss": 0.03239752, "step": 17499 }, { "epoch": 35.0, "grad_norm": 0.9882741570472717, "learning_rate": 2e-05, "loss": 0.03945159, "step": 17500 }, { "epoch": 35.0, "eval_performance": { "AngleClassification_1": 0.998, "AngleClassification_2": 0.998, "AngleClassification_3": 0.9860279441117764, "Equal_1": 0.996, "Equal_2": 0.9820359281437125, "Equal_3": 0.9840319361277445, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.9960079840319361, "Parallel_1": 0.9839679358717435, "Parallel_2": 0.9979959919839679, "Parallel_3": 0.992, "Perpendicular_1": 0.996, "Perpendicular_2": 0.998, "Perpendicular_3": 0.8917835671342685, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.994, "PointLiesOnCircle_3": 0.992, "PointLiesOnLine_1": 0.9919839679358717, "PointLiesOnLine_2": 0.9919839679358717, "PointLiesOnLine_3": 0.9820359281437125 }, "eval_runtime": 226.4278, "eval_samples_per_second": 46.372, "eval_steps_per_second": 0.927, "step": 17500 }, { "epoch": 35.002, "grad_norm": 1.807010531425476, "learning_rate": 2e-05, "loss": 0.02976769, "step": 17501 }, { "epoch": 35.004, "grad_norm": 0.9377430081367493, "learning_rate": 2e-05, "loss": 0.025191, "step": 17502 }, { "epoch": 35.006, "grad_norm": 1.5465960502624512, "learning_rate": 2e-05, "loss": 0.03942408, "step": 17503 }, { "epoch": 35.008, "grad_norm": 1.0734102725982666, "learning_rate": 2e-05, "loss": 0.03788696, "step": 17504 }, { "epoch": 35.01, "grad_norm": 1.1284676790237427, "learning_rate": 2e-05, "loss": 0.04951893, "step": 17505 }, { "epoch": 35.012, "grad_norm": 1.3621853590011597, "learning_rate": 2e-05, "loss": 0.06844484, "step": 17506 }, { "epoch": 35.014, "grad_norm": 0.8983585238456726, "learning_rate": 2e-05, "loss": 0.03789803, "step": 17507 }, { "epoch": 35.016, "grad_norm": 0.9668340682983398, "learning_rate": 2e-05, "loss": 0.03981137, "step": 17508 }, { "epoch": 35.018, "grad_norm": 1.3125338554382324, "learning_rate": 2e-05, "loss": 0.03835117, "step": 17509 }, { "epoch": 35.02, "grad_norm": 1.623358964920044, "learning_rate": 2e-05, "loss": 0.04929879, "step": 17510 }, { "epoch": 35.022, "grad_norm": 1.3916130065917969, "learning_rate": 2e-05, "loss": 0.04938906, "step": 17511 }, { "epoch": 35.024, "grad_norm": 1.0056376457214355, "learning_rate": 2e-05, "loss": 0.04209897, "step": 17512 }, { "epoch": 35.026, "grad_norm": 1.1668142080307007, "learning_rate": 2e-05, "loss": 0.05428746, "step": 17513 }, { "epoch": 35.028, "grad_norm": 0.9621237516403198, "learning_rate": 2e-05, "loss": 0.03810575, "step": 17514 }, { "epoch": 35.03, "grad_norm": 1.0386854410171509, "learning_rate": 2e-05, "loss": 0.04068698, "step": 17515 }, { "epoch": 35.032, "grad_norm": 1.2794911861419678, "learning_rate": 2e-05, "loss": 0.03032846, "step": 17516 }, { "epoch": 35.034, "grad_norm": 1.055626630783081, "learning_rate": 2e-05, "loss": 0.03911695, "step": 17517 }, { "epoch": 35.036, "grad_norm": 1.2126388549804688, "learning_rate": 2e-05, "loss": 0.05142342, "step": 17518 }, { "epoch": 35.038, "grad_norm": 0.8865519165992737, "learning_rate": 2e-05, "loss": 0.03785376, "step": 17519 }, { "epoch": 35.04, "grad_norm": 1.177388310432434, "learning_rate": 2e-05, "loss": 0.05000926, "step": 17520 }, { "epoch": 35.042, "grad_norm": 0.8592489957809448, "learning_rate": 2e-05, "loss": 0.02907897, "step": 17521 }, { "epoch": 35.044, "grad_norm": 1.2507890462875366, "learning_rate": 2e-05, "loss": 0.04459122, "step": 17522 }, { "epoch": 35.046, "grad_norm": 0.9813108444213867, "learning_rate": 2e-05, "loss": 0.03512271, "step": 17523 }, { "epoch": 35.048, "grad_norm": 1.038568377494812, "learning_rate": 2e-05, "loss": 0.03452795, "step": 17524 }, { "epoch": 35.05, "grad_norm": 1.0534279346466064, "learning_rate": 2e-05, "loss": 0.03196262, "step": 17525 }, { "epoch": 35.052, "grad_norm": 1.256226897239685, "learning_rate": 2e-05, "loss": 0.05305724, "step": 17526 }, { "epoch": 35.054, "grad_norm": 1.4074029922485352, "learning_rate": 2e-05, "loss": 0.05258299, "step": 17527 }, { "epoch": 35.056, "grad_norm": 0.9168592691421509, "learning_rate": 2e-05, "loss": 0.0384104, "step": 17528 }, { "epoch": 35.058, "grad_norm": 1.1938879489898682, "learning_rate": 2e-05, "loss": 0.04373552, "step": 17529 }, { "epoch": 35.06, "grad_norm": 1.4167543649673462, "learning_rate": 2e-05, "loss": 0.05945568, "step": 17530 }, { "epoch": 35.062, "grad_norm": 0.9096139073371887, "learning_rate": 2e-05, "loss": 0.03134483, "step": 17531 }, { "epoch": 35.064, "grad_norm": 1.0717324018478394, "learning_rate": 2e-05, "loss": 0.03063212, "step": 17532 }, { "epoch": 35.066, "grad_norm": 1.4877620935440063, "learning_rate": 2e-05, "loss": 0.05728324, "step": 17533 }, { "epoch": 35.068, "grad_norm": 1.273405909538269, "learning_rate": 2e-05, "loss": 0.05511542, "step": 17534 }, { "epoch": 35.07, "grad_norm": 1.200867772102356, "learning_rate": 2e-05, "loss": 0.03923905, "step": 17535 }, { "epoch": 35.072, "grad_norm": 1.0522817373275757, "learning_rate": 2e-05, "loss": 0.0457083, "step": 17536 }, { "epoch": 35.074, "grad_norm": 0.8867217898368835, "learning_rate": 2e-05, "loss": 0.0306708, "step": 17537 }, { "epoch": 35.076, "grad_norm": 1.0921623706817627, "learning_rate": 2e-05, "loss": 0.04330845, "step": 17538 }, { "epoch": 35.078, "grad_norm": 0.9952976703643799, "learning_rate": 2e-05, "loss": 0.03329832, "step": 17539 }, { "epoch": 35.08, "grad_norm": 1.2085686922073364, "learning_rate": 2e-05, "loss": 0.0417196, "step": 17540 }, { "epoch": 35.082, "grad_norm": 1.0761547088623047, "learning_rate": 2e-05, "loss": 0.03382145, "step": 17541 }, { "epoch": 35.084, "grad_norm": 1.858634114265442, "learning_rate": 2e-05, "loss": 0.05110903, "step": 17542 }, { "epoch": 35.086, "grad_norm": 2.047222852706909, "learning_rate": 2e-05, "loss": 0.04374942, "step": 17543 }, { "epoch": 35.088, "grad_norm": 2.090027093887329, "learning_rate": 2e-05, "loss": 0.04773934, "step": 17544 }, { "epoch": 35.09, "grad_norm": 1.050370454788208, "learning_rate": 2e-05, "loss": 0.0422223, "step": 17545 }, { "epoch": 35.092, "grad_norm": 1.5183378458023071, "learning_rate": 2e-05, "loss": 0.06885757, "step": 17546 }, { "epoch": 35.094, "grad_norm": 1.1304771900177002, "learning_rate": 2e-05, "loss": 0.04425477, "step": 17547 }, { "epoch": 35.096, "grad_norm": 1.0506104230880737, "learning_rate": 2e-05, "loss": 0.0447881, "step": 17548 }, { "epoch": 35.098, "grad_norm": 1.0431349277496338, "learning_rate": 2e-05, "loss": 0.03268544, "step": 17549 }, { "epoch": 35.1, "grad_norm": 0.9254025816917419, "learning_rate": 2e-05, "loss": 0.03306852, "step": 17550 }, { "epoch": 35.102, "grad_norm": 0.9495605230331421, "learning_rate": 2e-05, "loss": 0.03805057, "step": 17551 }, { "epoch": 35.104, "grad_norm": 0.9899808168411255, "learning_rate": 2e-05, "loss": 0.03881633, "step": 17552 }, { "epoch": 35.106, "grad_norm": 0.9630085229873657, "learning_rate": 2e-05, "loss": 0.03783602, "step": 17553 }, { "epoch": 35.108, "grad_norm": 1.25407075881958, "learning_rate": 2e-05, "loss": 0.06233543, "step": 17554 }, { "epoch": 35.11, "grad_norm": 0.8933363556861877, "learning_rate": 2e-05, "loss": 0.03949576, "step": 17555 }, { "epoch": 35.112, "grad_norm": 0.9513004422187805, "learning_rate": 2e-05, "loss": 0.03538863, "step": 17556 }, { "epoch": 35.114, "grad_norm": 1.0609948635101318, "learning_rate": 2e-05, "loss": 0.04746865, "step": 17557 }, { "epoch": 35.116, "grad_norm": 0.9222297668457031, "learning_rate": 2e-05, "loss": 0.03491361, "step": 17558 }, { "epoch": 35.118, "grad_norm": 1.0360534191131592, "learning_rate": 2e-05, "loss": 0.04416903, "step": 17559 }, { "epoch": 35.12, "grad_norm": 0.9607548117637634, "learning_rate": 2e-05, "loss": 0.03806097, "step": 17560 }, { "epoch": 35.122, "grad_norm": 1.6293730735778809, "learning_rate": 2e-05, "loss": 0.04908408, "step": 17561 }, { "epoch": 35.124, "grad_norm": 1.554187536239624, "learning_rate": 2e-05, "loss": 0.07036849, "step": 17562 }, { "epoch": 35.126, "grad_norm": 0.883334219455719, "learning_rate": 2e-05, "loss": 0.03584832, "step": 17563 }, { "epoch": 35.128, "grad_norm": 1.0425816774368286, "learning_rate": 2e-05, "loss": 0.04246851, "step": 17564 }, { "epoch": 35.13, "grad_norm": 1.3546010255813599, "learning_rate": 2e-05, "loss": 0.03809312, "step": 17565 }, { "epoch": 35.132, "grad_norm": 1.2746822834014893, "learning_rate": 2e-05, "loss": 0.04557358, "step": 17566 }, { "epoch": 35.134, "grad_norm": 1.4614131450653076, "learning_rate": 2e-05, "loss": 0.04252045, "step": 17567 }, { "epoch": 35.136, "grad_norm": 1.1536229848861694, "learning_rate": 2e-05, "loss": 0.04292948, "step": 17568 }, { "epoch": 35.138, "grad_norm": 1.01869797706604, "learning_rate": 2e-05, "loss": 0.028421, "step": 17569 }, { "epoch": 35.14, "grad_norm": 1.2533504962921143, "learning_rate": 2e-05, "loss": 0.04138442, "step": 17570 }, { "epoch": 35.142, "grad_norm": 1.0690712928771973, "learning_rate": 2e-05, "loss": 0.04452455, "step": 17571 }, { "epoch": 35.144, "grad_norm": 0.9953153133392334, "learning_rate": 2e-05, "loss": 0.04418472, "step": 17572 }, { "epoch": 35.146, "grad_norm": 1.1776041984558105, "learning_rate": 2e-05, "loss": 0.03083918, "step": 17573 }, { "epoch": 35.148, "grad_norm": 2.670074701309204, "learning_rate": 2e-05, "loss": 0.04440989, "step": 17574 }, { "epoch": 35.15, "grad_norm": 1.6662096977233887, "learning_rate": 2e-05, "loss": 0.0412053, "step": 17575 }, { "epoch": 35.152, "grad_norm": 1.0319550037384033, "learning_rate": 2e-05, "loss": 0.05101796, "step": 17576 }, { "epoch": 35.154, "grad_norm": 0.9185923933982849, "learning_rate": 2e-05, "loss": 0.02901392, "step": 17577 }, { "epoch": 35.156, "grad_norm": 1.1012095212936401, "learning_rate": 2e-05, "loss": 0.04848382, "step": 17578 }, { "epoch": 35.158, "grad_norm": 1.0120359659194946, "learning_rate": 2e-05, "loss": 0.04992912, "step": 17579 }, { "epoch": 35.16, "grad_norm": 0.9404464960098267, "learning_rate": 2e-05, "loss": 0.04230688, "step": 17580 }, { "epoch": 35.162, "grad_norm": 0.8423766493797302, "learning_rate": 2e-05, "loss": 0.02515163, "step": 17581 }, { "epoch": 35.164, "grad_norm": 1.0743728876113892, "learning_rate": 2e-05, "loss": 0.03420008, "step": 17582 }, { "epoch": 35.166, "grad_norm": 0.9131585359573364, "learning_rate": 2e-05, "loss": 0.03229, "step": 17583 }, { "epoch": 35.168, "grad_norm": 1.1327663660049438, "learning_rate": 2e-05, "loss": 0.04063525, "step": 17584 }, { "epoch": 35.17, "grad_norm": 1.011610984802246, "learning_rate": 2e-05, "loss": 0.04288485, "step": 17585 }, { "epoch": 35.172, "grad_norm": 1.0595221519470215, "learning_rate": 2e-05, "loss": 0.04253446, "step": 17586 }, { "epoch": 35.174, "grad_norm": 1.5316667556762695, "learning_rate": 2e-05, "loss": 0.04784918, "step": 17587 }, { "epoch": 35.176, "grad_norm": 1.2387864589691162, "learning_rate": 2e-05, "loss": 0.04527666, "step": 17588 }, { "epoch": 35.178, "grad_norm": 1.4482325315475464, "learning_rate": 2e-05, "loss": 0.03369762, "step": 17589 }, { "epoch": 35.18, "grad_norm": 2.109994888305664, "learning_rate": 2e-05, "loss": 0.04244148, "step": 17590 }, { "epoch": 35.182, "grad_norm": 0.9868069291114807, "learning_rate": 2e-05, "loss": 0.04317181, "step": 17591 }, { "epoch": 35.184, "grad_norm": 2.3310647010803223, "learning_rate": 2e-05, "loss": 0.05063179, "step": 17592 }, { "epoch": 35.186, "grad_norm": 1.4115225076675415, "learning_rate": 2e-05, "loss": 0.04429448, "step": 17593 }, { "epoch": 35.188, "grad_norm": 2.5410616397857666, "learning_rate": 2e-05, "loss": 0.05019169, "step": 17594 }, { "epoch": 35.19, "grad_norm": 1.1264679431915283, "learning_rate": 2e-05, "loss": 0.0532867, "step": 17595 }, { "epoch": 35.192, "grad_norm": 1.255260705947876, "learning_rate": 2e-05, "loss": 0.04294121, "step": 17596 }, { "epoch": 35.194, "grad_norm": 1.936521053314209, "learning_rate": 2e-05, "loss": 0.06231557, "step": 17597 }, { "epoch": 35.196, "grad_norm": 1.6419144868850708, "learning_rate": 2e-05, "loss": 0.0381153, "step": 17598 }, { "epoch": 35.198, "grad_norm": 1.7481536865234375, "learning_rate": 2e-05, "loss": 0.04617708, "step": 17599 }, { "epoch": 35.2, "grad_norm": 1.1084064245224, "learning_rate": 2e-05, "loss": 0.04347561, "step": 17600 }, { "epoch": 35.202, "grad_norm": 1.3430471420288086, "learning_rate": 2e-05, "loss": 0.04348384, "step": 17601 }, { "epoch": 35.204, "grad_norm": 1.2742668390274048, "learning_rate": 2e-05, "loss": 0.0461786, "step": 17602 }, { "epoch": 35.206, "grad_norm": 2.894005060195923, "learning_rate": 2e-05, "loss": 0.0513533, "step": 17603 }, { "epoch": 35.208, "grad_norm": 1.1306184530258179, "learning_rate": 2e-05, "loss": 0.04468352, "step": 17604 }, { "epoch": 35.21, "grad_norm": 1.0650429725646973, "learning_rate": 2e-05, "loss": 0.0459475, "step": 17605 }, { "epoch": 35.212, "grad_norm": 0.9335548281669617, "learning_rate": 2e-05, "loss": 0.03720281, "step": 17606 }, { "epoch": 35.214, "grad_norm": 1.2227981090545654, "learning_rate": 2e-05, "loss": 0.05689692, "step": 17607 }, { "epoch": 35.216, "grad_norm": 0.9883827567100525, "learning_rate": 2e-05, "loss": 0.03453005, "step": 17608 }, { "epoch": 35.218, "grad_norm": 1.1639258861541748, "learning_rate": 2e-05, "loss": 0.04172406, "step": 17609 }, { "epoch": 35.22, "grad_norm": 1.023895502090454, "learning_rate": 2e-05, "loss": 0.03983181, "step": 17610 }, { "epoch": 35.222, "grad_norm": 1.0217078924179077, "learning_rate": 2e-05, "loss": 0.03555222, "step": 17611 }, { "epoch": 35.224, "grad_norm": 1.4535253047943115, "learning_rate": 2e-05, "loss": 0.05830119, "step": 17612 }, { "epoch": 35.226, "grad_norm": 1.7434368133544922, "learning_rate": 2e-05, "loss": 0.04690488, "step": 17613 }, { "epoch": 35.228, "grad_norm": 1.0099235773086548, "learning_rate": 2e-05, "loss": 0.04631438, "step": 17614 }, { "epoch": 35.23, "grad_norm": 0.9461044073104858, "learning_rate": 2e-05, "loss": 0.03487935, "step": 17615 }, { "epoch": 35.232, "grad_norm": 3.918426990509033, "learning_rate": 2e-05, "loss": 0.04773463, "step": 17616 }, { "epoch": 35.234, "grad_norm": 1.0953538417816162, "learning_rate": 2e-05, "loss": 0.04620247, "step": 17617 }, { "epoch": 35.236, "grad_norm": 1.3998392820358276, "learning_rate": 2e-05, "loss": 0.05513449, "step": 17618 }, { "epoch": 35.238, "grad_norm": 0.8595099449157715, "learning_rate": 2e-05, "loss": 0.03652699, "step": 17619 }, { "epoch": 35.24, "grad_norm": 1.0213959217071533, "learning_rate": 2e-05, "loss": 0.03280999, "step": 17620 }, { "epoch": 35.242, "grad_norm": 1.0699174404144287, "learning_rate": 2e-05, "loss": 0.04822638, "step": 17621 }, { "epoch": 35.244, "grad_norm": 1.0143264532089233, "learning_rate": 2e-05, "loss": 0.04762748, "step": 17622 }, { "epoch": 35.246, "grad_norm": 1.4901297092437744, "learning_rate": 2e-05, "loss": 0.05250069, "step": 17623 }, { "epoch": 35.248, "grad_norm": 1.0826385021209717, "learning_rate": 2e-05, "loss": 0.05125062, "step": 17624 }, { "epoch": 35.25, "grad_norm": 1.0855374336242676, "learning_rate": 2e-05, "loss": 0.05381385, "step": 17625 }, { "epoch": 35.252, "grad_norm": 1.0669958591461182, "learning_rate": 2e-05, "loss": 0.04015147, "step": 17626 }, { "epoch": 35.254, "grad_norm": 1.1593743562698364, "learning_rate": 2e-05, "loss": 0.05097338, "step": 17627 }, { "epoch": 35.256, "grad_norm": 1.511268138885498, "learning_rate": 2e-05, "loss": 0.0474214, "step": 17628 }, { "epoch": 35.258, "grad_norm": 1.3611929416656494, "learning_rate": 2e-05, "loss": 0.05130062, "step": 17629 }, { "epoch": 35.26, "grad_norm": 1.0102423429489136, "learning_rate": 2e-05, "loss": 0.0444267, "step": 17630 }, { "epoch": 35.262, "grad_norm": 1.4529967308044434, "learning_rate": 2e-05, "loss": 0.05789261, "step": 17631 }, { "epoch": 35.264, "grad_norm": 1.2478184700012207, "learning_rate": 2e-05, "loss": 0.06281422, "step": 17632 }, { "epoch": 35.266, "grad_norm": 1.527511715888977, "learning_rate": 2e-05, "loss": 0.03950737, "step": 17633 }, { "epoch": 35.268, "grad_norm": 1.141655445098877, "learning_rate": 2e-05, "loss": 0.05682784, "step": 17634 }, { "epoch": 35.27, "grad_norm": 0.8920691013336182, "learning_rate": 2e-05, "loss": 0.030084, "step": 17635 }, { "epoch": 35.272, "grad_norm": 0.9463072419166565, "learning_rate": 2e-05, "loss": 0.03435849, "step": 17636 }, { "epoch": 35.274, "grad_norm": 1.265901803970337, "learning_rate": 2e-05, "loss": 0.04987226, "step": 17637 }, { "epoch": 35.276, "grad_norm": 0.9563845992088318, "learning_rate": 2e-05, "loss": 0.04158399, "step": 17638 }, { "epoch": 35.278, "grad_norm": 1.1061853170394897, "learning_rate": 2e-05, "loss": 0.05366402, "step": 17639 }, { "epoch": 35.28, "grad_norm": 4.611207485198975, "learning_rate": 2e-05, "loss": 0.05742697, "step": 17640 }, { "epoch": 35.282, "grad_norm": 0.852905809879303, "learning_rate": 2e-05, "loss": 0.03568864, "step": 17641 }, { "epoch": 35.284, "grad_norm": 1.0243034362792969, "learning_rate": 2e-05, "loss": 0.03229213, "step": 17642 }, { "epoch": 35.286, "grad_norm": 1.063140630722046, "learning_rate": 2e-05, "loss": 0.03908344, "step": 17643 }, { "epoch": 35.288, "grad_norm": 1.3021539449691772, "learning_rate": 2e-05, "loss": 0.04453167, "step": 17644 }, { "epoch": 35.29, "grad_norm": 1.3543224334716797, "learning_rate": 2e-05, "loss": 0.04341988, "step": 17645 }, { "epoch": 35.292, "grad_norm": 1.0332841873168945, "learning_rate": 2e-05, "loss": 0.03383926, "step": 17646 }, { "epoch": 35.294, "grad_norm": 1.3309950828552246, "learning_rate": 2e-05, "loss": 0.04284353, "step": 17647 }, { "epoch": 35.296, "grad_norm": 1.209456443786621, "learning_rate": 2e-05, "loss": 0.0431093, "step": 17648 }, { "epoch": 35.298, "grad_norm": 0.9340609908103943, "learning_rate": 2e-05, "loss": 0.0337215, "step": 17649 }, { "epoch": 35.3, "grad_norm": 1.0196163654327393, "learning_rate": 2e-05, "loss": 0.04464494, "step": 17650 }, { "epoch": 35.302, "grad_norm": 0.840370774269104, "learning_rate": 2e-05, "loss": 0.02968617, "step": 17651 }, { "epoch": 35.304, "grad_norm": 1.09442138671875, "learning_rate": 2e-05, "loss": 0.04684709, "step": 17652 }, { "epoch": 35.306, "grad_norm": 1.85187566280365, "learning_rate": 2e-05, "loss": 0.06059234, "step": 17653 }, { "epoch": 35.308, "grad_norm": 0.9538354873657227, "learning_rate": 2e-05, "loss": 0.04173321, "step": 17654 }, { "epoch": 35.31, "grad_norm": 1.0849088430404663, "learning_rate": 2e-05, "loss": 0.03570964, "step": 17655 }, { "epoch": 35.312, "grad_norm": 1.4101598262786865, "learning_rate": 2e-05, "loss": 0.05651437, "step": 17656 }, { "epoch": 35.314, "grad_norm": 1.9974442720413208, "learning_rate": 2e-05, "loss": 0.04331794, "step": 17657 }, { "epoch": 35.316, "grad_norm": 1.3309555053710938, "learning_rate": 2e-05, "loss": 0.05525503, "step": 17658 }, { "epoch": 35.318, "grad_norm": 1.2612769603729248, "learning_rate": 2e-05, "loss": 0.04997455, "step": 17659 }, { "epoch": 35.32, "grad_norm": 0.861672580242157, "learning_rate": 2e-05, "loss": 0.02711889, "step": 17660 }, { "epoch": 35.322, "grad_norm": 1.1754769086837769, "learning_rate": 2e-05, "loss": 0.03256553, "step": 17661 }, { "epoch": 35.324, "grad_norm": 0.9169670939445496, "learning_rate": 2e-05, "loss": 0.04146589, "step": 17662 }, { "epoch": 35.326, "grad_norm": 1.1292067766189575, "learning_rate": 2e-05, "loss": 0.05579875, "step": 17663 }, { "epoch": 35.328, "grad_norm": 1.0622222423553467, "learning_rate": 2e-05, "loss": 0.04203863, "step": 17664 }, { "epoch": 35.33, "grad_norm": 1.288285493850708, "learning_rate": 2e-05, "loss": 0.05435022, "step": 17665 }, { "epoch": 35.332, "grad_norm": 1.7340805530548096, "learning_rate": 2e-05, "loss": 0.04209695, "step": 17666 }, { "epoch": 35.334, "grad_norm": 1.1800674200057983, "learning_rate": 2e-05, "loss": 0.05009048, "step": 17667 }, { "epoch": 35.336, "grad_norm": 1.3120408058166504, "learning_rate": 2e-05, "loss": 0.05273797, "step": 17668 }, { "epoch": 35.338, "grad_norm": 1.2309207916259766, "learning_rate": 2e-05, "loss": 0.04623915, "step": 17669 }, { "epoch": 35.34, "grad_norm": 1.2292450666427612, "learning_rate": 2e-05, "loss": 0.0423172, "step": 17670 }, { "epoch": 35.342, "grad_norm": 1.52309250831604, "learning_rate": 2e-05, "loss": 0.06222498, "step": 17671 }, { "epoch": 35.344, "grad_norm": 0.9260351061820984, "learning_rate": 2e-05, "loss": 0.0379132, "step": 17672 }, { "epoch": 35.346, "grad_norm": 1.2316185235977173, "learning_rate": 2e-05, "loss": 0.03675316, "step": 17673 }, { "epoch": 35.348, "grad_norm": 1.0844045877456665, "learning_rate": 2e-05, "loss": 0.05681034, "step": 17674 }, { "epoch": 35.35, "grad_norm": 1.1189839839935303, "learning_rate": 2e-05, "loss": 0.04851883, "step": 17675 }, { "epoch": 35.352, "grad_norm": 1.1107527017593384, "learning_rate": 2e-05, "loss": 0.03947585, "step": 17676 }, { "epoch": 35.354, "grad_norm": 1.212267279624939, "learning_rate": 2e-05, "loss": 0.04684102, "step": 17677 }, { "epoch": 35.356, "grad_norm": 1.1278090476989746, "learning_rate": 2e-05, "loss": 0.05221531, "step": 17678 }, { "epoch": 35.358, "grad_norm": 2.178744316101074, "learning_rate": 2e-05, "loss": 0.05131196, "step": 17679 }, { "epoch": 35.36, "grad_norm": 0.885080099105835, "learning_rate": 2e-05, "loss": 0.02827149, "step": 17680 }, { "epoch": 35.362, "grad_norm": 1.2502079010009766, "learning_rate": 2e-05, "loss": 0.05260678, "step": 17681 }, { "epoch": 35.364, "grad_norm": 1.4147708415985107, "learning_rate": 2e-05, "loss": 0.04275967, "step": 17682 }, { "epoch": 35.366, "grad_norm": 1.0567364692687988, "learning_rate": 2e-05, "loss": 0.04111627, "step": 17683 }, { "epoch": 35.368, "grad_norm": 1.0135693550109863, "learning_rate": 2e-05, "loss": 0.03558561, "step": 17684 }, { "epoch": 35.37, "grad_norm": 0.9155202507972717, "learning_rate": 2e-05, "loss": 0.03962982, "step": 17685 }, { "epoch": 35.372, "grad_norm": 1.0973446369171143, "learning_rate": 2e-05, "loss": 0.04567979, "step": 17686 }, { "epoch": 35.374, "grad_norm": 1.1090086698532104, "learning_rate": 2e-05, "loss": 0.04091753, "step": 17687 }, { "epoch": 35.376, "grad_norm": 4.153783321380615, "learning_rate": 2e-05, "loss": 0.02635382, "step": 17688 }, { "epoch": 35.378, "grad_norm": 1.5617598295211792, "learning_rate": 2e-05, "loss": 0.05339186, "step": 17689 }, { "epoch": 35.38, "grad_norm": 1.6091276407241821, "learning_rate": 2e-05, "loss": 0.04465017, "step": 17690 }, { "epoch": 35.382, "grad_norm": 0.9319542050361633, "learning_rate": 2e-05, "loss": 0.03993105, "step": 17691 }, { "epoch": 35.384, "grad_norm": 2.022355079650879, "learning_rate": 2e-05, "loss": 0.03834549, "step": 17692 }, { "epoch": 35.386, "grad_norm": 1.0256010293960571, "learning_rate": 2e-05, "loss": 0.03824114, "step": 17693 }, { "epoch": 35.388, "grad_norm": 1.1768181324005127, "learning_rate": 2e-05, "loss": 0.04413105, "step": 17694 }, { "epoch": 35.39, "grad_norm": 0.9824259281158447, "learning_rate": 2e-05, "loss": 0.02771544, "step": 17695 }, { "epoch": 35.392, "grad_norm": 1.2983273267745972, "learning_rate": 2e-05, "loss": 0.04002289, "step": 17696 }, { "epoch": 35.394, "grad_norm": 0.9368212819099426, "learning_rate": 2e-05, "loss": 0.03699147, "step": 17697 }, { "epoch": 35.396, "grad_norm": 1.3806641101837158, "learning_rate": 2e-05, "loss": 0.05399401, "step": 17698 }, { "epoch": 35.398, "grad_norm": 0.9939367771148682, "learning_rate": 2e-05, "loss": 0.03571153, "step": 17699 }, { "epoch": 35.4, "grad_norm": 1.427320957183838, "learning_rate": 2e-05, "loss": 0.05340093, "step": 17700 }, { "epoch": 35.402, "grad_norm": 1.188673973083496, "learning_rate": 2e-05, "loss": 0.04576518, "step": 17701 }, { "epoch": 35.404, "grad_norm": 1.4022289514541626, "learning_rate": 2e-05, "loss": 0.05224154, "step": 17702 }, { "epoch": 35.406, "grad_norm": 0.956570029258728, "learning_rate": 2e-05, "loss": 0.04083812, "step": 17703 }, { "epoch": 35.408, "grad_norm": 1.4729084968566895, "learning_rate": 2e-05, "loss": 0.03523451, "step": 17704 }, { "epoch": 35.41, "grad_norm": 2.227999687194824, "learning_rate": 2e-05, "loss": 0.02766923, "step": 17705 }, { "epoch": 35.412, "grad_norm": 0.9519136548042297, "learning_rate": 2e-05, "loss": 0.03202899, "step": 17706 }, { "epoch": 35.414, "grad_norm": 1.2759313583374023, "learning_rate": 2e-05, "loss": 0.04123229, "step": 17707 }, { "epoch": 35.416, "grad_norm": 2.022313117980957, "learning_rate": 2e-05, "loss": 0.0543904, "step": 17708 }, { "epoch": 35.418, "grad_norm": 1.616140365600586, "learning_rate": 2e-05, "loss": 0.04884363, "step": 17709 }, { "epoch": 35.42, "grad_norm": 1.424131989479065, "learning_rate": 2e-05, "loss": 0.05453948, "step": 17710 }, { "epoch": 35.422, "grad_norm": 1.3241058588027954, "learning_rate": 2e-05, "loss": 0.05644249, "step": 17711 }, { "epoch": 35.424, "grad_norm": 2.7332921028137207, "learning_rate": 2e-05, "loss": 0.04308933, "step": 17712 }, { "epoch": 35.426, "grad_norm": 2.1034982204437256, "learning_rate": 2e-05, "loss": 0.06158004, "step": 17713 }, { "epoch": 35.428, "grad_norm": 1.3037017583847046, "learning_rate": 2e-05, "loss": 0.04348128, "step": 17714 }, { "epoch": 35.43, "grad_norm": 1.0696301460266113, "learning_rate": 2e-05, "loss": 0.03787622, "step": 17715 }, { "epoch": 35.432, "grad_norm": 1.0378317832946777, "learning_rate": 2e-05, "loss": 0.03195773, "step": 17716 }, { "epoch": 35.434, "grad_norm": 1.275861382484436, "learning_rate": 2e-05, "loss": 0.03302122, "step": 17717 }, { "epoch": 35.436, "grad_norm": 1.2428311109542847, "learning_rate": 2e-05, "loss": 0.05002519, "step": 17718 }, { "epoch": 35.438, "grad_norm": 0.988196611404419, "learning_rate": 2e-05, "loss": 0.04524065, "step": 17719 }, { "epoch": 35.44, "grad_norm": 1.172174096107483, "learning_rate": 2e-05, "loss": 0.04146743, "step": 17720 }, { "epoch": 35.442, "grad_norm": 1.149983286857605, "learning_rate": 2e-05, "loss": 0.04835968, "step": 17721 }, { "epoch": 35.444, "grad_norm": 1.4672528505325317, "learning_rate": 2e-05, "loss": 0.04507277, "step": 17722 }, { "epoch": 35.446, "grad_norm": 0.8580577373504639, "learning_rate": 2e-05, "loss": 0.03269992, "step": 17723 }, { "epoch": 35.448, "grad_norm": 1.9096990823745728, "learning_rate": 2e-05, "loss": 0.05402298, "step": 17724 }, { "epoch": 35.45, "grad_norm": 1.2691797018051147, "learning_rate": 2e-05, "loss": 0.03194361, "step": 17725 }, { "epoch": 35.452, "grad_norm": 1.3220056295394897, "learning_rate": 2e-05, "loss": 0.03200767, "step": 17726 }, { "epoch": 35.454, "grad_norm": 1.0757757425308228, "learning_rate": 2e-05, "loss": 0.05129121, "step": 17727 }, { "epoch": 35.456, "grad_norm": 1.0469292402267456, "learning_rate": 2e-05, "loss": 0.03614257, "step": 17728 }, { "epoch": 35.458, "grad_norm": 0.9992891550064087, "learning_rate": 2e-05, "loss": 0.04821371, "step": 17729 }, { "epoch": 35.46, "grad_norm": 0.8726563453674316, "learning_rate": 2e-05, "loss": 0.03163296, "step": 17730 }, { "epoch": 35.462, "grad_norm": 1.2163383960723877, "learning_rate": 2e-05, "loss": 0.05531082, "step": 17731 }, { "epoch": 35.464, "grad_norm": 0.9599181413650513, "learning_rate": 2e-05, "loss": 0.03643164, "step": 17732 }, { "epoch": 35.466, "grad_norm": 1.0758107900619507, "learning_rate": 2e-05, "loss": 0.04268233, "step": 17733 }, { "epoch": 35.468, "grad_norm": 1.127024531364441, "learning_rate": 2e-05, "loss": 0.04199637, "step": 17734 }, { "epoch": 35.47, "grad_norm": 3.7262139320373535, "learning_rate": 2e-05, "loss": 0.05797809, "step": 17735 }, { "epoch": 35.472, "grad_norm": 2.14314866065979, "learning_rate": 2e-05, "loss": 0.04778343, "step": 17736 }, { "epoch": 35.474, "grad_norm": 1.3994665145874023, "learning_rate": 2e-05, "loss": 0.04588185, "step": 17737 }, { "epoch": 35.476, "grad_norm": 1.011435627937317, "learning_rate": 2e-05, "loss": 0.04821397, "step": 17738 }, { "epoch": 35.478, "grad_norm": 1.0817642211914062, "learning_rate": 2e-05, "loss": 0.0571139, "step": 17739 }, { "epoch": 35.48, "grad_norm": 1.3570966720581055, "learning_rate": 2e-05, "loss": 0.0451093, "step": 17740 }, { "epoch": 35.482, "grad_norm": 1.2720415592193604, "learning_rate": 2e-05, "loss": 0.0419554, "step": 17741 }, { "epoch": 35.484, "grad_norm": 1.1292437314987183, "learning_rate": 2e-05, "loss": 0.03537326, "step": 17742 }, { "epoch": 35.486, "grad_norm": 4.787904262542725, "learning_rate": 2e-05, "loss": 0.0445136, "step": 17743 }, { "epoch": 35.488, "grad_norm": 1.1936686038970947, "learning_rate": 2e-05, "loss": 0.07041204, "step": 17744 }, { "epoch": 35.49, "grad_norm": 1.009709119796753, "learning_rate": 2e-05, "loss": 0.04087082, "step": 17745 }, { "epoch": 35.492, "grad_norm": 0.8851386308670044, "learning_rate": 2e-05, "loss": 0.05184007, "step": 17746 }, { "epoch": 35.494, "grad_norm": 1.107445478439331, "learning_rate": 2e-05, "loss": 0.04365218, "step": 17747 }, { "epoch": 35.496, "grad_norm": 1.2579169273376465, "learning_rate": 2e-05, "loss": 0.06756692, "step": 17748 }, { "epoch": 35.498, "grad_norm": 1.118126630783081, "learning_rate": 2e-05, "loss": 0.04613429, "step": 17749 }, { "epoch": 35.5, "grad_norm": 1.1846469640731812, "learning_rate": 2e-05, "loss": 0.0385599, "step": 17750 }, { "epoch": 35.502, "grad_norm": 1.0665580034255981, "learning_rate": 2e-05, "loss": 0.03990706, "step": 17751 }, { "epoch": 35.504, "grad_norm": 1.0185344219207764, "learning_rate": 2e-05, "loss": 0.04657588, "step": 17752 }, { "epoch": 35.506, "grad_norm": 1.1447497606277466, "learning_rate": 2e-05, "loss": 0.04588531, "step": 17753 }, { "epoch": 35.508, "grad_norm": 1.118882417678833, "learning_rate": 2e-05, "loss": 0.02719714, "step": 17754 }, { "epoch": 35.51, "grad_norm": 1.905713438987732, "learning_rate": 2e-05, "loss": 0.04117458, "step": 17755 }, { "epoch": 35.512, "grad_norm": 1.6821480989456177, "learning_rate": 2e-05, "loss": 0.06954286, "step": 17756 }, { "epoch": 35.514, "grad_norm": 0.8703315854072571, "learning_rate": 2e-05, "loss": 0.03895139, "step": 17757 }, { "epoch": 35.516, "grad_norm": 1.7936755418777466, "learning_rate": 2e-05, "loss": 0.04810158, "step": 17758 }, { "epoch": 35.518, "grad_norm": 0.7935605645179749, "learning_rate": 2e-05, "loss": 0.02763297, "step": 17759 }, { "epoch": 35.52, "grad_norm": 1.044564127922058, "learning_rate": 2e-05, "loss": 0.04207416, "step": 17760 }, { "epoch": 35.522, "grad_norm": 0.7122366428375244, "learning_rate": 2e-05, "loss": 0.01845409, "step": 17761 }, { "epoch": 35.524, "grad_norm": 1.1472291946411133, "learning_rate": 2e-05, "loss": 0.05360861, "step": 17762 }, { "epoch": 35.526, "grad_norm": 0.972972571849823, "learning_rate": 2e-05, "loss": 0.03553871, "step": 17763 }, { "epoch": 35.528, "grad_norm": 1.0764737129211426, "learning_rate": 2e-05, "loss": 0.04770009, "step": 17764 }, { "epoch": 35.53, "grad_norm": 0.9953265190124512, "learning_rate": 2e-05, "loss": 0.0485123, "step": 17765 }, { "epoch": 35.532, "grad_norm": 1.3701955080032349, "learning_rate": 2e-05, "loss": 0.0558629, "step": 17766 }, { "epoch": 35.534, "grad_norm": 1.0354093313217163, "learning_rate": 2e-05, "loss": 0.03873002, "step": 17767 }, { "epoch": 35.536, "grad_norm": 2.023448944091797, "learning_rate": 2e-05, "loss": 0.03817055, "step": 17768 }, { "epoch": 35.538, "grad_norm": 1.1391485929489136, "learning_rate": 2e-05, "loss": 0.04843132, "step": 17769 }, { "epoch": 35.54, "grad_norm": 1.5263519287109375, "learning_rate": 2e-05, "loss": 0.04642747, "step": 17770 }, { "epoch": 35.542, "grad_norm": 0.9989811778068542, "learning_rate": 2e-05, "loss": 0.0397132, "step": 17771 }, { "epoch": 35.544, "grad_norm": 0.9769954085350037, "learning_rate": 2e-05, "loss": 0.04003306, "step": 17772 }, { "epoch": 35.546, "grad_norm": 1.764196753501892, "learning_rate": 2e-05, "loss": 0.03632543, "step": 17773 }, { "epoch": 35.548, "grad_norm": 1.017839789390564, "learning_rate": 2e-05, "loss": 0.04212029, "step": 17774 }, { "epoch": 35.55, "grad_norm": 1.3519413471221924, "learning_rate": 2e-05, "loss": 0.03564759, "step": 17775 }, { "epoch": 35.552, "grad_norm": 2.3475871086120605, "learning_rate": 2e-05, "loss": 0.05288629, "step": 17776 }, { "epoch": 35.554, "grad_norm": 1.1782852411270142, "learning_rate": 2e-05, "loss": 0.04206895, "step": 17777 }, { "epoch": 35.556, "grad_norm": 1.176536202430725, "learning_rate": 2e-05, "loss": 0.04446234, "step": 17778 }, { "epoch": 35.558, "grad_norm": 1.3534846305847168, "learning_rate": 2e-05, "loss": 0.0504674, "step": 17779 }, { "epoch": 35.56, "grad_norm": 1.4990696907043457, "learning_rate": 2e-05, "loss": 0.03484973, "step": 17780 }, { "epoch": 35.562, "grad_norm": 1.0900075435638428, "learning_rate": 2e-05, "loss": 0.03626657, "step": 17781 }, { "epoch": 35.564, "grad_norm": 0.9624567031860352, "learning_rate": 2e-05, "loss": 0.03484332, "step": 17782 }, { "epoch": 35.566, "grad_norm": 1.3573051691055298, "learning_rate": 2e-05, "loss": 0.04963438, "step": 17783 }, { "epoch": 35.568, "grad_norm": 0.9251115322113037, "learning_rate": 2e-05, "loss": 0.03892969, "step": 17784 }, { "epoch": 35.57, "grad_norm": 0.9245318174362183, "learning_rate": 2e-05, "loss": 0.02878282, "step": 17785 }, { "epoch": 35.572, "grad_norm": 0.9210284352302551, "learning_rate": 2e-05, "loss": 0.02540288, "step": 17786 }, { "epoch": 35.574, "grad_norm": 1.5669573545455933, "learning_rate": 2e-05, "loss": 0.04102181, "step": 17787 }, { "epoch": 35.576, "grad_norm": 0.953571081161499, "learning_rate": 2e-05, "loss": 0.03061294, "step": 17788 }, { "epoch": 35.578, "grad_norm": 1.0442678928375244, "learning_rate": 2e-05, "loss": 0.04631881, "step": 17789 }, { "epoch": 35.58, "grad_norm": 1.0590013265609741, "learning_rate": 2e-05, "loss": 0.04192845, "step": 17790 }, { "epoch": 35.582, "grad_norm": 0.9320895671844482, "learning_rate": 2e-05, "loss": 0.04079748, "step": 17791 }, { "epoch": 35.584, "grad_norm": 1.3087483644485474, "learning_rate": 2e-05, "loss": 0.04591719, "step": 17792 }, { "epoch": 35.586, "grad_norm": 2.451941967010498, "learning_rate": 2e-05, "loss": 0.03583793, "step": 17793 }, { "epoch": 35.588, "grad_norm": 1.6351011991500854, "learning_rate": 2e-05, "loss": 0.05808794, "step": 17794 }, { "epoch": 35.59, "grad_norm": 1.1539368629455566, "learning_rate": 2e-05, "loss": 0.03932741, "step": 17795 }, { "epoch": 35.592, "grad_norm": 1.0968551635742188, "learning_rate": 2e-05, "loss": 0.04897058, "step": 17796 }, { "epoch": 35.594, "grad_norm": 1.1344934701919556, "learning_rate": 2e-05, "loss": 0.04373275, "step": 17797 }, { "epoch": 35.596, "grad_norm": 1.0937422513961792, "learning_rate": 2e-05, "loss": 0.04663185, "step": 17798 }, { "epoch": 35.598, "grad_norm": 1.8810701370239258, "learning_rate": 2e-05, "loss": 0.03928865, "step": 17799 }, { "epoch": 35.6, "grad_norm": 1.154462218284607, "learning_rate": 2e-05, "loss": 0.05194328, "step": 17800 }, { "epoch": 35.602, "grad_norm": 0.7318021655082703, "learning_rate": 2e-05, "loss": 0.0248587, "step": 17801 }, { "epoch": 35.604, "grad_norm": 1.1430221796035767, "learning_rate": 2e-05, "loss": 0.04740971, "step": 17802 }, { "epoch": 35.606, "grad_norm": 1.309664249420166, "learning_rate": 2e-05, "loss": 0.04505972, "step": 17803 }, { "epoch": 35.608, "grad_norm": 1.3802859783172607, "learning_rate": 2e-05, "loss": 0.04448079, "step": 17804 }, { "epoch": 35.61, "grad_norm": 1.0077226161956787, "learning_rate": 2e-05, "loss": 0.04205176, "step": 17805 }, { "epoch": 35.612, "grad_norm": 1.4358106851577759, "learning_rate": 2e-05, "loss": 0.03876767, "step": 17806 }, { "epoch": 35.614, "grad_norm": 0.9969291687011719, "learning_rate": 2e-05, "loss": 0.03736145, "step": 17807 }, { "epoch": 35.616, "grad_norm": 1.5516763925552368, "learning_rate": 2e-05, "loss": 0.04513609, "step": 17808 }, { "epoch": 35.618, "grad_norm": 1.0012342929840088, "learning_rate": 2e-05, "loss": 0.03688586, "step": 17809 }, { "epoch": 35.62, "grad_norm": 0.9941564202308655, "learning_rate": 2e-05, "loss": 0.04614796, "step": 17810 }, { "epoch": 35.622, "grad_norm": 1.2094674110412598, "learning_rate": 2e-05, "loss": 0.04694629, "step": 17811 }, { "epoch": 35.624, "grad_norm": 1.0084950923919678, "learning_rate": 2e-05, "loss": 0.04975464, "step": 17812 }, { "epoch": 35.626, "grad_norm": 1.2530282735824585, "learning_rate": 2e-05, "loss": 0.059588, "step": 17813 }, { "epoch": 35.628, "grad_norm": 1.6957203149795532, "learning_rate": 2e-05, "loss": 0.05111027, "step": 17814 }, { "epoch": 35.63, "grad_norm": 1.7073659896850586, "learning_rate": 2e-05, "loss": 0.0391752, "step": 17815 }, { "epoch": 35.632, "grad_norm": 1.105798602104187, "learning_rate": 2e-05, "loss": 0.04066662, "step": 17816 }, { "epoch": 35.634, "grad_norm": 1.6232260465621948, "learning_rate": 2e-05, "loss": 0.06263505, "step": 17817 }, { "epoch": 35.636, "grad_norm": 1.2729047536849976, "learning_rate": 2e-05, "loss": 0.03372806, "step": 17818 }, { "epoch": 35.638, "grad_norm": 1.830386757850647, "learning_rate": 2e-05, "loss": 0.05839159, "step": 17819 }, { "epoch": 35.64, "grad_norm": 1.4902586936950684, "learning_rate": 2e-05, "loss": 0.06051061, "step": 17820 }, { "epoch": 35.642, "grad_norm": 1.289066195487976, "learning_rate": 2e-05, "loss": 0.03476595, "step": 17821 }, { "epoch": 35.644, "grad_norm": 1.5072979927062988, "learning_rate": 2e-05, "loss": 0.05749911, "step": 17822 }, { "epoch": 35.646, "grad_norm": 1.089073657989502, "learning_rate": 2e-05, "loss": 0.04390016, "step": 17823 }, { "epoch": 35.648, "grad_norm": 1.0387096405029297, "learning_rate": 2e-05, "loss": 0.04868191, "step": 17824 }, { "epoch": 35.65, "grad_norm": 2.3330564498901367, "learning_rate": 2e-05, "loss": 0.03920126, "step": 17825 }, { "epoch": 35.652, "grad_norm": 0.8156403303146362, "learning_rate": 2e-05, "loss": 0.02710369, "step": 17826 }, { "epoch": 35.654, "grad_norm": 1.2842808961868286, "learning_rate": 2e-05, "loss": 0.04716428, "step": 17827 }, { "epoch": 35.656, "grad_norm": 1.6243064403533936, "learning_rate": 2e-05, "loss": 0.04945413, "step": 17828 }, { "epoch": 35.658, "grad_norm": 1.1617268323898315, "learning_rate": 2e-05, "loss": 0.05433542, "step": 17829 }, { "epoch": 35.66, "grad_norm": 0.9933856129646301, "learning_rate": 2e-05, "loss": 0.04535313, "step": 17830 }, { "epoch": 35.662, "grad_norm": 0.9829065799713135, "learning_rate": 2e-05, "loss": 0.03355701, "step": 17831 }, { "epoch": 35.664, "grad_norm": 0.96443772315979, "learning_rate": 2e-05, "loss": 0.04178785, "step": 17832 }, { "epoch": 35.666, "grad_norm": 2.2146899700164795, "learning_rate": 2e-05, "loss": 0.04975067, "step": 17833 }, { "epoch": 35.668, "grad_norm": 1.0088889598846436, "learning_rate": 2e-05, "loss": 0.03016124, "step": 17834 }, { "epoch": 35.67, "grad_norm": 1.836744785308838, "learning_rate": 2e-05, "loss": 0.05857106, "step": 17835 }, { "epoch": 35.672, "grad_norm": 1.479236125946045, "learning_rate": 2e-05, "loss": 0.04426521, "step": 17836 }, { "epoch": 35.674, "grad_norm": 1.334231972694397, "learning_rate": 2e-05, "loss": 0.0651462, "step": 17837 }, { "epoch": 35.676, "grad_norm": 1.1459414958953857, "learning_rate": 2e-05, "loss": 0.04898876, "step": 17838 }, { "epoch": 35.678, "grad_norm": 1.0009347200393677, "learning_rate": 2e-05, "loss": 0.03742814, "step": 17839 }, { "epoch": 35.68, "grad_norm": 1.045682668685913, "learning_rate": 2e-05, "loss": 0.03534093, "step": 17840 }, { "epoch": 35.682, "grad_norm": 0.9055586457252502, "learning_rate": 2e-05, "loss": 0.03842632, "step": 17841 }, { "epoch": 35.684, "grad_norm": 1.1022517681121826, "learning_rate": 2e-05, "loss": 0.03496824, "step": 17842 }, { "epoch": 35.686, "grad_norm": 1.030848741531372, "learning_rate": 2e-05, "loss": 0.04140773, "step": 17843 }, { "epoch": 35.688, "grad_norm": 1.2500731945037842, "learning_rate": 2e-05, "loss": 0.03619286, "step": 17844 }, { "epoch": 35.69, "grad_norm": 1.255372166633606, "learning_rate": 2e-05, "loss": 0.04953964, "step": 17845 }, { "epoch": 35.692, "grad_norm": 1.3639477491378784, "learning_rate": 2e-05, "loss": 0.04427748, "step": 17846 }, { "epoch": 35.694, "grad_norm": 1.2349683046340942, "learning_rate": 2e-05, "loss": 0.05718009, "step": 17847 }, { "epoch": 35.696, "grad_norm": 1.3479573726654053, "learning_rate": 2e-05, "loss": 0.05271432, "step": 17848 }, { "epoch": 35.698, "grad_norm": 2.0954861640930176, "learning_rate": 2e-05, "loss": 0.04573008, "step": 17849 }, { "epoch": 35.7, "grad_norm": 0.9008009433746338, "learning_rate": 2e-05, "loss": 0.03625199, "step": 17850 }, { "epoch": 35.702, "grad_norm": 0.899972140789032, "learning_rate": 2e-05, "loss": 0.02966314, "step": 17851 }, { "epoch": 35.704, "grad_norm": 1.0539798736572266, "learning_rate": 2e-05, "loss": 0.04530566, "step": 17852 }, { "epoch": 35.706, "grad_norm": 1.519480586051941, "learning_rate": 2e-05, "loss": 0.04442572, "step": 17853 }, { "epoch": 35.708, "grad_norm": 1.4738776683807373, "learning_rate": 2e-05, "loss": 0.05070395, "step": 17854 }, { "epoch": 35.71, "grad_norm": 1.1291126012802124, "learning_rate": 2e-05, "loss": 0.04831242, "step": 17855 }, { "epoch": 35.712, "grad_norm": 1.1266931295394897, "learning_rate": 2e-05, "loss": 0.05407539, "step": 17856 }, { "epoch": 35.714, "grad_norm": 0.9076749682426453, "learning_rate": 2e-05, "loss": 0.02798611, "step": 17857 }, { "epoch": 35.716, "grad_norm": 1.0951939821243286, "learning_rate": 2e-05, "loss": 0.03466193, "step": 17858 }, { "epoch": 35.718, "grad_norm": 1.0534316301345825, "learning_rate": 2e-05, "loss": 0.04609064, "step": 17859 }, { "epoch": 35.72, "grad_norm": 1.2272918224334717, "learning_rate": 2e-05, "loss": 0.05875445, "step": 17860 }, { "epoch": 35.722, "grad_norm": 1.824316382408142, "learning_rate": 2e-05, "loss": 0.03652216, "step": 17861 }, { "epoch": 35.724, "grad_norm": 1.4697301387786865, "learning_rate": 2e-05, "loss": 0.03582899, "step": 17862 }, { "epoch": 35.726, "grad_norm": 1.179726481437683, "learning_rate": 2e-05, "loss": 0.0458418, "step": 17863 }, { "epoch": 35.728, "grad_norm": 1.1877864599227905, "learning_rate": 2e-05, "loss": 0.03948677, "step": 17864 }, { "epoch": 35.73, "grad_norm": 1.2530378103256226, "learning_rate": 2e-05, "loss": 0.05002031, "step": 17865 }, { "epoch": 35.732, "grad_norm": 1.0493865013122559, "learning_rate": 2e-05, "loss": 0.03972576, "step": 17866 }, { "epoch": 35.734, "grad_norm": 1.2503560781478882, "learning_rate": 2e-05, "loss": 0.04502755, "step": 17867 }, { "epoch": 35.736, "grad_norm": 1.091678500175476, "learning_rate": 2e-05, "loss": 0.04928856, "step": 17868 }, { "epoch": 35.738, "grad_norm": 1.0536844730377197, "learning_rate": 2e-05, "loss": 0.05150354, "step": 17869 }, { "epoch": 35.74, "grad_norm": 0.8112476468086243, "learning_rate": 2e-05, "loss": 0.02828744, "step": 17870 }, { "epoch": 35.742, "grad_norm": 0.9733346104621887, "learning_rate": 2e-05, "loss": 0.03046471, "step": 17871 }, { "epoch": 35.744, "grad_norm": 1.1976494789123535, "learning_rate": 2e-05, "loss": 0.04932082, "step": 17872 }, { "epoch": 35.746, "grad_norm": 1.190445065498352, "learning_rate": 2e-05, "loss": 0.05254158, "step": 17873 }, { "epoch": 35.748, "grad_norm": 1.1881194114685059, "learning_rate": 2e-05, "loss": 0.05419042, "step": 17874 }, { "epoch": 35.75, "grad_norm": 1.0244606733322144, "learning_rate": 2e-05, "loss": 0.03196706, "step": 17875 }, { "epoch": 35.752, "grad_norm": 1.100379467010498, "learning_rate": 2e-05, "loss": 0.04613109, "step": 17876 }, { "epoch": 35.754, "grad_norm": 1.0622649192810059, "learning_rate": 2e-05, "loss": 0.05097427, "step": 17877 }, { "epoch": 35.756, "grad_norm": 1.0975067615509033, "learning_rate": 2e-05, "loss": 0.04049646, "step": 17878 }, { "epoch": 35.758, "grad_norm": 1.1919175386428833, "learning_rate": 2e-05, "loss": 0.03593216, "step": 17879 }, { "epoch": 35.76, "grad_norm": 1.0537738800048828, "learning_rate": 2e-05, "loss": 0.04305983, "step": 17880 }, { "epoch": 35.762, "grad_norm": 1.262789249420166, "learning_rate": 2e-05, "loss": 0.0311901, "step": 17881 }, { "epoch": 35.764, "grad_norm": 1.2861202955245972, "learning_rate": 2e-05, "loss": 0.05621336, "step": 17882 }, { "epoch": 35.766, "grad_norm": 1.7805184125900269, "learning_rate": 2e-05, "loss": 0.04182611, "step": 17883 }, { "epoch": 35.768, "grad_norm": 1.8726239204406738, "learning_rate": 2e-05, "loss": 0.04629755, "step": 17884 }, { "epoch": 35.77, "grad_norm": 2.6237668991088867, "learning_rate": 2e-05, "loss": 0.05220035, "step": 17885 }, { "epoch": 35.772, "grad_norm": 1.0690679550170898, "learning_rate": 2e-05, "loss": 0.04517773, "step": 17886 }, { "epoch": 35.774, "grad_norm": 1.0745878219604492, "learning_rate": 2e-05, "loss": 0.03235887, "step": 17887 }, { "epoch": 35.776, "grad_norm": 1.2051985263824463, "learning_rate": 2e-05, "loss": 0.05141218, "step": 17888 }, { "epoch": 35.778, "grad_norm": 1.5735156536102295, "learning_rate": 2e-05, "loss": 0.05923137, "step": 17889 }, { "epoch": 35.78, "grad_norm": 2.7049155235290527, "learning_rate": 2e-05, "loss": 0.05662517, "step": 17890 }, { "epoch": 35.782, "grad_norm": 1.578530192375183, "learning_rate": 2e-05, "loss": 0.04163588, "step": 17891 }, { "epoch": 35.784, "grad_norm": 1.0815675258636475, "learning_rate": 2e-05, "loss": 0.03740942, "step": 17892 }, { "epoch": 35.786, "grad_norm": 1.0301393270492554, "learning_rate": 2e-05, "loss": 0.038887, "step": 17893 }, { "epoch": 35.788, "grad_norm": 1.4960992336273193, "learning_rate": 2e-05, "loss": 0.0459294, "step": 17894 }, { "epoch": 35.79, "grad_norm": 0.9120678901672363, "learning_rate": 2e-05, "loss": 0.02372322, "step": 17895 }, { "epoch": 35.792, "grad_norm": 1.9509999752044678, "learning_rate": 2e-05, "loss": 0.05730008, "step": 17896 }, { "epoch": 35.794, "grad_norm": 1.1410529613494873, "learning_rate": 2e-05, "loss": 0.0459688, "step": 17897 }, { "epoch": 35.796, "grad_norm": 3.154674768447876, "learning_rate": 2e-05, "loss": 0.04180706, "step": 17898 }, { "epoch": 35.798, "grad_norm": 2.2525460720062256, "learning_rate": 2e-05, "loss": 0.05114174, "step": 17899 }, { "epoch": 35.8, "grad_norm": 1.8642394542694092, "learning_rate": 2e-05, "loss": 0.06002184, "step": 17900 }, { "epoch": 35.802, "grad_norm": 1.152767300605774, "learning_rate": 2e-05, "loss": 0.03703151, "step": 17901 }, { "epoch": 35.804, "grad_norm": 1.0915579795837402, "learning_rate": 2e-05, "loss": 0.04075465, "step": 17902 }, { "epoch": 35.806, "grad_norm": 1.020706057548523, "learning_rate": 2e-05, "loss": 0.03943242, "step": 17903 }, { "epoch": 35.808, "grad_norm": 1.5543339252471924, "learning_rate": 2e-05, "loss": 0.05778353, "step": 17904 }, { "epoch": 35.81, "grad_norm": 0.9286485910415649, "learning_rate": 2e-05, "loss": 0.03517792, "step": 17905 }, { "epoch": 35.812, "grad_norm": 1.8696341514587402, "learning_rate": 2e-05, "loss": 0.05286719, "step": 17906 }, { "epoch": 35.814, "grad_norm": 0.8600780367851257, "learning_rate": 2e-05, "loss": 0.0298844, "step": 17907 }, { "epoch": 35.816, "grad_norm": 1.0908299684524536, "learning_rate": 2e-05, "loss": 0.04075044, "step": 17908 }, { "epoch": 35.818, "grad_norm": 0.9931163787841797, "learning_rate": 2e-05, "loss": 0.03857734, "step": 17909 }, { "epoch": 35.82, "grad_norm": 1.0548137426376343, "learning_rate": 2e-05, "loss": 0.03267374, "step": 17910 }, { "epoch": 35.822, "grad_norm": 1.446677565574646, "learning_rate": 2e-05, "loss": 0.02439149, "step": 17911 }, { "epoch": 35.824, "grad_norm": 0.9765915274620056, "learning_rate": 2e-05, "loss": 0.02738787, "step": 17912 }, { "epoch": 35.826, "grad_norm": 1.7646278142929077, "learning_rate": 2e-05, "loss": 0.04994683, "step": 17913 }, { "epoch": 35.828, "grad_norm": 1.0197588205337524, "learning_rate": 2e-05, "loss": 0.03421141, "step": 17914 }, { "epoch": 35.83, "grad_norm": 1.2300595045089722, "learning_rate": 2e-05, "loss": 0.04027767, "step": 17915 }, { "epoch": 35.832, "grad_norm": 1.3133021593093872, "learning_rate": 2e-05, "loss": 0.04190224, "step": 17916 }, { "epoch": 35.834, "grad_norm": 1.259387493133545, "learning_rate": 2e-05, "loss": 0.04040014, "step": 17917 }, { "epoch": 35.836, "grad_norm": 1.1941673755645752, "learning_rate": 2e-05, "loss": 0.03587606, "step": 17918 }, { "epoch": 35.838, "grad_norm": 1.1251119375228882, "learning_rate": 2e-05, "loss": 0.05083871, "step": 17919 }, { "epoch": 35.84, "grad_norm": 1.2659329175949097, "learning_rate": 2e-05, "loss": 0.05246072, "step": 17920 }, { "epoch": 35.842, "grad_norm": 1.2498339414596558, "learning_rate": 2e-05, "loss": 0.05451684, "step": 17921 }, { "epoch": 35.844, "grad_norm": 1.420396089553833, "learning_rate": 2e-05, "loss": 0.03543719, "step": 17922 }, { "epoch": 35.846, "grad_norm": 3.514441967010498, "learning_rate": 2e-05, "loss": 0.06811243, "step": 17923 }, { "epoch": 35.848, "grad_norm": 1.0977075099945068, "learning_rate": 2e-05, "loss": 0.05242419, "step": 17924 }, { "epoch": 35.85, "grad_norm": 1.050682783126831, "learning_rate": 2e-05, "loss": 0.04714052, "step": 17925 }, { "epoch": 35.852, "grad_norm": 0.9766338467597961, "learning_rate": 2e-05, "loss": 0.04299784, "step": 17926 }, { "epoch": 35.854, "grad_norm": 0.9510364532470703, "learning_rate": 2e-05, "loss": 0.03990202, "step": 17927 }, { "epoch": 35.856, "grad_norm": 1.9610737562179565, "learning_rate": 2e-05, "loss": 0.05586188, "step": 17928 }, { "epoch": 35.858, "grad_norm": 0.9748345017433167, "learning_rate": 2e-05, "loss": 0.04170553, "step": 17929 }, { "epoch": 35.86, "grad_norm": 1.0629684925079346, "learning_rate": 2e-05, "loss": 0.03746425, "step": 17930 }, { "epoch": 35.862, "grad_norm": 1.1884340047836304, "learning_rate": 2e-05, "loss": 0.06334166, "step": 17931 }, { "epoch": 35.864, "grad_norm": 2.0110175609588623, "learning_rate": 2e-05, "loss": 0.03753646, "step": 17932 }, { "epoch": 35.866, "grad_norm": 0.954401969909668, "learning_rate": 2e-05, "loss": 0.03755507, "step": 17933 }, { "epoch": 35.868, "grad_norm": 1.2640724182128906, "learning_rate": 2e-05, "loss": 0.04239856, "step": 17934 }, { "epoch": 35.87, "grad_norm": 1.0699065923690796, "learning_rate": 2e-05, "loss": 0.03946365, "step": 17935 }, { "epoch": 35.872, "grad_norm": 1.8986088037490845, "learning_rate": 2e-05, "loss": 0.05286825, "step": 17936 }, { "epoch": 35.874, "grad_norm": 1.5449522733688354, "learning_rate": 2e-05, "loss": 0.05508115, "step": 17937 }, { "epoch": 35.876, "grad_norm": 1.4599677324295044, "learning_rate": 2e-05, "loss": 0.0357135, "step": 17938 }, { "epoch": 35.878, "grad_norm": 1.6599795818328857, "learning_rate": 2e-05, "loss": 0.03889682, "step": 17939 }, { "epoch": 35.88, "grad_norm": 1.4402499198913574, "learning_rate": 2e-05, "loss": 0.07308306, "step": 17940 }, { "epoch": 35.882, "grad_norm": 1.994226098060608, "learning_rate": 2e-05, "loss": 0.04963081, "step": 17941 }, { "epoch": 35.884, "grad_norm": 2.3030917644500732, "learning_rate": 2e-05, "loss": 0.06350006, "step": 17942 }, { "epoch": 35.886, "grad_norm": 0.9162493944168091, "learning_rate": 2e-05, "loss": 0.03804775, "step": 17943 }, { "epoch": 35.888, "grad_norm": 1.3744843006134033, "learning_rate": 2e-05, "loss": 0.04678103, "step": 17944 }, { "epoch": 35.89, "grad_norm": 0.9501643180847168, "learning_rate": 2e-05, "loss": 0.04715009, "step": 17945 }, { "epoch": 35.892, "grad_norm": 0.8419013023376465, "learning_rate": 2e-05, "loss": 0.03066232, "step": 17946 }, { "epoch": 35.894, "grad_norm": 1.2549291849136353, "learning_rate": 2e-05, "loss": 0.05202661, "step": 17947 }, { "epoch": 35.896, "grad_norm": 0.9916409850120544, "learning_rate": 2e-05, "loss": 0.03674855, "step": 17948 }, { "epoch": 35.898, "grad_norm": 2.094815254211426, "learning_rate": 2e-05, "loss": 0.06448349, "step": 17949 }, { "epoch": 35.9, "grad_norm": 1.1162047386169434, "learning_rate": 2e-05, "loss": 0.04353852, "step": 17950 }, { "epoch": 35.902, "grad_norm": 1.376518726348877, "learning_rate": 2e-05, "loss": 0.04738733, "step": 17951 }, { "epoch": 35.904, "grad_norm": 1.3450309038162231, "learning_rate": 2e-05, "loss": 0.04365817, "step": 17952 }, { "epoch": 35.906, "grad_norm": 1.0869196653366089, "learning_rate": 2e-05, "loss": 0.04617172, "step": 17953 }, { "epoch": 35.908, "grad_norm": 1.1805920600891113, "learning_rate": 2e-05, "loss": 0.0341284, "step": 17954 }, { "epoch": 35.91, "grad_norm": 1.4624826908111572, "learning_rate": 2e-05, "loss": 0.05623692, "step": 17955 }, { "epoch": 35.912, "grad_norm": 0.9770745635032654, "learning_rate": 2e-05, "loss": 0.04358587, "step": 17956 }, { "epoch": 35.914, "grad_norm": 0.9455199241638184, "learning_rate": 2e-05, "loss": 0.04092978, "step": 17957 }, { "epoch": 35.916, "grad_norm": 1.147408366203308, "learning_rate": 2e-05, "loss": 0.02663073, "step": 17958 }, { "epoch": 35.918, "grad_norm": 1.3518959283828735, "learning_rate": 2e-05, "loss": 0.04713411, "step": 17959 }, { "epoch": 35.92, "grad_norm": 0.914516806602478, "learning_rate": 2e-05, "loss": 0.03204081, "step": 17960 }, { "epoch": 35.922, "grad_norm": 1.0150907039642334, "learning_rate": 2e-05, "loss": 0.04840542, "step": 17961 }, { "epoch": 35.924, "grad_norm": 0.934265673160553, "learning_rate": 2e-05, "loss": 0.03897597, "step": 17962 }, { "epoch": 35.926, "grad_norm": 1.0472999811172485, "learning_rate": 2e-05, "loss": 0.04450693, "step": 17963 }, { "epoch": 35.928, "grad_norm": 1.1037843227386475, "learning_rate": 2e-05, "loss": 0.04110377, "step": 17964 }, { "epoch": 35.93, "grad_norm": 1.1156753301620483, "learning_rate": 2e-05, "loss": 0.05329639, "step": 17965 }, { "epoch": 35.932, "grad_norm": 1.1106239557266235, "learning_rate": 2e-05, "loss": 0.04895515, "step": 17966 }, { "epoch": 35.934, "grad_norm": 1.701825499534607, "learning_rate": 2e-05, "loss": 0.06047001, "step": 17967 }, { "epoch": 35.936, "grad_norm": 1.680334210395813, "learning_rate": 2e-05, "loss": 0.04170813, "step": 17968 }, { "epoch": 35.938, "grad_norm": 1.3011748790740967, "learning_rate": 2e-05, "loss": 0.05525826, "step": 17969 }, { "epoch": 35.94, "grad_norm": 1.2516130208969116, "learning_rate": 2e-05, "loss": 0.06742955, "step": 17970 }, { "epoch": 35.942, "grad_norm": 1.0976548194885254, "learning_rate": 2e-05, "loss": 0.04188678, "step": 17971 }, { "epoch": 35.944, "grad_norm": 1.9291222095489502, "learning_rate": 2e-05, "loss": 0.0500432, "step": 17972 }, { "epoch": 35.946, "grad_norm": 1.1871390342712402, "learning_rate": 2e-05, "loss": 0.03833064, "step": 17973 }, { "epoch": 35.948, "grad_norm": 1.9358015060424805, "learning_rate": 2e-05, "loss": 0.04862614, "step": 17974 }, { "epoch": 35.95, "grad_norm": 1.0851377248764038, "learning_rate": 2e-05, "loss": 0.05858731, "step": 17975 }, { "epoch": 35.952, "grad_norm": 0.9408968091011047, "learning_rate": 2e-05, "loss": 0.03423677, "step": 17976 }, { "epoch": 35.954, "grad_norm": 1.0564815998077393, "learning_rate": 2e-05, "loss": 0.0383263, "step": 17977 }, { "epoch": 35.956, "grad_norm": 1.1081533432006836, "learning_rate": 2e-05, "loss": 0.04394086, "step": 17978 }, { "epoch": 35.958, "grad_norm": 0.9902077913284302, "learning_rate": 2e-05, "loss": 0.04109553, "step": 17979 }, { "epoch": 35.96, "grad_norm": 0.9771989583969116, "learning_rate": 2e-05, "loss": 0.04479699, "step": 17980 }, { "epoch": 35.962, "grad_norm": 2.2754855155944824, "learning_rate": 2e-05, "loss": 0.04749331, "step": 17981 }, { "epoch": 35.964, "grad_norm": 1.0698742866516113, "learning_rate": 2e-05, "loss": 0.03580903, "step": 17982 }, { "epoch": 35.966, "grad_norm": 1.2227691411972046, "learning_rate": 2e-05, "loss": 0.05323946, "step": 17983 }, { "epoch": 35.968, "grad_norm": 1.081729531288147, "learning_rate": 2e-05, "loss": 0.03783212, "step": 17984 }, { "epoch": 35.97, "grad_norm": 1.2142199277877808, "learning_rate": 2e-05, "loss": 0.0591646, "step": 17985 }, { "epoch": 35.972, "grad_norm": 1.0089576244354248, "learning_rate": 2e-05, "loss": 0.03988313, "step": 17986 }, { "epoch": 35.974, "grad_norm": 1.2357524633407593, "learning_rate": 2e-05, "loss": 0.04309086, "step": 17987 }, { "epoch": 35.976, "grad_norm": 1.8578888177871704, "learning_rate": 2e-05, "loss": 0.04944805, "step": 17988 }, { "epoch": 35.978, "grad_norm": 1.1336140632629395, "learning_rate": 2e-05, "loss": 0.04665945, "step": 17989 }, { "epoch": 35.98, "grad_norm": 0.9331308007240295, "learning_rate": 2e-05, "loss": 0.03621591, "step": 17990 }, { "epoch": 35.982, "grad_norm": 1.3162841796875, "learning_rate": 2e-05, "loss": 0.04493711, "step": 17991 }, { "epoch": 35.984, "grad_norm": 1.5543169975280762, "learning_rate": 2e-05, "loss": 0.04536107, "step": 17992 }, { "epoch": 35.986, "grad_norm": 1.7783422470092773, "learning_rate": 2e-05, "loss": 0.06524586, "step": 17993 }, { "epoch": 35.988, "grad_norm": 1.5215754508972168, "learning_rate": 2e-05, "loss": 0.03663024, "step": 17994 }, { "epoch": 35.99, "grad_norm": 1.4671252965927124, "learning_rate": 2e-05, "loss": 0.0541063, "step": 17995 }, { "epoch": 35.992, "grad_norm": 1.0499228239059448, "learning_rate": 2e-05, "loss": 0.04275646, "step": 17996 }, { "epoch": 35.994, "grad_norm": 1.2871328592300415, "learning_rate": 2e-05, "loss": 0.06662484, "step": 17997 }, { "epoch": 35.996, "grad_norm": 1.0467830896377563, "learning_rate": 2e-05, "loss": 0.02858277, "step": 17998 }, { "epoch": 35.998, "grad_norm": 0.9735072255134583, "learning_rate": 2e-05, "loss": 0.04048389, "step": 17999 }, { "epoch": 36.0, "grad_norm": 0.9895581603050232, "learning_rate": 2e-05, "loss": 0.03498125, "step": 18000 }, { "epoch": 36.0, "eval_performance": { "AngleClassification_1": 1.0, "AngleClassification_2": 0.998, "AngleClassification_3": 0.9860279441117764, "Equal_1": 0.998, "Equal_2": 0.9760479041916168, "Equal_3": 0.9880239520958084, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.9960079840319361, "Parallel_1": 0.9899799599198397, "Parallel_2": 0.9979959919839679, "Parallel_3": 0.99, "Perpendicular_1": 0.994, "Perpendicular_2": 1.0, "Perpendicular_3": 0.8877755511022044, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.994, "PointLiesOnCircle_3": 0.992, "PointLiesOnLine_1": 0.9899799599198397, "PointLiesOnLine_2": 0.9979959919839679, "PointLiesOnLine_3": 0.9840319361277445 }, "eval_runtime": 225.8867, "eval_samples_per_second": 46.483, "eval_steps_per_second": 0.93, "step": 18000 }, { "epoch": 36.002, "grad_norm": 1.1547328233718872, "learning_rate": 2e-05, "loss": 0.04469167, "step": 18001 }, { "epoch": 36.004, "grad_norm": 0.9784998893737793, "learning_rate": 2e-05, "loss": 0.03833816, "step": 18002 }, { "epoch": 36.006, "grad_norm": 1.0867472887039185, "learning_rate": 2e-05, "loss": 0.04364108, "step": 18003 }, { "epoch": 36.008, "grad_norm": 1.0137909650802612, "learning_rate": 2e-05, "loss": 0.04323806, "step": 18004 }, { "epoch": 36.01, "grad_norm": 1.3066986799240112, "learning_rate": 2e-05, "loss": 0.03712714, "step": 18005 }, { "epoch": 36.012, "grad_norm": 1.2700997591018677, "learning_rate": 2e-05, "loss": 0.05947264, "step": 18006 }, { "epoch": 36.014, "grad_norm": 1.0352877378463745, "learning_rate": 2e-05, "loss": 0.05117659, "step": 18007 }, { "epoch": 36.016, "grad_norm": 0.8768132328987122, "learning_rate": 2e-05, "loss": 0.03095286, "step": 18008 }, { "epoch": 36.018, "grad_norm": 1.013826608657837, "learning_rate": 2e-05, "loss": 0.04024442, "step": 18009 }, { "epoch": 36.02, "grad_norm": 0.8898334503173828, "learning_rate": 2e-05, "loss": 0.03267869, "step": 18010 }, { "epoch": 36.022, "grad_norm": 1.168753981590271, "learning_rate": 2e-05, "loss": 0.06206862, "step": 18011 }, { "epoch": 36.024, "grad_norm": 2.3365209102630615, "learning_rate": 2e-05, "loss": 0.04275678, "step": 18012 }, { "epoch": 36.026, "grad_norm": 1.1022475957870483, "learning_rate": 2e-05, "loss": 0.03964405, "step": 18013 }, { "epoch": 36.028, "grad_norm": 1.3331712484359741, "learning_rate": 2e-05, "loss": 0.05064458, "step": 18014 }, { "epoch": 36.03, "grad_norm": 0.7783733010292053, "learning_rate": 2e-05, "loss": 0.02274785, "step": 18015 }, { "epoch": 36.032, "grad_norm": 1.3800768852233887, "learning_rate": 2e-05, "loss": 0.04149601, "step": 18016 }, { "epoch": 36.034, "grad_norm": 1.192413091659546, "learning_rate": 2e-05, "loss": 0.03885002, "step": 18017 }, { "epoch": 36.036, "grad_norm": 1.1399497985839844, "learning_rate": 2e-05, "loss": 0.03476265, "step": 18018 }, { "epoch": 36.038, "grad_norm": 1.7624212503433228, "learning_rate": 2e-05, "loss": 0.03646769, "step": 18019 }, { "epoch": 36.04, "grad_norm": 1.2974351644515991, "learning_rate": 2e-05, "loss": 0.03705493, "step": 18020 }, { "epoch": 36.042, "grad_norm": 1.23936927318573, "learning_rate": 2e-05, "loss": 0.04269024, "step": 18021 }, { "epoch": 36.044, "grad_norm": 1.0420472621917725, "learning_rate": 2e-05, "loss": 0.0491667, "step": 18022 }, { "epoch": 36.046, "grad_norm": 1.1295794248580933, "learning_rate": 2e-05, "loss": 0.0483309, "step": 18023 }, { "epoch": 36.048, "grad_norm": 1.2538232803344727, "learning_rate": 2e-05, "loss": 0.04189965, "step": 18024 }, { "epoch": 36.05, "grad_norm": 1.1490254402160645, "learning_rate": 2e-05, "loss": 0.04948922, "step": 18025 }, { "epoch": 36.052, "grad_norm": 1.1473517417907715, "learning_rate": 2e-05, "loss": 0.04433149, "step": 18026 }, { "epoch": 36.054, "grad_norm": 2.406377077102661, "learning_rate": 2e-05, "loss": 0.03995343, "step": 18027 }, { "epoch": 36.056, "grad_norm": 0.9033026695251465, "learning_rate": 2e-05, "loss": 0.03492504, "step": 18028 }, { "epoch": 36.058, "grad_norm": 2.259152889251709, "learning_rate": 2e-05, "loss": 0.03825914, "step": 18029 }, { "epoch": 36.06, "grad_norm": 1.0139800310134888, "learning_rate": 2e-05, "loss": 0.03607133, "step": 18030 }, { "epoch": 36.062, "grad_norm": 1.024012565612793, "learning_rate": 2e-05, "loss": 0.04372687, "step": 18031 }, { "epoch": 36.064, "grad_norm": 1.413708209991455, "learning_rate": 2e-05, "loss": 0.05434243, "step": 18032 }, { "epoch": 36.066, "grad_norm": 1.0812147855758667, "learning_rate": 2e-05, "loss": 0.04545962, "step": 18033 }, { "epoch": 36.068, "grad_norm": 1.2126151323318481, "learning_rate": 2e-05, "loss": 0.0384191, "step": 18034 }, { "epoch": 36.07, "grad_norm": 0.9974027872085571, "learning_rate": 2e-05, "loss": 0.03963788, "step": 18035 }, { "epoch": 36.072, "grad_norm": 1.2551822662353516, "learning_rate": 2e-05, "loss": 0.03484931, "step": 18036 }, { "epoch": 36.074, "grad_norm": 1.0416690111160278, "learning_rate": 2e-05, "loss": 0.0443636, "step": 18037 }, { "epoch": 36.076, "grad_norm": 0.9761499762535095, "learning_rate": 2e-05, "loss": 0.03440753, "step": 18038 }, { "epoch": 36.078, "grad_norm": 1.095620036125183, "learning_rate": 2e-05, "loss": 0.04748923, "step": 18039 }, { "epoch": 36.08, "grad_norm": 1.0512536764144897, "learning_rate": 2e-05, "loss": 0.04524598, "step": 18040 }, { "epoch": 36.082, "grad_norm": 1.0632715225219727, "learning_rate": 2e-05, "loss": 0.04167694, "step": 18041 }, { "epoch": 36.084, "grad_norm": 1.0587679147720337, "learning_rate": 2e-05, "loss": 0.04438641, "step": 18042 }, { "epoch": 36.086, "grad_norm": 1.2922264337539673, "learning_rate": 2e-05, "loss": 0.03657916, "step": 18043 }, { "epoch": 36.088, "grad_norm": 1.3502165079116821, "learning_rate": 2e-05, "loss": 0.03409489, "step": 18044 }, { "epoch": 36.09, "grad_norm": 0.9757033586502075, "learning_rate": 2e-05, "loss": 0.03247763, "step": 18045 }, { "epoch": 36.092, "grad_norm": 1.4535313844680786, "learning_rate": 2e-05, "loss": 0.02803112, "step": 18046 }, { "epoch": 36.094, "grad_norm": 1.2366838455200195, "learning_rate": 2e-05, "loss": 0.05298037, "step": 18047 }, { "epoch": 36.096, "grad_norm": 1.181981086730957, "learning_rate": 2e-05, "loss": 0.05884934, "step": 18048 }, { "epoch": 36.098, "grad_norm": 1.1636250019073486, "learning_rate": 2e-05, "loss": 0.05668183, "step": 18049 }, { "epoch": 36.1, "grad_norm": 0.9034222960472107, "learning_rate": 2e-05, "loss": 0.03150563, "step": 18050 }, { "epoch": 36.102, "grad_norm": 1.2252378463745117, "learning_rate": 2e-05, "loss": 0.03503931, "step": 18051 }, { "epoch": 36.104, "grad_norm": 2.915555000305176, "learning_rate": 2e-05, "loss": 0.0670835, "step": 18052 }, { "epoch": 36.106, "grad_norm": 1.6234302520751953, "learning_rate": 2e-05, "loss": 0.04543199, "step": 18053 }, { "epoch": 36.108, "grad_norm": 1.0577139854431152, "learning_rate": 2e-05, "loss": 0.04299786, "step": 18054 }, { "epoch": 36.11, "grad_norm": 1.219182014465332, "learning_rate": 2e-05, "loss": 0.0412758, "step": 18055 }, { "epoch": 36.112, "grad_norm": 1.2038267850875854, "learning_rate": 2e-05, "loss": 0.05887077, "step": 18056 }, { "epoch": 36.114, "grad_norm": 1.1648696660995483, "learning_rate": 2e-05, "loss": 0.04111334, "step": 18057 }, { "epoch": 36.116, "grad_norm": 1.236552357673645, "learning_rate": 2e-05, "loss": 0.03169405, "step": 18058 }, { "epoch": 36.118, "grad_norm": 1.35683274269104, "learning_rate": 2e-05, "loss": 0.06167949, "step": 18059 }, { "epoch": 36.12, "grad_norm": 0.8526447415351868, "learning_rate": 2e-05, "loss": 0.02260097, "step": 18060 }, { "epoch": 36.122, "grad_norm": 1.5788859128952026, "learning_rate": 2e-05, "loss": 0.04528182, "step": 18061 }, { "epoch": 36.124, "grad_norm": 1.4425537586212158, "learning_rate": 2e-05, "loss": 0.03581157, "step": 18062 }, { "epoch": 36.126, "grad_norm": 1.180890679359436, "learning_rate": 2e-05, "loss": 0.03914965, "step": 18063 }, { "epoch": 36.128, "grad_norm": 2.295358896255493, "learning_rate": 2e-05, "loss": 0.05452657, "step": 18064 }, { "epoch": 36.13, "grad_norm": 1.1650062799453735, "learning_rate": 2e-05, "loss": 0.05545577, "step": 18065 }, { "epoch": 36.132, "grad_norm": 1.0452123880386353, "learning_rate": 2e-05, "loss": 0.04701293, "step": 18066 }, { "epoch": 36.134, "grad_norm": 1.3882209062576294, "learning_rate": 2e-05, "loss": 0.04618207, "step": 18067 }, { "epoch": 36.136, "grad_norm": 1.3880937099456787, "learning_rate": 2e-05, "loss": 0.05626071, "step": 18068 }, { "epoch": 36.138, "grad_norm": 1.3208019733428955, "learning_rate": 2e-05, "loss": 0.05341219, "step": 18069 }, { "epoch": 36.14, "grad_norm": 1.2359769344329834, "learning_rate": 2e-05, "loss": 0.05630834, "step": 18070 }, { "epoch": 36.142, "grad_norm": 1.4856984615325928, "learning_rate": 2e-05, "loss": 0.04784521, "step": 18071 }, { "epoch": 36.144, "grad_norm": 1.0184515714645386, "learning_rate": 2e-05, "loss": 0.03532099, "step": 18072 }, { "epoch": 36.146, "grad_norm": 0.8863417506217957, "learning_rate": 2e-05, "loss": 0.03981868, "step": 18073 }, { "epoch": 36.148, "grad_norm": 1.5833380222320557, "learning_rate": 2e-05, "loss": 0.06328207, "step": 18074 }, { "epoch": 36.15, "grad_norm": 1.1054245233535767, "learning_rate": 2e-05, "loss": 0.03084075, "step": 18075 }, { "epoch": 36.152, "grad_norm": 1.052768349647522, "learning_rate": 2e-05, "loss": 0.02654871, "step": 18076 }, { "epoch": 36.154, "grad_norm": 1.0694469213485718, "learning_rate": 2e-05, "loss": 0.04002466, "step": 18077 }, { "epoch": 36.156, "grad_norm": 1.1025431156158447, "learning_rate": 2e-05, "loss": 0.04189037, "step": 18078 }, { "epoch": 36.158, "grad_norm": 1.0295785665512085, "learning_rate": 2e-05, "loss": 0.0332966, "step": 18079 }, { "epoch": 36.16, "grad_norm": 1.237951397895813, "learning_rate": 2e-05, "loss": 0.04653509, "step": 18080 }, { "epoch": 36.162, "grad_norm": 1.0982863903045654, "learning_rate": 2e-05, "loss": 0.06409388, "step": 18081 }, { "epoch": 36.164, "grad_norm": 1.7912333011627197, "learning_rate": 2e-05, "loss": 0.05520929, "step": 18082 }, { "epoch": 36.166, "grad_norm": 2.389678955078125, "learning_rate": 2e-05, "loss": 0.04506039, "step": 18083 }, { "epoch": 36.168, "grad_norm": 1.4554105997085571, "learning_rate": 2e-05, "loss": 0.04939141, "step": 18084 }, { "epoch": 36.17, "grad_norm": 1.1505887508392334, "learning_rate": 2e-05, "loss": 0.03378366, "step": 18085 }, { "epoch": 36.172, "grad_norm": 1.0295778512954712, "learning_rate": 2e-05, "loss": 0.0420752, "step": 18086 }, { "epoch": 36.174, "grad_norm": 1.2514499425888062, "learning_rate": 2e-05, "loss": 0.0318369, "step": 18087 }, { "epoch": 36.176, "grad_norm": 6.763287544250488, "learning_rate": 2e-05, "loss": 0.04665124, "step": 18088 }, { "epoch": 36.178, "grad_norm": 1.3646979331970215, "learning_rate": 2e-05, "loss": 0.07093234, "step": 18089 }, { "epoch": 36.18, "grad_norm": 1.4142276048660278, "learning_rate": 2e-05, "loss": 0.05369917, "step": 18090 }, { "epoch": 36.182, "grad_norm": 1.7820714712142944, "learning_rate": 2e-05, "loss": 0.03933295, "step": 18091 }, { "epoch": 36.184, "grad_norm": 1.7797034978866577, "learning_rate": 2e-05, "loss": 0.03328394, "step": 18092 }, { "epoch": 36.186, "grad_norm": 0.8926862478256226, "learning_rate": 2e-05, "loss": 0.02798809, "step": 18093 }, { "epoch": 36.188, "grad_norm": 1.4235514402389526, "learning_rate": 2e-05, "loss": 0.05231753, "step": 18094 }, { "epoch": 36.19, "grad_norm": 0.9979972243309021, "learning_rate": 2e-05, "loss": 0.03881006, "step": 18095 }, { "epoch": 36.192, "grad_norm": 1.2357900142669678, "learning_rate": 2e-05, "loss": 0.04758938, "step": 18096 }, { "epoch": 36.194, "grad_norm": 0.9692314267158508, "learning_rate": 2e-05, "loss": 0.03797583, "step": 18097 }, { "epoch": 36.196, "grad_norm": 1.3042386770248413, "learning_rate": 2e-05, "loss": 0.03852347, "step": 18098 }, { "epoch": 36.198, "grad_norm": 1.2214518785476685, "learning_rate": 2e-05, "loss": 0.04328543, "step": 18099 }, { "epoch": 36.2, "grad_norm": 1.0520689487457275, "learning_rate": 2e-05, "loss": 0.03384469, "step": 18100 }, { "epoch": 36.202, "grad_norm": 2.0228559970855713, "learning_rate": 2e-05, "loss": 0.03976683, "step": 18101 }, { "epoch": 36.204, "grad_norm": 0.987694501876831, "learning_rate": 2e-05, "loss": 0.02882915, "step": 18102 }, { "epoch": 36.206, "grad_norm": 0.9760309457778931, "learning_rate": 2e-05, "loss": 0.02580061, "step": 18103 }, { "epoch": 36.208, "grad_norm": 1.0408885478973389, "learning_rate": 2e-05, "loss": 0.04258654, "step": 18104 }, { "epoch": 36.21, "grad_norm": 1.2025320529937744, "learning_rate": 2e-05, "loss": 0.05036615, "step": 18105 }, { "epoch": 36.212, "grad_norm": 1.0596842765808105, "learning_rate": 2e-05, "loss": 0.04600206, "step": 18106 }, { "epoch": 36.214, "grad_norm": 1.0019363164901733, "learning_rate": 2e-05, "loss": 0.03957354, "step": 18107 }, { "epoch": 36.216, "grad_norm": 1.0834828615188599, "learning_rate": 2e-05, "loss": 0.04598715, "step": 18108 }, { "epoch": 36.218, "grad_norm": 0.8921990394592285, "learning_rate": 2e-05, "loss": 0.0377783, "step": 18109 }, { "epoch": 36.22, "grad_norm": 1.5210086107254028, "learning_rate": 2e-05, "loss": 0.04725397, "step": 18110 }, { "epoch": 36.222, "grad_norm": 1.5957287549972534, "learning_rate": 2e-05, "loss": 0.07436598, "step": 18111 }, { "epoch": 36.224, "grad_norm": 1.386565923690796, "learning_rate": 2e-05, "loss": 0.0528223, "step": 18112 }, { "epoch": 36.226, "grad_norm": 1.5550975799560547, "learning_rate": 2e-05, "loss": 0.05164887, "step": 18113 }, { "epoch": 36.228, "grad_norm": 0.8634277582168579, "learning_rate": 2e-05, "loss": 0.03223967, "step": 18114 }, { "epoch": 36.23, "grad_norm": 1.6933544874191284, "learning_rate": 2e-05, "loss": 0.05924009, "step": 18115 }, { "epoch": 36.232, "grad_norm": 0.9973080158233643, "learning_rate": 2e-05, "loss": 0.04084028, "step": 18116 }, { "epoch": 36.234, "grad_norm": 1.0241217613220215, "learning_rate": 2e-05, "loss": 0.03810273, "step": 18117 }, { "epoch": 36.236, "grad_norm": 1.1792432069778442, "learning_rate": 2e-05, "loss": 0.05786144, "step": 18118 }, { "epoch": 36.238, "grad_norm": 1.6085301637649536, "learning_rate": 2e-05, "loss": 0.03755718, "step": 18119 }, { "epoch": 36.24, "grad_norm": 1.0082367658615112, "learning_rate": 2e-05, "loss": 0.03457486, "step": 18120 }, { "epoch": 36.242, "grad_norm": 1.8788563013076782, "learning_rate": 2e-05, "loss": 0.05238142, "step": 18121 }, { "epoch": 36.244, "grad_norm": 1.360443353652954, "learning_rate": 2e-05, "loss": 0.03759704, "step": 18122 }, { "epoch": 36.246, "grad_norm": 0.8452262878417969, "learning_rate": 2e-05, "loss": 0.03147104, "step": 18123 }, { "epoch": 36.248, "grad_norm": 1.2059133052825928, "learning_rate": 2e-05, "loss": 0.03802056, "step": 18124 }, { "epoch": 36.25, "grad_norm": 1.3137298822402954, "learning_rate": 2e-05, "loss": 0.04731848, "step": 18125 }, { "epoch": 36.252, "grad_norm": 3.807495355606079, "learning_rate": 2e-05, "loss": 0.06938329, "step": 18126 }, { "epoch": 36.254, "grad_norm": 1.8247867822647095, "learning_rate": 2e-05, "loss": 0.06592089, "step": 18127 }, { "epoch": 36.256, "grad_norm": 1.0595691204071045, "learning_rate": 2e-05, "loss": 0.04157022, "step": 18128 }, { "epoch": 36.258, "grad_norm": 1.2000421285629272, "learning_rate": 2e-05, "loss": 0.05030415, "step": 18129 }, { "epoch": 36.26, "grad_norm": 1.4462804794311523, "learning_rate": 2e-05, "loss": 0.04188553, "step": 18130 }, { "epoch": 36.262, "grad_norm": 0.9621727466583252, "learning_rate": 2e-05, "loss": 0.03252388, "step": 18131 }, { "epoch": 36.264, "grad_norm": 1.3379517793655396, "learning_rate": 2e-05, "loss": 0.0592545, "step": 18132 }, { "epoch": 36.266, "grad_norm": 1.260883092880249, "learning_rate": 2e-05, "loss": 0.04592264, "step": 18133 }, { "epoch": 36.268, "grad_norm": 0.951046347618103, "learning_rate": 2e-05, "loss": 0.03763274, "step": 18134 }, { "epoch": 36.27, "grad_norm": 1.2070631980895996, "learning_rate": 2e-05, "loss": 0.04987542, "step": 18135 }, { "epoch": 36.272, "grad_norm": 0.9174032211303711, "learning_rate": 2e-05, "loss": 0.041425, "step": 18136 }, { "epoch": 36.274, "grad_norm": 0.9744871854782104, "learning_rate": 2e-05, "loss": 0.03707837, "step": 18137 }, { "epoch": 36.276, "grad_norm": 2.015726327896118, "learning_rate": 2e-05, "loss": 0.05615123, "step": 18138 }, { "epoch": 36.278, "grad_norm": 1.0339828729629517, "learning_rate": 2e-05, "loss": 0.03831189, "step": 18139 }, { "epoch": 36.28, "grad_norm": 1.1325945854187012, "learning_rate": 2e-05, "loss": 0.0443339, "step": 18140 }, { "epoch": 36.282, "grad_norm": 1.2790862321853638, "learning_rate": 2e-05, "loss": 0.05322406, "step": 18141 }, { "epoch": 36.284, "grad_norm": 1.6974416971206665, "learning_rate": 2e-05, "loss": 0.05808611, "step": 18142 }, { "epoch": 36.286, "grad_norm": 1.324877381324768, "learning_rate": 2e-05, "loss": 0.03995011, "step": 18143 }, { "epoch": 36.288, "grad_norm": 0.9972968697547913, "learning_rate": 2e-05, "loss": 0.03236829, "step": 18144 }, { "epoch": 36.29, "grad_norm": 2.6326334476470947, "learning_rate": 2e-05, "loss": 0.05116319, "step": 18145 }, { "epoch": 36.292, "grad_norm": 1.2159920930862427, "learning_rate": 2e-05, "loss": 0.03850378, "step": 18146 }, { "epoch": 36.294, "grad_norm": 1.283882975578308, "learning_rate": 2e-05, "loss": 0.05847166, "step": 18147 }, { "epoch": 36.296, "grad_norm": 0.9754537343978882, "learning_rate": 2e-05, "loss": 0.03831177, "step": 18148 }, { "epoch": 36.298, "grad_norm": 1.1584181785583496, "learning_rate": 2e-05, "loss": 0.0415417, "step": 18149 }, { "epoch": 36.3, "grad_norm": 0.9804734587669373, "learning_rate": 2e-05, "loss": 0.04727735, "step": 18150 }, { "epoch": 36.302, "grad_norm": 1.1174956560134888, "learning_rate": 2e-05, "loss": 0.0418535, "step": 18151 }, { "epoch": 36.304, "grad_norm": 0.9662861227989197, "learning_rate": 2e-05, "loss": 0.03314947, "step": 18152 }, { "epoch": 36.306, "grad_norm": 1.0359177589416504, "learning_rate": 2e-05, "loss": 0.04416542, "step": 18153 }, { "epoch": 36.308, "grad_norm": 1.6807138919830322, "learning_rate": 2e-05, "loss": 0.03955986, "step": 18154 }, { "epoch": 36.31, "grad_norm": 1.2904269695281982, "learning_rate": 2e-05, "loss": 0.04740483, "step": 18155 }, { "epoch": 36.312, "grad_norm": 0.9057779312133789, "learning_rate": 2e-05, "loss": 0.03082168, "step": 18156 }, { "epoch": 36.314, "grad_norm": 1.4500725269317627, "learning_rate": 2e-05, "loss": 0.045389, "step": 18157 }, { "epoch": 36.316, "grad_norm": 1.0528526306152344, "learning_rate": 2e-05, "loss": 0.04059176, "step": 18158 }, { "epoch": 36.318, "grad_norm": 2.3999462127685547, "learning_rate": 2e-05, "loss": 0.03782693, "step": 18159 }, { "epoch": 36.32, "grad_norm": 1.129332184791565, "learning_rate": 2e-05, "loss": 0.04359079, "step": 18160 }, { "epoch": 36.322, "grad_norm": 1.308021903038025, "learning_rate": 2e-05, "loss": 0.0600227, "step": 18161 }, { "epoch": 36.324, "grad_norm": 1.1091634035110474, "learning_rate": 2e-05, "loss": 0.05207297, "step": 18162 }, { "epoch": 36.326, "grad_norm": 1.4300206899642944, "learning_rate": 2e-05, "loss": 0.04866595, "step": 18163 }, { "epoch": 36.328, "grad_norm": 1.915114402770996, "learning_rate": 2e-05, "loss": 0.03525345, "step": 18164 }, { "epoch": 36.33, "grad_norm": 0.9804067611694336, "learning_rate": 2e-05, "loss": 0.04006468, "step": 18165 }, { "epoch": 36.332, "grad_norm": 1.098341464996338, "learning_rate": 2e-05, "loss": 0.0482323, "step": 18166 }, { "epoch": 36.334, "grad_norm": 0.9357543587684631, "learning_rate": 2e-05, "loss": 0.03514704, "step": 18167 }, { "epoch": 36.336, "grad_norm": 1.311602234840393, "learning_rate": 2e-05, "loss": 0.03336764, "step": 18168 }, { "epoch": 36.338, "grad_norm": 2.577634572982788, "learning_rate": 2e-05, "loss": 0.0495299, "step": 18169 }, { "epoch": 36.34, "grad_norm": 1.6486222743988037, "learning_rate": 2e-05, "loss": 0.08410998, "step": 18170 }, { "epoch": 36.342, "grad_norm": 0.9249390959739685, "learning_rate": 2e-05, "loss": 0.02951442, "step": 18171 }, { "epoch": 36.344, "grad_norm": 0.8755949139595032, "learning_rate": 2e-05, "loss": 0.03209528, "step": 18172 }, { "epoch": 36.346, "grad_norm": 0.9413033127784729, "learning_rate": 2e-05, "loss": 0.0372153, "step": 18173 }, { "epoch": 36.348, "grad_norm": 1.7841044664382935, "learning_rate": 2e-05, "loss": 0.04583333, "step": 18174 }, { "epoch": 36.35, "grad_norm": 1.063729166984558, "learning_rate": 2e-05, "loss": 0.04538869, "step": 18175 }, { "epoch": 36.352, "grad_norm": 1.0596580505371094, "learning_rate": 2e-05, "loss": 0.04321297, "step": 18176 }, { "epoch": 36.354, "grad_norm": 0.9051801562309265, "learning_rate": 2e-05, "loss": 0.0371689, "step": 18177 }, { "epoch": 36.356, "grad_norm": 1.2519522905349731, "learning_rate": 2e-05, "loss": 0.062448, "step": 18178 }, { "epoch": 36.358, "grad_norm": 1.2632250785827637, "learning_rate": 2e-05, "loss": 0.04494053, "step": 18179 }, { "epoch": 36.36, "grad_norm": 0.9765613675117493, "learning_rate": 2e-05, "loss": 0.04332968, "step": 18180 }, { "epoch": 36.362, "grad_norm": 0.960936427116394, "learning_rate": 2e-05, "loss": 0.0390562, "step": 18181 }, { "epoch": 36.364, "grad_norm": 1.0641465187072754, "learning_rate": 2e-05, "loss": 0.04478747, "step": 18182 }, { "epoch": 36.366, "grad_norm": 1.0358315706253052, "learning_rate": 2e-05, "loss": 0.04437871, "step": 18183 }, { "epoch": 36.368, "grad_norm": 1.0138981342315674, "learning_rate": 2e-05, "loss": 0.04370901, "step": 18184 }, { "epoch": 36.37, "grad_norm": 1.311715006828308, "learning_rate": 2e-05, "loss": 0.0586396, "step": 18185 }, { "epoch": 36.372, "grad_norm": 0.932867705821991, "learning_rate": 2e-05, "loss": 0.03397618, "step": 18186 }, { "epoch": 36.374, "grad_norm": 1.0600165128707886, "learning_rate": 2e-05, "loss": 0.0434187, "step": 18187 }, { "epoch": 36.376, "grad_norm": 1.453376054763794, "learning_rate": 2e-05, "loss": 0.04615813, "step": 18188 }, { "epoch": 36.378, "grad_norm": 1.1891179084777832, "learning_rate": 2e-05, "loss": 0.05235086, "step": 18189 }, { "epoch": 36.38, "grad_norm": 1.2511253356933594, "learning_rate": 2e-05, "loss": 0.05714508, "step": 18190 }, { "epoch": 36.382, "grad_norm": 1.0763239860534668, "learning_rate": 2e-05, "loss": 0.0436965, "step": 18191 }, { "epoch": 36.384, "grad_norm": 1.1021196842193604, "learning_rate": 2e-05, "loss": 0.0487587, "step": 18192 }, { "epoch": 36.386, "grad_norm": 0.7265276908874512, "learning_rate": 2e-05, "loss": 0.02227158, "step": 18193 }, { "epoch": 36.388, "grad_norm": 0.9542597532272339, "learning_rate": 2e-05, "loss": 0.0347061, "step": 18194 }, { "epoch": 36.39, "grad_norm": 1.0779917240142822, "learning_rate": 2e-05, "loss": 0.03523953, "step": 18195 }, { "epoch": 36.392, "grad_norm": 1.251686692237854, "learning_rate": 2e-05, "loss": 0.05480514, "step": 18196 }, { "epoch": 36.394, "grad_norm": 2.964366912841797, "learning_rate": 2e-05, "loss": 0.05061496, "step": 18197 }, { "epoch": 36.396, "grad_norm": 0.9669221639633179, "learning_rate": 2e-05, "loss": 0.03461855, "step": 18198 }, { "epoch": 36.398, "grad_norm": 1.7254489660263062, "learning_rate": 2e-05, "loss": 0.0450973, "step": 18199 }, { "epoch": 36.4, "grad_norm": 1.6175504922866821, "learning_rate": 2e-05, "loss": 0.06268507, "step": 18200 }, { "epoch": 36.402, "grad_norm": 0.9274033308029175, "learning_rate": 2e-05, "loss": 0.03287154, "step": 18201 }, { "epoch": 36.404, "grad_norm": 1.227177381515503, "learning_rate": 2e-05, "loss": 0.06444988, "step": 18202 }, { "epoch": 36.406, "grad_norm": 4.1038007736206055, "learning_rate": 2e-05, "loss": 0.0536866, "step": 18203 }, { "epoch": 36.408, "grad_norm": 0.9421659111976624, "learning_rate": 2e-05, "loss": 0.03416989, "step": 18204 }, { "epoch": 36.41, "grad_norm": 1.3858892917633057, "learning_rate": 2e-05, "loss": 0.04377569, "step": 18205 }, { "epoch": 36.412, "grad_norm": 0.9776285886764526, "learning_rate": 2e-05, "loss": 0.03499193, "step": 18206 }, { "epoch": 36.414, "grad_norm": 1.4491907358169556, "learning_rate": 2e-05, "loss": 0.048717, "step": 18207 }, { "epoch": 36.416, "grad_norm": 1.1502240896224976, "learning_rate": 2e-05, "loss": 0.0407382, "step": 18208 }, { "epoch": 36.418, "grad_norm": 0.9800335168838501, "learning_rate": 2e-05, "loss": 0.03843459, "step": 18209 }, { "epoch": 36.42, "grad_norm": 1.1980656385421753, "learning_rate": 2e-05, "loss": 0.04430903, "step": 18210 }, { "epoch": 36.422, "grad_norm": 1.0046864748001099, "learning_rate": 2e-05, "loss": 0.03967419, "step": 18211 }, { "epoch": 36.424, "grad_norm": 1.0129104852676392, "learning_rate": 2e-05, "loss": 0.03453697, "step": 18212 }, { "epoch": 36.426, "grad_norm": 1.0538151264190674, "learning_rate": 2e-05, "loss": 0.05050563, "step": 18213 }, { "epoch": 36.428, "grad_norm": 1.320386290550232, "learning_rate": 2e-05, "loss": 0.05194265, "step": 18214 }, { "epoch": 36.43, "grad_norm": 1.0126291513442993, "learning_rate": 2e-05, "loss": 0.03438072, "step": 18215 }, { "epoch": 36.432, "grad_norm": 1.0564054250717163, "learning_rate": 2e-05, "loss": 0.04617267, "step": 18216 }, { "epoch": 36.434, "grad_norm": 1.6506766080856323, "learning_rate": 2e-05, "loss": 0.04727304, "step": 18217 }, { "epoch": 36.436, "grad_norm": 1.9200208187103271, "learning_rate": 2e-05, "loss": 0.07817221, "step": 18218 }, { "epoch": 36.438, "grad_norm": 1.0845707654953003, "learning_rate": 2e-05, "loss": 0.03826804, "step": 18219 }, { "epoch": 36.44, "grad_norm": 15.834321975708008, "learning_rate": 2e-05, "loss": 0.05409867, "step": 18220 }, { "epoch": 36.442, "grad_norm": 0.851262092590332, "learning_rate": 2e-05, "loss": 0.02766912, "step": 18221 }, { "epoch": 36.444, "grad_norm": 1.003509759902954, "learning_rate": 2e-05, "loss": 0.03653019, "step": 18222 }, { "epoch": 36.446, "grad_norm": 1.629040002822876, "learning_rate": 2e-05, "loss": 0.03537179, "step": 18223 }, { "epoch": 36.448, "grad_norm": 0.9991608262062073, "learning_rate": 2e-05, "loss": 0.04178185, "step": 18224 }, { "epoch": 36.45, "grad_norm": 0.9815368056297302, "learning_rate": 2e-05, "loss": 0.03858025, "step": 18225 }, { "epoch": 36.452, "grad_norm": 0.9815390110015869, "learning_rate": 2e-05, "loss": 0.03625118, "step": 18226 }, { "epoch": 36.454, "grad_norm": 1.0707416534423828, "learning_rate": 2e-05, "loss": 0.03909856, "step": 18227 }, { "epoch": 36.456, "grad_norm": 1.0935394763946533, "learning_rate": 2e-05, "loss": 0.04828483, "step": 18228 }, { "epoch": 36.458, "grad_norm": 0.9268385171890259, "learning_rate": 2e-05, "loss": 0.03542485, "step": 18229 }, { "epoch": 36.46, "grad_norm": 0.7991934418678284, "learning_rate": 2e-05, "loss": 0.02917837, "step": 18230 }, { "epoch": 36.462, "grad_norm": 0.7205358147621155, "learning_rate": 2e-05, "loss": 0.01763233, "step": 18231 }, { "epoch": 36.464, "grad_norm": 1.6514350175857544, "learning_rate": 2e-05, "loss": 0.05246878, "step": 18232 }, { "epoch": 36.466, "grad_norm": 1.4559180736541748, "learning_rate": 2e-05, "loss": 0.04459789, "step": 18233 }, { "epoch": 36.468, "grad_norm": 1.1103413105010986, "learning_rate": 2e-05, "loss": 0.03810715, "step": 18234 }, { "epoch": 36.47, "grad_norm": 1.6883729696273804, "learning_rate": 2e-05, "loss": 0.05540714, "step": 18235 }, { "epoch": 36.472, "grad_norm": 1.0368528366088867, "learning_rate": 2e-05, "loss": 0.02687224, "step": 18236 }, { "epoch": 36.474, "grad_norm": 1.0465880632400513, "learning_rate": 2e-05, "loss": 0.04734078, "step": 18237 }, { "epoch": 36.476, "grad_norm": 1.252293348312378, "learning_rate": 2e-05, "loss": 0.04945746, "step": 18238 }, { "epoch": 36.478, "grad_norm": 1.3992714881896973, "learning_rate": 2e-05, "loss": 0.04009645, "step": 18239 }, { "epoch": 36.48, "grad_norm": 1.5678775310516357, "learning_rate": 2e-05, "loss": 0.04669084, "step": 18240 }, { "epoch": 36.482, "grad_norm": 1.5509870052337646, "learning_rate": 2e-05, "loss": 0.03830127, "step": 18241 }, { "epoch": 36.484, "grad_norm": 1.915579915046692, "learning_rate": 2e-05, "loss": 0.0553691, "step": 18242 }, { "epoch": 36.486, "grad_norm": 1.1904710531234741, "learning_rate": 2e-05, "loss": 0.04224677, "step": 18243 }, { "epoch": 36.488, "grad_norm": 1.2168554067611694, "learning_rate": 2e-05, "loss": 0.04586092, "step": 18244 }, { "epoch": 36.49, "grad_norm": 1.3301738500595093, "learning_rate": 2e-05, "loss": 0.05171129, "step": 18245 }, { "epoch": 36.492, "grad_norm": 1.1761380434036255, "learning_rate": 2e-05, "loss": 0.03910657, "step": 18246 }, { "epoch": 36.494, "grad_norm": 1.9413561820983887, "learning_rate": 2e-05, "loss": 0.03714305, "step": 18247 }, { "epoch": 36.496, "grad_norm": 1.7500370740890503, "learning_rate": 2e-05, "loss": 0.06374657, "step": 18248 }, { "epoch": 36.498, "grad_norm": 1.381943941116333, "learning_rate": 2e-05, "loss": 0.03802197, "step": 18249 }, { "epoch": 36.5, "grad_norm": 0.9642596244812012, "learning_rate": 2e-05, "loss": 0.03333649, "step": 18250 }, { "epoch": 36.502, "grad_norm": 0.942935585975647, "learning_rate": 2e-05, "loss": 0.04047981, "step": 18251 }, { "epoch": 36.504, "grad_norm": 1.4175323247909546, "learning_rate": 2e-05, "loss": 0.05055133, "step": 18252 }, { "epoch": 36.506, "grad_norm": 1.0123085975646973, "learning_rate": 2e-05, "loss": 0.03340025, "step": 18253 }, { "epoch": 36.508, "grad_norm": 1.2198922634124756, "learning_rate": 2e-05, "loss": 0.04103016, "step": 18254 }, { "epoch": 36.51, "grad_norm": 1.0504868030548096, "learning_rate": 2e-05, "loss": 0.04015606, "step": 18255 }, { "epoch": 36.512, "grad_norm": 0.891169011592865, "learning_rate": 2e-05, "loss": 0.02399347, "step": 18256 }, { "epoch": 36.514, "grad_norm": 1.2364811897277832, "learning_rate": 2e-05, "loss": 0.04313375, "step": 18257 }, { "epoch": 36.516, "grad_norm": 1.1266855001449585, "learning_rate": 2e-05, "loss": 0.04502291, "step": 18258 }, { "epoch": 36.518, "grad_norm": 1.0467621088027954, "learning_rate": 2e-05, "loss": 0.04971634, "step": 18259 }, { "epoch": 36.52, "grad_norm": 1.0385361909866333, "learning_rate": 2e-05, "loss": 0.03757544, "step": 18260 }, { "epoch": 36.522, "grad_norm": 1.1160595417022705, "learning_rate": 2e-05, "loss": 0.03531703, "step": 18261 }, { "epoch": 36.524, "grad_norm": 1.3917378187179565, "learning_rate": 2e-05, "loss": 0.05143885, "step": 18262 }, { "epoch": 36.526, "grad_norm": 0.9907529354095459, "learning_rate": 2e-05, "loss": 0.03175423, "step": 18263 }, { "epoch": 36.528, "grad_norm": 1.145869493484497, "learning_rate": 2e-05, "loss": 0.0528495, "step": 18264 }, { "epoch": 36.53, "grad_norm": 1.240200161933899, "learning_rate": 2e-05, "loss": 0.04903135, "step": 18265 }, { "epoch": 36.532, "grad_norm": 1.0843182802200317, "learning_rate": 2e-05, "loss": 0.0450576, "step": 18266 }, { "epoch": 36.534, "grad_norm": 0.8221714496612549, "learning_rate": 2e-05, "loss": 0.02467009, "step": 18267 }, { "epoch": 36.536, "grad_norm": 1.0432863235473633, "learning_rate": 2e-05, "loss": 0.04350327, "step": 18268 }, { "epoch": 36.538, "grad_norm": 1.4372597932815552, "learning_rate": 2e-05, "loss": 0.05932559, "step": 18269 }, { "epoch": 36.54, "grad_norm": 1.201994776725769, "learning_rate": 2e-05, "loss": 0.04599293, "step": 18270 }, { "epoch": 36.542, "grad_norm": 1.1407171487808228, "learning_rate": 2e-05, "loss": 0.05376129, "step": 18271 }, { "epoch": 36.544, "grad_norm": 0.9831859469413757, "learning_rate": 2e-05, "loss": 0.03398538, "step": 18272 }, { "epoch": 36.546, "grad_norm": 1.0688565969467163, "learning_rate": 2e-05, "loss": 0.04778615, "step": 18273 }, { "epoch": 36.548, "grad_norm": 1.1213395595550537, "learning_rate": 2e-05, "loss": 0.03932, "step": 18274 }, { "epoch": 36.55, "grad_norm": 1.1448311805725098, "learning_rate": 2e-05, "loss": 0.05196967, "step": 18275 }, { "epoch": 36.552, "grad_norm": 1.1790046691894531, "learning_rate": 2e-05, "loss": 0.03561121, "step": 18276 }, { "epoch": 36.554, "grad_norm": 1.2666711807250977, "learning_rate": 2e-05, "loss": 0.03894105, "step": 18277 }, { "epoch": 36.556, "grad_norm": 1.2572201490402222, "learning_rate": 2e-05, "loss": 0.04365982, "step": 18278 }, { "epoch": 36.558, "grad_norm": 1.2344820499420166, "learning_rate": 2e-05, "loss": 0.04264975, "step": 18279 }, { "epoch": 36.56, "grad_norm": 1.8529245853424072, "learning_rate": 2e-05, "loss": 0.05609737, "step": 18280 }, { "epoch": 36.562, "grad_norm": 1.0321011543273926, "learning_rate": 2e-05, "loss": 0.04008961, "step": 18281 }, { "epoch": 36.564, "grad_norm": 1.1526654958724976, "learning_rate": 2e-05, "loss": 0.0429452, "step": 18282 }, { "epoch": 36.566, "grad_norm": 2.275144100189209, "learning_rate": 2e-05, "loss": 0.05043276, "step": 18283 }, { "epoch": 36.568, "grad_norm": 1.0161266326904297, "learning_rate": 2e-05, "loss": 0.04846366, "step": 18284 }, { "epoch": 36.57, "grad_norm": 1.4074817895889282, "learning_rate": 2e-05, "loss": 0.06286315, "step": 18285 }, { "epoch": 36.572, "grad_norm": 1.0789586305618286, "learning_rate": 2e-05, "loss": 0.05768754, "step": 18286 }, { "epoch": 36.574, "grad_norm": 1.0801752805709839, "learning_rate": 2e-05, "loss": 0.04505496, "step": 18287 }, { "epoch": 36.576, "grad_norm": 1.1768354177474976, "learning_rate": 2e-05, "loss": 0.04220011, "step": 18288 }, { "epoch": 36.578, "grad_norm": 1.079171895980835, "learning_rate": 2e-05, "loss": 0.04326253, "step": 18289 }, { "epoch": 36.58, "grad_norm": 1.1383424997329712, "learning_rate": 2e-05, "loss": 0.05659823, "step": 18290 }, { "epoch": 36.582, "grad_norm": 1.2776405811309814, "learning_rate": 2e-05, "loss": 0.04328676, "step": 18291 }, { "epoch": 36.584, "grad_norm": 0.9981813430786133, "learning_rate": 2e-05, "loss": 0.02976444, "step": 18292 }, { "epoch": 36.586, "grad_norm": 1.051627516746521, "learning_rate": 2e-05, "loss": 0.04588671, "step": 18293 }, { "epoch": 36.588, "grad_norm": 1.323179006576538, "learning_rate": 2e-05, "loss": 0.05382748, "step": 18294 }, { "epoch": 36.59, "grad_norm": 0.9575484395027161, "learning_rate": 2e-05, "loss": 0.03244565, "step": 18295 }, { "epoch": 36.592, "grad_norm": 1.053873896598816, "learning_rate": 2e-05, "loss": 0.03174559, "step": 18296 }, { "epoch": 36.594, "grad_norm": 1.1865633726119995, "learning_rate": 2e-05, "loss": 0.04369554, "step": 18297 }, { "epoch": 36.596, "grad_norm": 1.0070998668670654, "learning_rate": 2e-05, "loss": 0.03702208, "step": 18298 }, { "epoch": 36.598, "grad_norm": 1.0272834300994873, "learning_rate": 2e-05, "loss": 0.03915671, "step": 18299 }, { "epoch": 36.6, "grad_norm": 1.5127160549163818, "learning_rate": 2e-05, "loss": 0.0455484, "step": 18300 }, { "epoch": 36.602, "grad_norm": 1.0794305801391602, "learning_rate": 2e-05, "loss": 0.0346117, "step": 18301 }, { "epoch": 36.604, "grad_norm": 1.0348949432373047, "learning_rate": 2e-05, "loss": 0.04552088, "step": 18302 }, { "epoch": 36.606, "grad_norm": 1.048442006111145, "learning_rate": 2e-05, "loss": 0.04380922, "step": 18303 }, { "epoch": 36.608, "grad_norm": 2.004702091217041, "learning_rate": 2e-05, "loss": 0.03365945, "step": 18304 }, { "epoch": 36.61, "grad_norm": 0.9562575817108154, "learning_rate": 2e-05, "loss": 0.03662489, "step": 18305 }, { "epoch": 36.612, "grad_norm": 1.3157565593719482, "learning_rate": 2e-05, "loss": 0.04390732, "step": 18306 }, { "epoch": 36.614, "grad_norm": 1.1000422239303589, "learning_rate": 2e-05, "loss": 0.03292922, "step": 18307 }, { "epoch": 36.616, "grad_norm": 1.9574331045150757, "learning_rate": 2e-05, "loss": 0.05021146, "step": 18308 }, { "epoch": 36.618, "grad_norm": 0.948233425617218, "learning_rate": 2e-05, "loss": 0.02475598, "step": 18309 }, { "epoch": 36.62, "grad_norm": 1.2436680793762207, "learning_rate": 2e-05, "loss": 0.05194392, "step": 18310 }, { "epoch": 36.622, "grad_norm": 1.4380803108215332, "learning_rate": 2e-05, "loss": 0.04860058, "step": 18311 }, { "epoch": 36.624, "grad_norm": 0.9794439673423767, "learning_rate": 2e-05, "loss": 0.04139171, "step": 18312 }, { "epoch": 36.626, "grad_norm": 1.0998506546020508, "learning_rate": 2e-05, "loss": 0.03820693, "step": 18313 }, { "epoch": 36.628, "grad_norm": 2.860480785369873, "learning_rate": 2e-05, "loss": 0.03815154, "step": 18314 }, { "epoch": 36.63, "grad_norm": 0.8581103682518005, "learning_rate": 2e-05, "loss": 0.02766853, "step": 18315 }, { "epoch": 36.632, "grad_norm": 1.8850700855255127, "learning_rate": 2e-05, "loss": 0.03901371, "step": 18316 }, { "epoch": 36.634, "grad_norm": 1.1233925819396973, "learning_rate": 2e-05, "loss": 0.04775962, "step": 18317 }, { "epoch": 36.636, "grad_norm": 2.5430996417999268, "learning_rate": 2e-05, "loss": 0.04836998, "step": 18318 }, { "epoch": 36.638, "grad_norm": 1.9226852655410767, "learning_rate": 2e-05, "loss": 0.04729922, "step": 18319 }, { "epoch": 36.64, "grad_norm": 0.9372596144676208, "learning_rate": 2e-05, "loss": 0.02783322, "step": 18320 }, { "epoch": 36.642, "grad_norm": 0.9249229431152344, "learning_rate": 2e-05, "loss": 0.03499991, "step": 18321 }, { "epoch": 36.644, "grad_norm": 4.156978130340576, "learning_rate": 2e-05, "loss": 0.04772555, "step": 18322 }, { "epoch": 36.646, "grad_norm": 1.2103395462036133, "learning_rate": 2e-05, "loss": 0.03773941, "step": 18323 }, { "epoch": 36.648, "grad_norm": 1.8933029174804688, "learning_rate": 2e-05, "loss": 0.05208328, "step": 18324 }, { "epoch": 36.65, "grad_norm": 4.760164260864258, "learning_rate": 2e-05, "loss": 0.05351193, "step": 18325 }, { "epoch": 36.652, "grad_norm": 0.9737799167633057, "learning_rate": 2e-05, "loss": 0.03251104, "step": 18326 }, { "epoch": 36.654, "grad_norm": 1.6335538625717163, "learning_rate": 2e-05, "loss": 0.04572538, "step": 18327 }, { "epoch": 36.656, "grad_norm": 1.3653438091278076, "learning_rate": 2e-05, "loss": 0.06602904, "step": 18328 }, { "epoch": 36.658, "grad_norm": 0.9298463463783264, "learning_rate": 2e-05, "loss": 0.03229936, "step": 18329 }, { "epoch": 36.66, "grad_norm": 1.9307780265808105, "learning_rate": 2e-05, "loss": 0.06293757, "step": 18330 }, { "epoch": 36.662, "grad_norm": 2.201835870742798, "learning_rate": 2e-05, "loss": 0.05230286, "step": 18331 }, { "epoch": 36.664, "grad_norm": 1.0907442569732666, "learning_rate": 2e-05, "loss": 0.02157433, "step": 18332 }, { "epoch": 36.666, "grad_norm": 0.9865091443061829, "learning_rate": 2e-05, "loss": 0.03958989, "step": 18333 }, { "epoch": 36.668, "grad_norm": 1.0964685678482056, "learning_rate": 2e-05, "loss": 0.02590856, "step": 18334 }, { "epoch": 36.67, "grad_norm": 1.1179277896881104, "learning_rate": 2e-05, "loss": 0.04870913, "step": 18335 }, { "epoch": 36.672, "grad_norm": 2.204655408859253, "learning_rate": 2e-05, "loss": 0.04417299, "step": 18336 }, { "epoch": 36.674, "grad_norm": 0.9407059550285339, "learning_rate": 2e-05, "loss": 0.03263928, "step": 18337 }, { "epoch": 36.676, "grad_norm": 1.0223804712295532, "learning_rate": 2e-05, "loss": 0.03229953, "step": 18338 }, { "epoch": 36.678, "grad_norm": 1.5161690711975098, "learning_rate": 2e-05, "loss": 0.04354671, "step": 18339 }, { "epoch": 36.68, "grad_norm": 1.0827372074127197, "learning_rate": 2e-05, "loss": 0.03189337, "step": 18340 }, { "epoch": 36.682, "grad_norm": 1.0712003707885742, "learning_rate": 2e-05, "loss": 0.03986772, "step": 18341 }, { "epoch": 36.684, "grad_norm": 1.8263356685638428, "learning_rate": 2e-05, "loss": 0.06464032, "step": 18342 }, { "epoch": 36.686, "grad_norm": 0.9857245087623596, "learning_rate": 2e-05, "loss": 0.03141895, "step": 18343 }, { "epoch": 36.688, "grad_norm": 1.1037623882293701, "learning_rate": 2e-05, "loss": 0.05341347, "step": 18344 }, { "epoch": 36.69, "grad_norm": 1.3319497108459473, "learning_rate": 2e-05, "loss": 0.04842388, "step": 18345 }, { "epoch": 36.692, "grad_norm": 0.9048107266426086, "learning_rate": 2e-05, "loss": 0.03326295, "step": 18346 }, { "epoch": 36.694, "grad_norm": 1.2653605937957764, "learning_rate": 2e-05, "loss": 0.05624186, "step": 18347 }, { "epoch": 36.696, "grad_norm": 1.0323076248168945, "learning_rate": 2e-05, "loss": 0.036389, "step": 18348 }, { "epoch": 36.698, "grad_norm": 0.9778062701225281, "learning_rate": 2e-05, "loss": 0.0376082, "step": 18349 }, { "epoch": 36.7, "grad_norm": 0.964667558670044, "learning_rate": 2e-05, "loss": 0.03379457, "step": 18350 }, { "epoch": 36.702, "grad_norm": 1.6179938316345215, "learning_rate": 2e-05, "loss": 0.04310118, "step": 18351 }, { "epoch": 36.704, "grad_norm": 1.618957757949829, "learning_rate": 2e-05, "loss": 0.03925658, "step": 18352 }, { "epoch": 36.706, "grad_norm": 0.9782449007034302, "learning_rate": 2e-05, "loss": 0.04512272, "step": 18353 }, { "epoch": 36.708, "grad_norm": 1.3687751293182373, "learning_rate": 2e-05, "loss": 0.05281378, "step": 18354 }, { "epoch": 36.71, "grad_norm": 0.9114720821380615, "learning_rate": 2e-05, "loss": 0.0391209, "step": 18355 }, { "epoch": 36.712, "grad_norm": 1.1271899938583374, "learning_rate": 2e-05, "loss": 0.04409296, "step": 18356 }, { "epoch": 36.714, "grad_norm": 1.8552175760269165, "learning_rate": 2e-05, "loss": 0.05702048, "step": 18357 }, { "epoch": 36.716, "grad_norm": 1.7209573984146118, "learning_rate": 2e-05, "loss": 0.04640878, "step": 18358 }, { "epoch": 36.718, "grad_norm": 0.9476683139801025, "learning_rate": 2e-05, "loss": 0.03813782, "step": 18359 }, { "epoch": 36.72, "grad_norm": 1.688008189201355, "learning_rate": 2e-05, "loss": 0.04366963, "step": 18360 }, { "epoch": 36.722, "grad_norm": 2.010718822479248, "learning_rate": 2e-05, "loss": 0.06395419, "step": 18361 }, { "epoch": 36.724, "grad_norm": 0.8910848498344421, "learning_rate": 2e-05, "loss": 0.03455939, "step": 18362 }, { "epoch": 36.726, "grad_norm": 1.1505165100097656, "learning_rate": 2e-05, "loss": 0.04913718, "step": 18363 }, { "epoch": 36.728, "grad_norm": 0.9859042763710022, "learning_rate": 2e-05, "loss": 0.03303109, "step": 18364 }, { "epoch": 36.73, "grad_norm": 0.8414836525917053, "learning_rate": 2e-05, "loss": 0.03276784, "step": 18365 }, { "epoch": 36.732, "grad_norm": 0.9811587333679199, "learning_rate": 2e-05, "loss": 0.0389123, "step": 18366 }, { "epoch": 36.734, "grad_norm": 2.312257766723633, "learning_rate": 2e-05, "loss": 0.03441167, "step": 18367 }, { "epoch": 36.736, "grad_norm": 1.6929166316986084, "learning_rate": 2e-05, "loss": 0.04984661, "step": 18368 }, { "epoch": 36.738, "grad_norm": 1.7506216764450073, "learning_rate": 2e-05, "loss": 0.05079547, "step": 18369 }, { "epoch": 36.74, "grad_norm": 1.1666603088378906, "learning_rate": 2e-05, "loss": 0.04992181, "step": 18370 }, { "epoch": 36.742, "grad_norm": 1.2197574377059937, "learning_rate": 2e-05, "loss": 0.04768842, "step": 18371 }, { "epoch": 36.744, "grad_norm": 1.1094059944152832, "learning_rate": 2e-05, "loss": 0.04199777, "step": 18372 }, { "epoch": 36.746, "grad_norm": 3.280449151992798, "learning_rate": 2e-05, "loss": 0.05740436, "step": 18373 }, { "epoch": 36.748, "grad_norm": 0.9624062776565552, "learning_rate": 2e-05, "loss": 0.03716166, "step": 18374 }, { "epoch": 36.75, "grad_norm": 1.0864914655685425, "learning_rate": 2e-05, "loss": 0.04715025, "step": 18375 }, { "epoch": 36.752, "grad_norm": 3.3075265884399414, "learning_rate": 2e-05, "loss": 0.05242647, "step": 18376 }, { "epoch": 36.754, "grad_norm": 1.0499765872955322, "learning_rate": 2e-05, "loss": 0.0402412, "step": 18377 }, { "epoch": 36.756, "grad_norm": 0.9074463248252869, "learning_rate": 2e-05, "loss": 0.03055411, "step": 18378 }, { "epoch": 36.758, "grad_norm": 1.3339488506317139, "learning_rate": 2e-05, "loss": 0.05847781, "step": 18379 }, { "epoch": 36.76, "grad_norm": 0.9649989604949951, "learning_rate": 2e-05, "loss": 0.03218369, "step": 18380 }, { "epoch": 36.762, "grad_norm": 1.0169107913970947, "learning_rate": 2e-05, "loss": 0.04686766, "step": 18381 }, { "epoch": 36.764, "grad_norm": 1.000291109085083, "learning_rate": 2e-05, "loss": 0.04495984, "step": 18382 }, { "epoch": 36.766, "grad_norm": 1.1442769765853882, "learning_rate": 2e-05, "loss": 0.03618668, "step": 18383 }, { "epoch": 36.768, "grad_norm": 2.3837881088256836, "learning_rate": 2e-05, "loss": 0.02887362, "step": 18384 }, { "epoch": 36.77, "grad_norm": 1.0470716953277588, "learning_rate": 2e-05, "loss": 0.04317556, "step": 18385 }, { "epoch": 36.772, "grad_norm": 2.096198081970215, "learning_rate": 2e-05, "loss": 0.0632734, "step": 18386 }, { "epoch": 36.774, "grad_norm": 1.1189266443252563, "learning_rate": 2e-05, "loss": 0.02945754, "step": 18387 }, { "epoch": 36.776, "grad_norm": 1.030576467514038, "learning_rate": 2e-05, "loss": 0.0424621, "step": 18388 }, { "epoch": 36.778, "grad_norm": 1.321351408958435, "learning_rate": 2e-05, "loss": 0.04765938, "step": 18389 }, { "epoch": 36.78, "grad_norm": 1.165911078453064, "learning_rate": 2e-05, "loss": 0.03492573, "step": 18390 }, { "epoch": 36.782, "grad_norm": 1.1613008975982666, "learning_rate": 2e-05, "loss": 0.0409067, "step": 18391 }, { "epoch": 36.784, "grad_norm": 1.1841105222702026, "learning_rate": 2e-05, "loss": 0.04455191, "step": 18392 }, { "epoch": 36.786, "grad_norm": 0.7762970328330994, "learning_rate": 2e-05, "loss": 0.0253247, "step": 18393 }, { "epoch": 36.788, "grad_norm": 1.1207592487335205, "learning_rate": 2e-05, "loss": 0.04564336, "step": 18394 }, { "epoch": 36.79, "grad_norm": 0.9029062390327454, "learning_rate": 2e-05, "loss": 0.04075585, "step": 18395 }, { "epoch": 36.792, "grad_norm": 1.112956166267395, "learning_rate": 2e-05, "loss": 0.04074747, "step": 18396 }, { "epoch": 36.794, "grad_norm": 0.9839574694633484, "learning_rate": 2e-05, "loss": 0.04180932, "step": 18397 }, { "epoch": 36.796, "grad_norm": 1.1542251110076904, "learning_rate": 2e-05, "loss": 0.03987038, "step": 18398 }, { "epoch": 36.798, "grad_norm": 1.0333783626556396, "learning_rate": 2e-05, "loss": 0.04041319, "step": 18399 }, { "epoch": 36.8, "grad_norm": 1.2593748569488525, "learning_rate": 2e-05, "loss": 0.05035427, "step": 18400 }, { "epoch": 36.802, "grad_norm": 0.9334186315536499, "learning_rate": 2e-05, "loss": 0.03204294, "step": 18401 }, { "epoch": 36.804, "grad_norm": 1.0926785469055176, "learning_rate": 2e-05, "loss": 0.03932837, "step": 18402 }, { "epoch": 36.806, "grad_norm": 1.0397621393203735, "learning_rate": 2e-05, "loss": 0.04319752, "step": 18403 }, { "epoch": 36.808, "grad_norm": 0.9711613059043884, "learning_rate": 2e-05, "loss": 0.0452536, "step": 18404 }, { "epoch": 36.81, "grad_norm": 1.8931093215942383, "learning_rate": 2e-05, "loss": 0.05244964, "step": 18405 }, { "epoch": 36.812, "grad_norm": 2.3761041164398193, "learning_rate": 2e-05, "loss": 0.0582416, "step": 18406 }, { "epoch": 36.814, "grad_norm": 1.9970051050186157, "learning_rate": 2e-05, "loss": 0.05754521, "step": 18407 }, { "epoch": 36.816, "grad_norm": 1.0881487131118774, "learning_rate": 2e-05, "loss": 0.03944354, "step": 18408 }, { "epoch": 36.818, "grad_norm": 1.1691758632659912, "learning_rate": 2e-05, "loss": 0.04663821, "step": 18409 }, { "epoch": 36.82, "grad_norm": 2.6630196571350098, "learning_rate": 2e-05, "loss": 0.04442621, "step": 18410 }, { "epoch": 36.822, "grad_norm": 1.178477168083191, "learning_rate": 2e-05, "loss": 0.06222789, "step": 18411 }, { "epoch": 36.824, "grad_norm": 0.9005385637283325, "learning_rate": 2e-05, "loss": 0.02979804, "step": 18412 }, { "epoch": 36.826, "grad_norm": 1.3291435241699219, "learning_rate": 2e-05, "loss": 0.03718179, "step": 18413 }, { "epoch": 36.828, "grad_norm": 1.2575690746307373, "learning_rate": 2e-05, "loss": 0.0420006, "step": 18414 }, { "epoch": 36.83, "grad_norm": 0.9177241921424866, "learning_rate": 2e-05, "loss": 0.02713429, "step": 18415 }, { "epoch": 36.832, "grad_norm": 0.9539687037467957, "learning_rate": 2e-05, "loss": 0.03764067, "step": 18416 }, { "epoch": 36.834, "grad_norm": 1.2315313816070557, "learning_rate": 2e-05, "loss": 0.05110057, "step": 18417 }, { "epoch": 36.836, "grad_norm": 0.9716684222221375, "learning_rate": 2e-05, "loss": 0.03656537, "step": 18418 }, { "epoch": 36.838, "grad_norm": 1.4782754182815552, "learning_rate": 2e-05, "loss": 0.05838714, "step": 18419 }, { "epoch": 36.84, "grad_norm": 1.0978866815567017, "learning_rate": 2e-05, "loss": 0.03337295, "step": 18420 }, { "epoch": 36.842, "grad_norm": 0.9099648594856262, "learning_rate": 2e-05, "loss": 0.02617436, "step": 18421 }, { "epoch": 36.844, "grad_norm": 1.8865479230880737, "learning_rate": 2e-05, "loss": 0.06396951, "step": 18422 }, { "epoch": 36.846, "grad_norm": 1.4283522367477417, "learning_rate": 2e-05, "loss": 0.06104654, "step": 18423 }, { "epoch": 36.848, "grad_norm": 1.6691433191299438, "learning_rate": 2e-05, "loss": 0.05061128, "step": 18424 }, { "epoch": 36.85, "grad_norm": 0.7365068793296814, "learning_rate": 2e-05, "loss": 0.01961617, "step": 18425 }, { "epoch": 36.852, "grad_norm": 0.9096119403839111, "learning_rate": 2e-05, "loss": 0.02698065, "step": 18426 }, { "epoch": 36.854, "grad_norm": 1.139542818069458, "learning_rate": 2e-05, "loss": 0.05034701, "step": 18427 }, { "epoch": 36.856, "grad_norm": 1.1367566585540771, "learning_rate": 2e-05, "loss": 0.06615532, "step": 18428 }, { "epoch": 36.858, "grad_norm": 1.1288893222808838, "learning_rate": 2e-05, "loss": 0.04844678, "step": 18429 }, { "epoch": 36.86, "grad_norm": 1.7860678434371948, "learning_rate": 2e-05, "loss": 0.0521226, "step": 18430 }, { "epoch": 36.862, "grad_norm": 0.963985800743103, "learning_rate": 2e-05, "loss": 0.03146128, "step": 18431 }, { "epoch": 36.864, "grad_norm": 1.116714596748352, "learning_rate": 2e-05, "loss": 0.05578673, "step": 18432 }, { "epoch": 36.866, "grad_norm": 1.2527042627334595, "learning_rate": 2e-05, "loss": 0.0589627, "step": 18433 }, { "epoch": 36.868, "grad_norm": 1.132263422012329, "learning_rate": 2e-05, "loss": 0.038504, "step": 18434 }, { "epoch": 36.87, "grad_norm": 1.2455251216888428, "learning_rate": 2e-05, "loss": 0.04573322, "step": 18435 }, { "epoch": 36.872, "grad_norm": 1.4633914232254028, "learning_rate": 2e-05, "loss": 0.06862871, "step": 18436 }, { "epoch": 36.874, "grad_norm": 0.971443772315979, "learning_rate": 2e-05, "loss": 0.03940517, "step": 18437 }, { "epoch": 36.876, "grad_norm": 5.435373306274414, "learning_rate": 2e-05, "loss": 0.02864525, "step": 18438 }, { "epoch": 36.878, "grad_norm": 1.9440596103668213, "learning_rate": 2e-05, "loss": 0.05987046, "step": 18439 }, { "epoch": 36.88, "grad_norm": 1.4058712720870972, "learning_rate": 2e-05, "loss": 0.05802792, "step": 18440 }, { "epoch": 36.882, "grad_norm": 1.7847079038619995, "learning_rate": 2e-05, "loss": 0.04465545, "step": 18441 }, { "epoch": 36.884, "grad_norm": 1.309718370437622, "learning_rate": 2e-05, "loss": 0.03451239, "step": 18442 }, { "epoch": 36.886, "grad_norm": 0.9990866780281067, "learning_rate": 2e-05, "loss": 0.039327, "step": 18443 }, { "epoch": 36.888, "grad_norm": 1.2902060747146606, "learning_rate": 2e-05, "loss": 0.05610467, "step": 18444 }, { "epoch": 36.89, "grad_norm": 1.0011862516403198, "learning_rate": 2e-05, "loss": 0.04527715, "step": 18445 }, { "epoch": 36.892, "grad_norm": 1.0508722066879272, "learning_rate": 2e-05, "loss": 0.04230107, "step": 18446 }, { "epoch": 36.894, "grad_norm": 1.4164390563964844, "learning_rate": 2e-05, "loss": 0.03276374, "step": 18447 }, { "epoch": 36.896, "grad_norm": 0.9304678440093994, "learning_rate": 2e-05, "loss": 0.03545247, "step": 18448 }, { "epoch": 36.898, "grad_norm": 0.8107298016548157, "learning_rate": 2e-05, "loss": 0.0228092, "step": 18449 }, { "epoch": 36.9, "grad_norm": 2.331596612930298, "learning_rate": 2e-05, "loss": 0.05408289, "step": 18450 }, { "epoch": 36.902, "grad_norm": 2.052706241607666, "learning_rate": 2e-05, "loss": 0.03973062, "step": 18451 }, { "epoch": 36.904, "grad_norm": 1.0983513593673706, "learning_rate": 2e-05, "loss": 0.0412596, "step": 18452 }, { "epoch": 36.906, "grad_norm": 1.385184645652771, "learning_rate": 2e-05, "loss": 0.05211121, "step": 18453 }, { "epoch": 36.908, "grad_norm": 1.196946620941162, "learning_rate": 2e-05, "loss": 0.04114325, "step": 18454 }, { "epoch": 36.91, "grad_norm": 1.242645502090454, "learning_rate": 2e-05, "loss": 0.04828195, "step": 18455 }, { "epoch": 36.912, "grad_norm": 1.3914340734481812, "learning_rate": 2e-05, "loss": 0.04427434, "step": 18456 }, { "epoch": 36.914, "grad_norm": 1.3237046003341675, "learning_rate": 2e-05, "loss": 0.05166736, "step": 18457 }, { "epoch": 36.916, "grad_norm": 1.1052852869033813, "learning_rate": 2e-05, "loss": 0.05035865, "step": 18458 }, { "epoch": 36.918, "grad_norm": 1.672071099281311, "learning_rate": 2e-05, "loss": 0.05231842, "step": 18459 }, { "epoch": 36.92, "grad_norm": 1.23970365524292, "learning_rate": 2e-05, "loss": 0.03026779, "step": 18460 }, { "epoch": 36.922, "grad_norm": 0.974622368812561, "learning_rate": 2e-05, "loss": 0.03627507, "step": 18461 }, { "epoch": 36.924, "grad_norm": 1.152114748954773, "learning_rate": 2e-05, "loss": 0.04782024, "step": 18462 }, { "epoch": 36.926, "grad_norm": 1.9899969100952148, "learning_rate": 2e-05, "loss": 0.07043861, "step": 18463 }, { "epoch": 36.928, "grad_norm": 1.5622037649154663, "learning_rate": 2e-05, "loss": 0.04683921, "step": 18464 }, { "epoch": 36.93, "grad_norm": 0.9169710278511047, "learning_rate": 2e-05, "loss": 0.03294283, "step": 18465 }, { "epoch": 36.932, "grad_norm": 0.9145183563232422, "learning_rate": 2e-05, "loss": 0.02998644, "step": 18466 }, { "epoch": 36.934, "grad_norm": 1.0593329668045044, "learning_rate": 2e-05, "loss": 0.04985102, "step": 18467 }, { "epoch": 36.936, "grad_norm": 1.208417534828186, "learning_rate": 2e-05, "loss": 0.04804527, "step": 18468 }, { "epoch": 36.938, "grad_norm": 1.169744849205017, "learning_rate": 2e-05, "loss": 0.04403226, "step": 18469 }, { "epoch": 36.94, "grad_norm": 1.252413272857666, "learning_rate": 2e-05, "loss": 0.04190671, "step": 18470 }, { "epoch": 36.942, "grad_norm": 0.8094372749328613, "learning_rate": 2e-05, "loss": 0.03302211, "step": 18471 }, { "epoch": 36.944, "grad_norm": 0.7911103963851929, "learning_rate": 2e-05, "loss": 0.0283273, "step": 18472 }, { "epoch": 36.946, "grad_norm": 1.2825318574905396, "learning_rate": 2e-05, "loss": 0.03996003, "step": 18473 }, { "epoch": 36.948, "grad_norm": 1.0204027891159058, "learning_rate": 2e-05, "loss": 0.04393394, "step": 18474 }, { "epoch": 36.95, "grad_norm": 1.10942542552948, "learning_rate": 2e-05, "loss": 0.03605561, "step": 18475 }, { "epoch": 36.952, "grad_norm": 1.1125274896621704, "learning_rate": 2e-05, "loss": 0.05388219, "step": 18476 }, { "epoch": 36.954, "grad_norm": 1.0539900064468384, "learning_rate": 2e-05, "loss": 0.03827664, "step": 18477 }, { "epoch": 36.956, "grad_norm": 0.9649869203567505, "learning_rate": 2e-05, "loss": 0.03404582, "step": 18478 }, { "epoch": 36.958, "grad_norm": 1.306621789932251, "learning_rate": 2e-05, "loss": 0.04820665, "step": 18479 }, { "epoch": 36.96, "grad_norm": 1.2438654899597168, "learning_rate": 2e-05, "loss": 0.06396779, "step": 18480 }, { "epoch": 36.962, "grad_norm": 1.0792428255081177, "learning_rate": 2e-05, "loss": 0.05157465, "step": 18481 }, { "epoch": 36.964, "grad_norm": 1.0924911499023438, "learning_rate": 2e-05, "loss": 0.04207941, "step": 18482 }, { "epoch": 36.966, "grad_norm": 1.424224615097046, "learning_rate": 2e-05, "loss": 0.03915636, "step": 18483 }, { "epoch": 36.968, "grad_norm": 1.627061128616333, "learning_rate": 2e-05, "loss": 0.0426735, "step": 18484 }, { "epoch": 36.97, "grad_norm": 0.9869682192802429, "learning_rate": 2e-05, "loss": 0.04533973, "step": 18485 }, { "epoch": 36.972, "grad_norm": 1.2473162412643433, "learning_rate": 2e-05, "loss": 0.05253605, "step": 18486 }, { "epoch": 36.974, "grad_norm": 1.2208019495010376, "learning_rate": 2e-05, "loss": 0.0335426, "step": 18487 }, { "epoch": 36.976, "grad_norm": 0.9897271990776062, "learning_rate": 2e-05, "loss": 0.04591155, "step": 18488 }, { "epoch": 36.978, "grad_norm": 1.2006891965866089, "learning_rate": 2e-05, "loss": 0.04321127, "step": 18489 }, { "epoch": 36.98, "grad_norm": 0.8633509874343872, "learning_rate": 2e-05, "loss": 0.0337048, "step": 18490 }, { "epoch": 36.982, "grad_norm": 1.547573208808899, "learning_rate": 2e-05, "loss": 0.04338808, "step": 18491 }, { "epoch": 36.984, "grad_norm": 1.0358763933181763, "learning_rate": 2e-05, "loss": 0.04519119, "step": 18492 }, { "epoch": 36.986, "grad_norm": 1.0358898639678955, "learning_rate": 2e-05, "loss": 0.04193265, "step": 18493 }, { "epoch": 36.988, "grad_norm": 1.6679039001464844, "learning_rate": 2e-05, "loss": 0.07180307, "step": 18494 }, { "epoch": 36.99, "grad_norm": 1.2240631580352783, "learning_rate": 2e-05, "loss": 0.04303456, "step": 18495 }, { "epoch": 36.992, "grad_norm": 0.9912630915641785, "learning_rate": 2e-05, "loss": 0.03490957, "step": 18496 }, { "epoch": 36.994, "grad_norm": 1.0766117572784424, "learning_rate": 2e-05, "loss": 0.0478361, "step": 18497 }, { "epoch": 36.996, "grad_norm": 0.8898375034332275, "learning_rate": 2e-05, "loss": 0.03672919, "step": 18498 }, { "epoch": 36.998, "grad_norm": 1.2632743120193481, "learning_rate": 2e-05, "loss": 0.04127926, "step": 18499 }, { "epoch": 37.0, "grad_norm": 1.0599600076675415, "learning_rate": 2e-05, "loss": 0.04293627, "step": 18500 }, { "epoch": 37.0, "eval_performance": { "AngleClassification_1": 1.0, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9880239520958084, "Equal_1": 0.996, "Equal_2": 0.9760479041916168, "Equal_3": 0.9880239520958084, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.998003992015968, "Parallel_1": 0.9899799599198397, "Parallel_2": 0.9979959919839679, "Parallel_3": 0.992, "Perpendicular_1": 0.988, "Perpendicular_2": 0.992, "Perpendicular_3": 0.8817635270541082, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.998, "PointLiesOnCircle_3": 0.996, "PointLiesOnLine_1": 0.9899799599198397, "PointLiesOnLine_2": 0.9979959919839679, "PointLiesOnLine_3": 0.9800399201596807 }, "eval_runtime": 227.4754, "eval_samples_per_second": 46.159, "eval_steps_per_second": 0.923, "step": 18500 }, { "epoch": 37.002, "grad_norm": 1.1682475805282593, "learning_rate": 2e-05, "loss": 0.0570367, "step": 18501 }, { "epoch": 37.004, "grad_norm": 1.003157138824463, "learning_rate": 2e-05, "loss": 0.04462772, "step": 18502 }, { "epoch": 37.006, "grad_norm": 0.9233158230781555, "learning_rate": 2e-05, "loss": 0.03723887, "step": 18503 }, { "epoch": 37.008, "grad_norm": 0.8924064636230469, "learning_rate": 2e-05, "loss": 0.0247997, "step": 18504 }, { "epoch": 37.01, "grad_norm": 1.1703031063079834, "learning_rate": 2e-05, "loss": 0.05182582, "step": 18505 }, { "epoch": 37.012, "grad_norm": 1.359881043434143, "learning_rate": 2e-05, "loss": 0.03129677, "step": 18506 }, { "epoch": 37.014, "grad_norm": 1.5726388692855835, "learning_rate": 2e-05, "loss": 0.03585267, "step": 18507 }, { "epoch": 37.016, "grad_norm": 1.0973987579345703, "learning_rate": 2e-05, "loss": 0.04739658, "step": 18508 }, { "epoch": 37.018, "grad_norm": 1.2007373571395874, "learning_rate": 2e-05, "loss": 0.04335775, "step": 18509 }, { "epoch": 37.02, "grad_norm": 1.0888774394989014, "learning_rate": 2e-05, "loss": 0.035429, "step": 18510 }, { "epoch": 37.022, "grad_norm": 1.1034109592437744, "learning_rate": 2e-05, "loss": 0.0396948, "step": 18511 }, { "epoch": 37.024, "grad_norm": 1.0754344463348389, "learning_rate": 2e-05, "loss": 0.03501423, "step": 18512 }, { "epoch": 37.026, "grad_norm": 1.0313738584518433, "learning_rate": 2e-05, "loss": 0.0522851, "step": 18513 }, { "epoch": 37.028, "grad_norm": 1.102516531944275, "learning_rate": 2e-05, "loss": 0.05250989, "step": 18514 }, { "epoch": 37.03, "grad_norm": 0.8615829944610596, "learning_rate": 2e-05, "loss": 0.03426981, "step": 18515 }, { "epoch": 37.032, "grad_norm": 1.137335181236267, "learning_rate": 2e-05, "loss": 0.0624398, "step": 18516 }, { "epoch": 37.034, "grad_norm": 1.1044379472732544, "learning_rate": 2e-05, "loss": 0.04272424, "step": 18517 }, { "epoch": 37.036, "grad_norm": 1.6234915256500244, "learning_rate": 2e-05, "loss": 0.03984703, "step": 18518 }, { "epoch": 37.038, "grad_norm": 1.0715773105621338, "learning_rate": 2e-05, "loss": 0.04468865, "step": 18519 }, { "epoch": 37.04, "grad_norm": 0.8963190913200378, "learning_rate": 2e-05, "loss": 0.03542995, "step": 18520 }, { "epoch": 37.042, "grad_norm": 1.4817719459533691, "learning_rate": 2e-05, "loss": 0.0553272, "step": 18521 }, { "epoch": 37.044, "grad_norm": 1.2632020711898804, "learning_rate": 2e-05, "loss": 0.05457424, "step": 18522 }, { "epoch": 37.046, "grad_norm": 1.087607741355896, "learning_rate": 2e-05, "loss": 0.05570821, "step": 18523 }, { "epoch": 37.048, "grad_norm": 2.519253969192505, "learning_rate": 2e-05, "loss": 0.04770573, "step": 18524 }, { "epoch": 37.05, "grad_norm": 1.2821415662765503, "learning_rate": 2e-05, "loss": 0.06182626, "step": 18525 }, { "epoch": 37.052, "grad_norm": 1.498982548713684, "learning_rate": 2e-05, "loss": 0.0616838, "step": 18526 }, { "epoch": 37.054, "grad_norm": 1.2517311573028564, "learning_rate": 2e-05, "loss": 0.04086914, "step": 18527 }, { "epoch": 37.056, "grad_norm": 1.0576390027999878, "learning_rate": 2e-05, "loss": 0.04006585, "step": 18528 }, { "epoch": 37.058, "grad_norm": 1.2376681566238403, "learning_rate": 2e-05, "loss": 0.02923202, "step": 18529 }, { "epoch": 37.06, "grad_norm": 1.0800292491912842, "learning_rate": 2e-05, "loss": 0.04538111, "step": 18530 }, { "epoch": 37.062, "grad_norm": 1.1487936973571777, "learning_rate": 2e-05, "loss": 0.0345978, "step": 18531 }, { "epoch": 37.064, "grad_norm": 1.1852397918701172, "learning_rate": 2e-05, "loss": 0.03834898, "step": 18532 }, { "epoch": 37.066, "grad_norm": 0.9839009642601013, "learning_rate": 2e-05, "loss": 0.03927675, "step": 18533 }, { "epoch": 37.068, "grad_norm": 1.0537208318710327, "learning_rate": 2e-05, "loss": 0.03851627, "step": 18534 }, { "epoch": 37.07, "grad_norm": 1.08991277217865, "learning_rate": 2e-05, "loss": 0.05038679, "step": 18535 }, { "epoch": 37.072, "grad_norm": 1.7779130935668945, "learning_rate": 2e-05, "loss": 0.04691472, "step": 18536 }, { "epoch": 37.074, "grad_norm": 1.192657232284546, "learning_rate": 2e-05, "loss": 0.04174162, "step": 18537 }, { "epoch": 37.076, "grad_norm": 2.2757623195648193, "learning_rate": 2e-05, "loss": 0.04920544, "step": 18538 }, { "epoch": 37.078, "grad_norm": 0.8510375618934631, "learning_rate": 2e-05, "loss": 0.02739036, "step": 18539 }, { "epoch": 37.08, "grad_norm": 0.9908120632171631, "learning_rate": 2e-05, "loss": 0.0390277, "step": 18540 }, { "epoch": 37.082, "grad_norm": 0.9508699774742126, "learning_rate": 2e-05, "loss": 0.03601983, "step": 18541 }, { "epoch": 37.084, "grad_norm": 1.014615535736084, "learning_rate": 2e-05, "loss": 0.04521754, "step": 18542 }, { "epoch": 37.086, "grad_norm": 0.9018338322639465, "learning_rate": 2e-05, "loss": 0.03767071, "step": 18543 }, { "epoch": 37.088, "grad_norm": 1.052674412727356, "learning_rate": 2e-05, "loss": 0.03841018, "step": 18544 }, { "epoch": 37.09, "grad_norm": 1.0184881687164307, "learning_rate": 2e-05, "loss": 0.04767507, "step": 18545 }, { "epoch": 37.092, "grad_norm": 0.897982120513916, "learning_rate": 2e-05, "loss": 0.02831026, "step": 18546 }, { "epoch": 37.094, "grad_norm": 1.9670709371566772, "learning_rate": 2e-05, "loss": 0.04322557, "step": 18547 }, { "epoch": 37.096, "grad_norm": 2.8941071033477783, "learning_rate": 2e-05, "loss": 0.05296428, "step": 18548 }, { "epoch": 37.098, "grad_norm": 0.9463397860527039, "learning_rate": 2e-05, "loss": 0.0296222, "step": 18549 }, { "epoch": 37.1, "grad_norm": 2.523704767227173, "learning_rate": 2e-05, "loss": 0.04951302, "step": 18550 }, { "epoch": 37.102, "grad_norm": 1.3599003553390503, "learning_rate": 2e-05, "loss": 0.05852996, "step": 18551 }, { "epoch": 37.104, "grad_norm": 1.4825284481048584, "learning_rate": 2e-05, "loss": 0.04060439, "step": 18552 }, { "epoch": 37.106, "grad_norm": 1.0748836994171143, "learning_rate": 2e-05, "loss": 0.04787468, "step": 18553 }, { "epoch": 37.108, "grad_norm": 2.3501408100128174, "learning_rate": 2e-05, "loss": 0.05236188, "step": 18554 }, { "epoch": 37.11, "grad_norm": 1.1505173444747925, "learning_rate": 2e-05, "loss": 0.034664, "step": 18555 }, { "epoch": 37.112, "grad_norm": 1.684207797050476, "learning_rate": 2e-05, "loss": 0.03156, "step": 18556 }, { "epoch": 37.114, "grad_norm": 1.2476089000701904, "learning_rate": 2e-05, "loss": 0.03569327, "step": 18557 }, { "epoch": 37.116, "grad_norm": 0.8469535708427429, "learning_rate": 2e-05, "loss": 0.02906827, "step": 18558 }, { "epoch": 37.118, "grad_norm": 1.0945793390274048, "learning_rate": 2e-05, "loss": 0.04736859, "step": 18559 }, { "epoch": 37.12, "grad_norm": 1.0249221324920654, "learning_rate": 2e-05, "loss": 0.0421187, "step": 18560 }, { "epoch": 37.122, "grad_norm": 0.9868743419647217, "learning_rate": 2e-05, "loss": 0.03615376, "step": 18561 }, { "epoch": 37.124, "grad_norm": 1.809541940689087, "learning_rate": 2e-05, "loss": 0.05981335, "step": 18562 }, { "epoch": 37.126, "grad_norm": 0.987253725528717, "learning_rate": 2e-05, "loss": 0.0357689, "step": 18563 }, { "epoch": 37.128, "grad_norm": 1.444756031036377, "learning_rate": 2e-05, "loss": 0.06078364, "step": 18564 }, { "epoch": 37.13, "grad_norm": 0.9723086953163147, "learning_rate": 2e-05, "loss": 0.04072724, "step": 18565 }, { "epoch": 37.132, "grad_norm": 1.1131644248962402, "learning_rate": 2e-05, "loss": 0.03938079, "step": 18566 }, { "epoch": 37.134, "grad_norm": 1.2408827543258667, "learning_rate": 2e-05, "loss": 0.05207326, "step": 18567 }, { "epoch": 37.136, "grad_norm": 1.197900414466858, "learning_rate": 2e-05, "loss": 0.04488026, "step": 18568 }, { "epoch": 37.138, "grad_norm": 0.850174069404602, "learning_rate": 2e-05, "loss": 0.03402021, "step": 18569 }, { "epoch": 37.14, "grad_norm": 4.397053241729736, "learning_rate": 2e-05, "loss": 0.04009341, "step": 18570 }, { "epoch": 37.142, "grad_norm": 2.002505302429199, "learning_rate": 2e-05, "loss": 0.04085949, "step": 18571 }, { "epoch": 37.144, "grad_norm": 1.1312376260757446, "learning_rate": 2e-05, "loss": 0.02654742, "step": 18572 }, { "epoch": 37.146, "grad_norm": 1.0259140729904175, "learning_rate": 2e-05, "loss": 0.04556305, "step": 18573 }, { "epoch": 37.148, "grad_norm": 1.1859492063522339, "learning_rate": 2e-05, "loss": 0.04656759, "step": 18574 }, { "epoch": 37.15, "grad_norm": 1.3081399202346802, "learning_rate": 2e-05, "loss": 0.0322, "step": 18575 }, { "epoch": 37.152, "grad_norm": 0.939509928226471, "learning_rate": 2e-05, "loss": 0.03435161, "step": 18576 }, { "epoch": 37.154, "grad_norm": 0.9644339680671692, "learning_rate": 2e-05, "loss": 0.03243748, "step": 18577 }, { "epoch": 37.156, "grad_norm": 1.5971527099609375, "learning_rate": 2e-05, "loss": 0.03711062, "step": 18578 }, { "epoch": 37.158, "grad_norm": 0.980640709400177, "learning_rate": 2e-05, "loss": 0.03521699, "step": 18579 }, { "epoch": 37.16, "grad_norm": 0.9940372705459595, "learning_rate": 2e-05, "loss": 0.03740561, "step": 18580 }, { "epoch": 37.162, "grad_norm": 1.0860470533370972, "learning_rate": 2e-05, "loss": 0.05285859, "step": 18581 }, { "epoch": 37.164, "grad_norm": 1.2403854131698608, "learning_rate": 2e-05, "loss": 0.05728502, "step": 18582 }, { "epoch": 37.166, "grad_norm": 0.9322932362556458, "learning_rate": 2e-05, "loss": 0.04108722, "step": 18583 }, { "epoch": 37.168, "grad_norm": 0.8943539261817932, "learning_rate": 2e-05, "loss": 0.02722687, "step": 18584 }, { "epoch": 37.17, "grad_norm": 1.1638489961624146, "learning_rate": 2e-05, "loss": 0.05123524, "step": 18585 }, { "epoch": 37.172, "grad_norm": 2.160351514816284, "learning_rate": 2e-05, "loss": 0.05772797, "step": 18586 }, { "epoch": 37.174, "grad_norm": 1.3946647644042969, "learning_rate": 2e-05, "loss": 0.0401518, "step": 18587 }, { "epoch": 37.176, "grad_norm": 0.9993221759796143, "learning_rate": 2e-05, "loss": 0.03932448, "step": 18588 }, { "epoch": 37.178, "grad_norm": 1.035330057144165, "learning_rate": 2e-05, "loss": 0.03464904, "step": 18589 }, { "epoch": 37.18, "grad_norm": 4.5411553382873535, "learning_rate": 2e-05, "loss": 0.05501566, "step": 18590 }, { "epoch": 37.182, "grad_norm": 1.2819823026657104, "learning_rate": 2e-05, "loss": 0.04825307, "step": 18591 }, { "epoch": 37.184, "grad_norm": 1.2159525156021118, "learning_rate": 2e-05, "loss": 0.05332406, "step": 18592 }, { "epoch": 37.186, "grad_norm": 0.998975932598114, "learning_rate": 2e-05, "loss": 0.04003967, "step": 18593 }, { "epoch": 37.188, "grad_norm": 0.9766421914100647, "learning_rate": 2e-05, "loss": 0.04102134, "step": 18594 }, { "epoch": 37.19, "grad_norm": 1.0833269357681274, "learning_rate": 2e-05, "loss": 0.04893383, "step": 18595 }, { "epoch": 37.192, "grad_norm": 1.2093477249145508, "learning_rate": 2e-05, "loss": 0.04680919, "step": 18596 }, { "epoch": 37.194, "grad_norm": 1.4963105916976929, "learning_rate": 2e-05, "loss": 0.04804759, "step": 18597 }, { "epoch": 37.196, "grad_norm": 0.7900597453117371, "learning_rate": 2e-05, "loss": 0.03158198, "step": 18598 }, { "epoch": 37.198, "grad_norm": 2.6444647312164307, "learning_rate": 2e-05, "loss": 0.05477588, "step": 18599 }, { "epoch": 37.2, "grad_norm": 0.9605166912078857, "learning_rate": 2e-05, "loss": 0.04306604, "step": 18600 }, { "epoch": 37.202, "grad_norm": 1.1954678297042847, "learning_rate": 2e-05, "loss": 0.04451198, "step": 18601 }, { "epoch": 37.204, "grad_norm": 2.043818473815918, "learning_rate": 2e-05, "loss": 0.05692128, "step": 18602 }, { "epoch": 37.206, "grad_norm": 0.925286054611206, "learning_rate": 2e-05, "loss": 0.03521394, "step": 18603 }, { "epoch": 37.208, "grad_norm": 1.350442886352539, "learning_rate": 2e-05, "loss": 0.04294303, "step": 18604 }, { "epoch": 37.21, "grad_norm": 1.1500611305236816, "learning_rate": 2e-05, "loss": 0.05209876, "step": 18605 }, { "epoch": 37.212, "grad_norm": 1.269662857055664, "learning_rate": 2e-05, "loss": 0.0590977, "step": 18606 }, { "epoch": 37.214, "grad_norm": 1.2121318578720093, "learning_rate": 2e-05, "loss": 0.03545868, "step": 18607 }, { "epoch": 37.216, "grad_norm": 1.2181477546691895, "learning_rate": 2e-05, "loss": 0.03755495, "step": 18608 }, { "epoch": 37.218, "grad_norm": 2.547245979309082, "learning_rate": 2e-05, "loss": 0.05628955, "step": 18609 }, { "epoch": 37.22, "grad_norm": 1.0236345529556274, "learning_rate": 2e-05, "loss": 0.04849827, "step": 18610 }, { "epoch": 37.222, "grad_norm": 1.322635531425476, "learning_rate": 2e-05, "loss": 0.04886009, "step": 18611 }, { "epoch": 37.224, "grad_norm": 1.401161551475525, "learning_rate": 2e-05, "loss": 0.04266241, "step": 18612 }, { "epoch": 37.226, "grad_norm": 1.1718062162399292, "learning_rate": 2e-05, "loss": 0.05141316, "step": 18613 }, { "epoch": 37.228, "grad_norm": 1.0649789571762085, "learning_rate": 2e-05, "loss": 0.0370419, "step": 18614 }, { "epoch": 37.23, "grad_norm": 1.4325693845748901, "learning_rate": 2e-05, "loss": 0.05144949, "step": 18615 }, { "epoch": 37.232, "grad_norm": 0.9535304307937622, "learning_rate": 2e-05, "loss": 0.03233226, "step": 18616 }, { "epoch": 37.234, "grad_norm": 0.9559863209724426, "learning_rate": 2e-05, "loss": 0.03468228, "step": 18617 }, { "epoch": 37.236, "grad_norm": 1.1019268035888672, "learning_rate": 2e-05, "loss": 0.03874182, "step": 18618 }, { "epoch": 37.238, "grad_norm": 1.370253086090088, "learning_rate": 2e-05, "loss": 0.04959146, "step": 18619 }, { "epoch": 37.24, "grad_norm": 1.3471044301986694, "learning_rate": 2e-05, "loss": 0.03725696, "step": 18620 }, { "epoch": 37.242, "grad_norm": 1.483403205871582, "learning_rate": 2e-05, "loss": 0.0549309, "step": 18621 }, { "epoch": 37.244, "grad_norm": 1.342819333076477, "learning_rate": 2e-05, "loss": 0.05008642, "step": 18622 }, { "epoch": 37.246, "grad_norm": 1.1659915447235107, "learning_rate": 2e-05, "loss": 0.04240853, "step": 18623 }, { "epoch": 37.248, "grad_norm": 1.0542713403701782, "learning_rate": 2e-05, "loss": 0.03913464, "step": 18624 }, { "epoch": 37.25, "grad_norm": 1.039794683456421, "learning_rate": 2e-05, "loss": 0.03687271, "step": 18625 }, { "epoch": 37.252, "grad_norm": 1.1286312341690063, "learning_rate": 2e-05, "loss": 0.04725526, "step": 18626 }, { "epoch": 37.254, "grad_norm": 1.1490280628204346, "learning_rate": 2e-05, "loss": 0.05450933, "step": 18627 }, { "epoch": 37.256, "grad_norm": 1.6916707754135132, "learning_rate": 2e-05, "loss": 0.04454128, "step": 18628 }, { "epoch": 37.258, "grad_norm": 1.1944490671157837, "learning_rate": 2e-05, "loss": 0.03833795, "step": 18629 }, { "epoch": 37.26, "grad_norm": 0.9914228916168213, "learning_rate": 2e-05, "loss": 0.03870317, "step": 18630 }, { "epoch": 37.262, "grad_norm": 1.8159040212631226, "learning_rate": 2e-05, "loss": 0.06136857, "step": 18631 }, { "epoch": 37.264, "grad_norm": 1.251948595046997, "learning_rate": 2e-05, "loss": 0.05153133, "step": 18632 }, { "epoch": 37.266, "grad_norm": 1.5842785835266113, "learning_rate": 2e-05, "loss": 0.05784289, "step": 18633 }, { "epoch": 37.268, "grad_norm": 1.3485541343688965, "learning_rate": 2e-05, "loss": 0.0579562, "step": 18634 }, { "epoch": 37.27, "grad_norm": 1.6932165622711182, "learning_rate": 2e-05, "loss": 0.06283514, "step": 18635 }, { "epoch": 37.272, "grad_norm": 1.3861899375915527, "learning_rate": 2e-05, "loss": 0.05023057, "step": 18636 }, { "epoch": 37.274, "grad_norm": 2.3972713947296143, "learning_rate": 2e-05, "loss": 0.04690819, "step": 18637 }, { "epoch": 37.276, "grad_norm": 1.3445793390274048, "learning_rate": 2e-05, "loss": 0.06359614, "step": 18638 }, { "epoch": 37.278, "grad_norm": 0.9832997918128967, "learning_rate": 2e-05, "loss": 0.0378873, "step": 18639 }, { "epoch": 37.28, "grad_norm": 2.1400487422943115, "learning_rate": 2e-05, "loss": 0.05010797, "step": 18640 }, { "epoch": 37.282, "grad_norm": 0.8837321996688843, "learning_rate": 2e-05, "loss": 0.03715153, "step": 18641 }, { "epoch": 37.284, "grad_norm": 1.0266139507293701, "learning_rate": 2e-05, "loss": 0.03989313, "step": 18642 }, { "epoch": 37.286, "grad_norm": 1.3253655433654785, "learning_rate": 2e-05, "loss": 0.0533238, "step": 18643 }, { "epoch": 37.288, "grad_norm": 1.293095588684082, "learning_rate": 2e-05, "loss": 0.0531436, "step": 18644 }, { "epoch": 37.29, "grad_norm": 1.0632065534591675, "learning_rate": 2e-05, "loss": 0.04442224, "step": 18645 }, { "epoch": 37.292, "grad_norm": 1.8831132650375366, "learning_rate": 2e-05, "loss": 0.04281551, "step": 18646 }, { "epoch": 37.294, "grad_norm": 1.3328958749771118, "learning_rate": 2e-05, "loss": 0.03416609, "step": 18647 }, { "epoch": 37.296, "grad_norm": 1.461463451385498, "learning_rate": 2e-05, "loss": 0.04544738, "step": 18648 }, { "epoch": 37.298, "grad_norm": 1.1755958795547485, "learning_rate": 2e-05, "loss": 0.05290145, "step": 18649 }, { "epoch": 37.3, "grad_norm": 0.9932820796966553, "learning_rate": 2e-05, "loss": 0.05134604, "step": 18650 }, { "epoch": 37.302, "grad_norm": 1.019054889678955, "learning_rate": 2e-05, "loss": 0.03638615, "step": 18651 }, { "epoch": 37.304, "grad_norm": 0.9947881698608398, "learning_rate": 2e-05, "loss": 0.03798313, "step": 18652 }, { "epoch": 37.306, "grad_norm": 1.0616116523742676, "learning_rate": 2e-05, "loss": 0.05289736, "step": 18653 }, { "epoch": 37.308, "grad_norm": 0.8234993815422058, "learning_rate": 2e-05, "loss": 0.02449666, "step": 18654 }, { "epoch": 37.31, "grad_norm": 0.9493464231491089, "learning_rate": 2e-05, "loss": 0.03574233, "step": 18655 }, { "epoch": 37.312, "grad_norm": 1.0965240001678467, "learning_rate": 2e-05, "loss": 0.04305274, "step": 18656 }, { "epoch": 37.314, "grad_norm": 1.0377099514007568, "learning_rate": 2e-05, "loss": 0.05121434, "step": 18657 }, { "epoch": 37.316, "grad_norm": 1.273390531539917, "learning_rate": 2e-05, "loss": 0.03455181, "step": 18658 }, { "epoch": 37.318, "grad_norm": 1.1096628904342651, "learning_rate": 2e-05, "loss": 0.03347107, "step": 18659 }, { "epoch": 37.32, "grad_norm": 1.108762264251709, "learning_rate": 2e-05, "loss": 0.05677307, "step": 18660 }, { "epoch": 37.322, "grad_norm": 0.9330056309700012, "learning_rate": 2e-05, "loss": 0.04388924, "step": 18661 }, { "epoch": 37.324, "grad_norm": 0.9625260233879089, "learning_rate": 2e-05, "loss": 0.04363878, "step": 18662 }, { "epoch": 37.326, "grad_norm": 1.1656562089920044, "learning_rate": 2e-05, "loss": 0.04816993, "step": 18663 }, { "epoch": 37.328, "grad_norm": 1.5760594606399536, "learning_rate": 2e-05, "loss": 0.03821027, "step": 18664 }, { "epoch": 37.33, "grad_norm": 1.0648982524871826, "learning_rate": 2e-05, "loss": 0.04351669, "step": 18665 }, { "epoch": 37.332, "grad_norm": 1.1120707988739014, "learning_rate": 2e-05, "loss": 0.05716189, "step": 18666 }, { "epoch": 37.334, "grad_norm": 1.0865143537521362, "learning_rate": 2e-05, "loss": 0.04369311, "step": 18667 }, { "epoch": 37.336, "grad_norm": 1.3720588684082031, "learning_rate": 2e-05, "loss": 0.0602327, "step": 18668 }, { "epoch": 37.338, "grad_norm": 1.2710494995117188, "learning_rate": 2e-05, "loss": 0.0435473, "step": 18669 }, { "epoch": 37.34, "grad_norm": 1.0073732137680054, "learning_rate": 2e-05, "loss": 0.04486372, "step": 18670 }, { "epoch": 37.342, "grad_norm": 1.1116842031478882, "learning_rate": 2e-05, "loss": 0.04512633, "step": 18671 }, { "epoch": 37.344, "grad_norm": 1.6758850812911987, "learning_rate": 2e-05, "loss": 0.06914443, "step": 18672 }, { "epoch": 37.346, "grad_norm": 1.260852575302124, "learning_rate": 2e-05, "loss": 0.04131905, "step": 18673 }, { "epoch": 37.348, "grad_norm": 2.244182586669922, "learning_rate": 2e-05, "loss": 0.06125639, "step": 18674 }, { "epoch": 37.35, "grad_norm": 1.2403620481491089, "learning_rate": 2e-05, "loss": 0.05314468, "step": 18675 }, { "epoch": 37.352, "grad_norm": 1.569681167602539, "learning_rate": 2e-05, "loss": 0.04426288, "step": 18676 }, { "epoch": 37.354, "grad_norm": 1.3588958978652954, "learning_rate": 2e-05, "loss": 0.05753552, "step": 18677 }, { "epoch": 37.356, "grad_norm": 2.209467649459839, "learning_rate": 2e-05, "loss": 0.03650244, "step": 18678 }, { "epoch": 37.358, "grad_norm": 0.9632941484451294, "learning_rate": 2e-05, "loss": 0.03627942, "step": 18679 }, { "epoch": 37.36, "grad_norm": 1.704131841659546, "learning_rate": 2e-05, "loss": 0.04979102, "step": 18680 }, { "epoch": 37.362, "grad_norm": 1.0669357776641846, "learning_rate": 2e-05, "loss": 0.03643303, "step": 18681 }, { "epoch": 37.364, "grad_norm": 1.0994805097579956, "learning_rate": 2e-05, "loss": 0.02993735, "step": 18682 }, { "epoch": 37.366, "grad_norm": 1.55198073387146, "learning_rate": 2e-05, "loss": 0.05181917, "step": 18683 }, { "epoch": 37.368, "grad_norm": 1.3147363662719727, "learning_rate": 2e-05, "loss": 0.03561576, "step": 18684 }, { "epoch": 37.37, "grad_norm": 1.0753350257873535, "learning_rate": 2e-05, "loss": 0.04880945, "step": 18685 }, { "epoch": 37.372, "grad_norm": 0.6490964889526367, "learning_rate": 2e-05, "loss": 0.01630099, "step": 18686 }, { "epoch": 37.374, "grad_norm": 1.5814690589904785, "learning_rate": 2e-05, "loss": 0.0333583, "step": 18687 }, { "epoch": 37.376, "grad_norm": 1.7648757696151733, "learning_rate": 2e-05, "loss": 0.04492356, "step": 18688 }, { "epoch": 37.378, "grad_norm": 1.0159778594970703, "learning_rate": 2e-05, "loss": 0.03480536, "step": 18689 }, { "epoch": 37.38, "grad_norm": 1.4376331567764282, "learning_rate": 2e-05, "loss": 0.03816323, "step": 18690 }, { "epoch": 37.382, "grad_norm": 1.121727466583252, "learning_rate": 2e-05, "loss": 0.03657464, "step": 18691 }, { "epoch": 37.384, "grad_norm": 0.9426096081733704, "learning_rate": 2e-05, "loss": 0.04046069, "step": 18692 }, { "epoch": 37.386, "grad_norm": 1.106610894203186, "learning_rate": 2e-05, "loss": 0.04946468, "step": 18693 }, { "epoch": 37.388, "grad_norm": 1.0896666049957275, "learning_rate": 2e-05, "loss": 0.03705723, "step": 18694 }, { "epoch": 37.39, "grad_norm": 0.9025124311447144, "learning_rate": 2e-05, "loss": 0.02911085, "step": 18695 }, { "epoch": 37.392, "grad_norm": 0.826928973197937, "learning_rate": 2e-05, "loss": 0.02817074, "step": 18696 }, { "epoch": 37.394, "grad_norm": 1.385642647743225, "learning_rate": 2e-05, "loss": 0.04097568, "step": 18697 }, { "epoch": 37.396, "grad_norm": 1.2809875011444092, "learning_rate": 2e-05, "loss": 0.04866387, "step": 18698 }, { "epoch": 37.398, "grad_norm": 1.329746961593628, "learning_rate": 2e-05, "loss": 0.05476627, "step": 18699 }, { "epoch": 37.4, "grad_norm": 1.4562588930130005, "learning_rate": 2e-05, "loss": 0.05559216, "step": 18700 }, { "epoch": 37.402, "grad_norm": 1.6684162616729736, "learning_rate": 2e-05, "loss": 0.04630516, "step": 18701 }, { "epoch": 37.404, "grad_norm": 0.9768155217170715, "learning_rate": 2e-05, "loss": 0.04276256, "step": 18702 }, { "epoch": 37.406, "grad_norm": 1.2113561630249023, "learning_rate": 2e-05, "loss": 0.03790126, "step": 18703 }, { "epoch": 37.408, "grad_norm": 0.9753798246383667, "learning_rate": 2e-05, "loss": 0.04058219, "step": 18704 }, { "epoch": 37.41, "grad_norm": 1.0319480895996094, "learning_rate": 2e-05, "loss": 0.04142007, "step": 18705 }, { "epoch": 37.412, "grad_norm": 0.8820832967758179, "learning_rate": 2e-05, "loss": 0.03687086, "step": 18706 }, { "epoch": 37.414, "grad_norm": 1.021862268447876, "learning_rate": 2e-05, "loss": 0.04124251, "step": 18707 }, { "epoch": 37.416, "grad_norm": 1.415955662727356, "learning_rate": 2e-05, "loss": 0.0474915, "step": 18708 }, { "epoch": 37.418, "grad_norm": 1.2507342100143433, "learning_rate": 2e-05, "loss": 0.04408417, "step": 18709 }, { "epoch": 37.42, "grad_norm": 0.9660037755966187, "learning_rate": 2e-05, "loss": 0.03410469, "step": 18710 }, { "epoch": 37.422, "grad_norm": 0.9281665086746216, "learning_rate": 2e-05, "loss": 0.03058691, "step": 18711 }, { "epoch": 37.424, "grad_norm": 1.1241471767425537, "learning_rate": 2e-05, "loss": 0.05254525, "step": 18712 }, { "epoch": 37.426, "grad_norm": 1.9680571556091309, "learning_rate": 2e-05, "loss": 0.06595837, "step": 18713 }, { "epoch": 37.428, "grad_norm": 1.1519217491149902, "learning_rate": 2e-05, "loss": 0.0410401, "step": 18714 }, { "epoch": 37.43, "grad_norm": 1.0350985527038574, "learning_rate": 2e-05, "loss": 0.03957517, "step": 18715 }, { "epoch": 37.432, "grad_norm": 1.030280351638794, "learning_rate": 2e-05, "loss": 0.05034163, "step": 18716 }, { "epoch": 37.434, "grad_norm": 1.1518094539642334, "learning_rate": 2e-05, "loss": 0.04843215, "step": 18717 }, { "epoch": 37.436, "grad_norm": 0.9654944539070129, "learning_rate": 2e-05, "loss": 0.03407705, "step": 18718 }, { "epoch": 37.438, "grad_norm": 1.677377462387085, "learning_rate": 2e-05, "loss": 0.06618379, "step": 18719 }, { "epoch": 37.44, "grad_norm": 1.1766339540481567, "learning_rate": 2e-05, "loss": 0.05563653, "step": 18720 }, { "epoch": 37.442, "grad_norm": 1.033892273902893, "learning_rate": 2e-05, "loss": 0.0398586, "step": 18721 }, { "epoch": 37.444, "grad_norm": 0.9789164066314697, "learning_rate": 2e-05, "loss": 0.03886178, "step": 18722 }, { "epoch": 37.446, "grad_norm": 0.983202338218689, "learning_rate": 2e-05, "loss": 0.02763868, "step": 18723 }, { "epoch": 37.448, "grad_norm": 1.2991498708724976, "learning_rate": 2e-05, "loss": 0.04163698, "step": 18724 }, { "epoch": 37.45, "grad_norm": 1.2219797372817993, "learning_rate": 2e-05, "loss": 0.0413318, "step": 18725 }, { "epoch": 37.452, "grad_norm": 0.9120646715164185, "learning_rate": 2e-05, "loss": 0.0288358, "step": 18726 }, { "epoch": 37.454, "grad_norm": 2.002021312713623, "learning_rate": 2e-05, "loss": 0.03625394, "step": 18727 }, { "epoch": 37.456, "grad_norm": 1.9354413747787476, "learning_rate": 2e-05, "loss": 0.04129477, "step": 18728 }, { "epoch": 37.458, "grad_norm": 1.132306694984436, "learning_rate": 2e-05, "loss": 0.04617504, "step": 18729 }, { "epoch": 37.46, "grad_norm": 1.1618436574935913, "learning_rate": 2e-05, "loss": 0.0473834, "step": 18730 }, { "epoch": 37.462, "grad_norm": 1.106923222541809, "learning_rate": 2e-05, "loss": 0.04612853, "step": 18731 }, { "epoch": 37.464, "grad_norm": 1.0806629657745361, "learning_rate": 2e-05, "loss": 0.03217027, "step": 18732 }, { "epoch": 37.466, "grad_norm": 1.126107096672058, "learning_rate": 2e-05, "loss": 0.04397826, "step": 18733 }, { "epoch": 37.468, "grad_norm": 1.1601415872573853, "learning_rate": 2e-05, "loss": 0.04445393, "step": 18734 }, { "epoch": 37.47, "grad_norm": 0.8998717069625854, "learning_rate": 2e-05, "loss": 0.02971387, "step": 18735 }, { "epoch": 37.472, "grad_norm": 1.5459580421447754, "learning_rate": 2e-05, "loss": 0.06649768, "step": 18736 }, { "epoch": 37.474, "grad_norm": 1.270645022392273, "learning_rate": 2e-05, "loss": 0.04256365, "step": 18737 }, { "epoch": 37.476, "grad_norm": 1.66653573513031, "learning_rate": 2e-05, "loss": 0.04069435, "step": 18738 }, { "epoch": 37.478, "grad_norm": 0.996475100517273, "learning_rate": 2e-05, "loss": 0.04074864, "step": 18739 }, { "epoch": 37.48, "grad_norm": 1.2826459407806396, "learning_rate": 2e-05, "loss": 0.05607035, "step": 18740 }, { "epoch": 37.482, "grad_norm": 1.2153382301330566, "learning_rate": 2e-05, "loss": 0.05422562, "step": 18741 }, { "epoch": 37.484, "grad_norm": 1.2384055852890015, "learning_rate": 2e-05, "loss": 0.04050317, "step": 18742 }, { "epoch": 37.486, "grad_norm": 1.0686463117599487, "learning_rate": 2e-05, "loss": 0.04167401, "step": 18743 }, { "epoch": 37.488, "grad_norm": 1.2066036462783813, "learning_rate": 2e-05, "loss": 0.04531534, "step": 18744 }, { "epoch": 37.49, "grad_norm": 0.9987419247627258, "learning_rate": 2e-05, "loss": 0.04012421, "step": 18745 }, { "epoch": 37.492, "grad_norm": 0.9740080237388611, "learning_rate": 2e-05, "loss": 0.03634139, "step": 18746 }, { "epoch": 37.494, "grad_norm": 1.3249741792678833, "learning_rate": 2e-05, "loss": 0.03803197, "step": 18747 }, { "epoch": 37.496, "grad_norm": 1.6681747436523438, "learning_rate": 2e-05, "loss": 0.04957918, "step": 18748 }, { "epoch": 37.498, "grad_norm": 1.5310307741165161, "learning_rate": 2e-05, "loss": 0.04288496, "step": 18749 }, { "epoch": 37.5, "grad_norm": 1.0008505582809448, "learning_rate": 2e-05, "loss": 0.03939836, "step": 18750 }, { "epoch": 37.502, "grad_norm": 1.1147916316986084, "learning_rate": 2e-05, "loss": 0.04899579, "step": 18751 }, { "epoch": 37.504, "grad_norm": 0.8553879261016846, "learning_rate": 2e-05, "loss": 0.03566083, "step": 18752 }, { "epoch": 37.506, "grad_norm": 0.7958679795265198, "learning_rate": 2e-05, "loss": 0.02956646, "step": 18753 }, { "epoch": 37.508, "grad_norm": 1.0791382789611816, "learning_rate": 2e-05, "loss": 0.02780805, "step": 18754 }, { "epoch": 37.51, "grad_norm": 1.2659754753112793, "learning_rate": 2e-05, "loss": 0.04429627, "step": 18755 }, { "epoch": 37.512, "grad_norm": 2.2846593856811523, "learning_rate": 2e-05, "loss": 0.03261747, "step": 18756 }, { "epoch": 37.514, "grad_norm": 1.2112829685211182, "learning_rate": 2e-05, "loss": 0.0407877, "step": 18757 }, { "epoch": 37.516, "grad_norm": 1.5165746212005615, "learning_rate": 2e-05, "loss": 0.04490262, "step": 18758 }, { "epoch": 37.518, "grad_norm": 1.1374213695526123, "learning_rate": 2e-05, "loss": 0.05539905, "step": 18759 }, { "epoch": 37.52, "grad_norm": 1.4474830627441406, "learning_rate": 2e-05, "loss": 0.05303566, "step": 18760 }, { "epoch": 37.522, "grad_norm": 1.061644434928894, "learning_rate": 2e-05, "loss": 0.03879377, "step": 18761 }, { "epoch": 37.524, "grad_norm": 1.2156922817230225, "learning_rate": 2e-05, "loss": 0.04445741, "step": 18762 }, { "epoch": 37.526, "grad_norm": 1.0017434358596802, "learning_rate": 2e-05, "loss": 0.0404274, "step": 18763 }, { "epoch": 37.528, "grad_norm": 1.1403751373291016, "learning_rate": 2e-05, "loss": 0.03409303, "step": 18764 }, { "epoch": 37.53, "grad_norm": 1.4317917823791504, "learning_rate": 2e-05, "loss": 0.04550587, "step": 18765 }, { "epoch": 37.532, "grad_norm": 1.5411899089813232, "learning_rate": 2e-05, "loss": 0.04525489, "step": 18766 }, { "epoch": 37.534, "grad_norm": 1.0103319883346558, "learning_rate": 2e-05, "loss": 0.03338393, "step": 18767 }, { "epoch": 37.536, "grad_norm": 0.9137657284736633, "learning_rate": 2e-05, "loss": 0.03574432, "step": 18768 }, { "epoch": 37.538, "grad_norm": 1.337023138999939, "learning_rate": 2e-05, "loss": 0.05035329, "step": 18769 }, { "epoch": 37.54, "grad_norm": 1.0165894031524658, "learning_rate": 2e-05, "loss": 0.04362631, "step": 18770 }, { "epoch": 37.542, "grad_norm": 1.3536618947982788, "learning_rate": 2e-05, "loss": 0.06143478, "step": 18771 }, { "epoch": 37.544, "grad_norm": 1.4272964000701904, "learning_rate": 2e-05, "loss": 0.03958009, "step": 18772 }, { "epoch": 37.546, "grad_norm": 1.0528368949890137, "learning_rate": 2e-05, "loss": 0.04316139, "step": 18773 }, { "epoch": 37.548, "grad_norm": 1.1067519187927246, "learning_rate": 2e-05, "loss": 0.03696557, "step": 18774 }, { "epoch": 37.55, "grad_norm": 1.021020531654358, "learning_rate": 2e-05, "loss": 0.04061282, "step": 18775 }, { "epoch": 37.552, "grad_norm": 1.4153789281845093, "learning_rate": 2e-05, "loss": 0.04513776, "step": 18776 }, { "epoch": 37.554, "grad_norm": 2.9456605911254883, "learning_rate": 2e-05, "loss": 0.04253369, "step": 18777 }, { "epoch": 37.556, "grad_norm": 1.0258910655975342, "learning_rate": 2e-05, "loss": 0.04435899, "step": 18778 }, { "epoch": 37.558, "grad_norm": 1.2424430847167969, "learning_rate": 2e-05, "loss": 0.06741741, "step": 18779 }, { "epoch": 37.56, "grad_norm": 2.3567514419555664, "learning_rate": 2e-05, "loss": 0.03990025, "step": 18780 }, { "epoch": 37.562, "grad_norm": 0.9585657715797424, "learning_rate": 2e-05, "loss": 0.03028939, "step": 18781 }, { "epoch": 37.564, "grad_norm": 1.4681471586227417, "learning_rate": 2e-05, "loss": 0.07524794, "step": 18782 }, { "epoch": 37.566, "grad_norm": 1.1728054285049438, "learning_rate": 2e-05, "loss": 0.03987654, "step": 18783 }, { "epoch": 37.568, "grad_norm": 1.0648523569107056, "learning_rate": 2e-05, "loss": 0.0564645, "step": 18784 }, { "epoch": 37.57, "grad_norm": 1.0455750226974487, "learning_rate": 2e-05, "loss": 0.03324683, "step": 18785 }, { "epoch": 37.572, "grad_norm": 1.141681432723999, "learning_rate": 2e-05, "loss": 0.03916598, "step": 18786 }, { "epoch": 37.574, "grad_norm": 1.0642967224121094, "learning_rate": 2e-05, "loss": 0.05424938, "step": 18787 }, { "epoch": 37.576, "grad_norm": 1.0437703132629395, "learning_rate": 2e-05, "loss": 0.04625114, "step": 18788 }, { "epoch": 37.578, "grad_norm": 0.8992397785186768, "learning_rate": 2e-05, "loss": 0.03381258, "step": 18789 }, { "epoch": 37.58, "grad_norm": 2.122413158416748, "learning_rate": 2e-05, "loss": 0.03955257, "step": 18790 }, { "epoch": 37.582, "grad_norm": 1.2580934762954712, "learning_rate": 2e-05, "loss": 0.03735654, "step": 18791 }, { "epoch": 37.584, "grad_norm": 0.8548269867897034, "learning_rate": 2e-05, "loss": 0.03740982, "step": 18792 }, { "epoch": 37.586, "grad_norm": 1.1040412187576294, "learning_rate": 2e-05, "loss": 0.06896301, "step": 18793 }, { "epoch": 37.588, "grad_norm": 1.0904555320739746, "learning_rate": 2e-05, "loss": 0.04003724, "step": 18794 }, { "epoch": 37.59, "grad_norm": 0.9670760035514832, "learning_rate": 2e-05, "loss": 0.03714907, "step": 18795 }, { "epoch": 37.592, "grad_norm": 1.166575312614441, "learning_rate": 2e-05, "loss": 0.03536236, "step": 18796 }, { "epoch": 37.594, "grad_norm": 1.6498163938522339, "learning_rate": 2e-05, "loss": 0.03903349, "step": 18797 }, { "epoch": 37.596, "grad_norm": 1.057323932647705, "learning_rate": 2e-05, "loss": 0.05029205, "step": 18798 }, { "epoch": 37.598, "grad_norm": 1.663523554801941, "learning_rate": 2e-05, "loss": 0.03777354, "step": 18799 }, { "epoch": 37.6, "grad_norm": 1.1782076358795166, "learning_rate": 2e-05, "loss": 0.03594039, "step": 18800 }, { "epoch": 37.602, "grad_norm": 1.2294508218765259, "learning_rate": 2e-05, "loss": 0.02808234, "step": 18801 }, { "epoch": 37.604, "grad_norm": 1.3029745817184448, "learning_rate": 2e-05, "loss": 0.05593179, "step": 18802 }, { "epoch": 37.606, "grad_norm": 1.0195879936218262, "learning_rate": 2e-05, "loss": 0.03832771, "step": 18803 }, { "epoch": 37.608, "grad_norm": 0.9678752422332764, "learning_rate": 2e-05, "loss": 0.03599244, "step": 18804 }, { "epoch": 37.61, "grad_norm": 0.9449793100357056, "learning_rate": 2e-05, "loss": 0.03616268, "step": 18805 }, { "epoch": 37.612, "grad_norm": 1.102702260017395, "learning_rate": 2e-05, "loss": 0.03862703, "step": 18806 }, { "epoch": 37.614, "grad_norm": 1.4119384288787842, "learning_rate": 2e-05, "loss": 0.05421577, "step": 18807 }, { "epoch": 37.616, "grad_norm": 2.5671143531799316, "learning_rate": 2e-05, "loss": 0.05665691, "step": 18808 }, { "epoch": 37.618, "grad_norm": 1.0099848508834839, "learning_rate": 2e-05, "loss": 0.04807793, "step": 18809 }, { "epoch": 37.62, "grad_norm": 1.3990161418914795, "learning_rate": 2e-05, "loss": 0.05522398, "step": 18810 }, { "epoch": 37.622, "grad_norm": 1.129443645477295, "learning_rate": 2e-05, "loss": 0.05636445, "step": 18811 }, { "epoch": 37.624, "grad_norm": 1.082963228225708, "learning_rate": 2e-05, "loss": 0.04201128, "step": 18812 }, { "epoch": 37.626, "grad_norm": 0.9115501642227173, "learning_rate": 2e-05, "loss": 0.03409997, "step": 18813 }, { "epoch": 37.628, "grad_norm": 1.5805048942565918, "learning_rate": 2e-05, "loss": 0.05126679, "step": 18814 }, { "epoch": 37.63, "grad_norm": 0.9611139893531799, "learning_rate": 2e-05, "loss": 0.0392372, "step": 18815 }, { "epoch": 37.632, "grad_norm": 0.9888186454772949, "learning_rate": 2e-05, "loss": 0.03317364, "step": 18816 }, { "epoch": 37.634, "grad_norm": 1.6233830451965332, "learning_rate": 2e-05, "loss": 0.04676667, "step": 18817 }, { "epoch": 37.636, "grad_norm": 1.1264859437942505, "learning_rate": 2e-05, "loss": 0.03827876, "step": 18818 }, { "epoch": 37.638, "grad_norm": 1.402205467224121, "learning_rate": 2e-05, "loss": 0.03780929, "step": 18819 }, { "epoch": 37.64, "grad_norm": 1.2931559085845947, "learning_rate": 2e-05, "loss": 0.05103051, "step": 18820 }, { "epoch": 37.642, "grad_norm": 1.0364389419555664, "learning_rate": 2e-05, "loss": 0.04648129, "step": 18821 }, { "epoch": 37.644, "grad_norm": 0.9718706607818604, "learning_rate": 2e-05, "loss": 0.04144686, "step": 18822 }, { "epoch": 37.646, "grad_norm": 1.2100083827972412, "learning_rate": 2e-05, "loss": 0.04853836, "step": 18823 }, { "epoch": 37.648, "grad_norm": 0.8672258257865906, "learning_rate": 2e-05, "loss": 0.02894824, "step": 18824 }, { "epoch": 37.65, "grad_norm": 1.2831289768218994, "learning_rate": 2e-05, "loss": 0.04380868, "step": 18825 }, { "epoch": 37.652, "grad_norm": 1.1962673664093018, "learning_rate": 2e-05, "loss": 0.06062628, "step": 18826 }, { "epoch": 37.654, "grad_norm": 1.0220983028411865, "learning_rate": 2e-05, "loss": 0.04551403, "step": 18827 }, { "epoch": 37.656, "grad_norm": 1.1660890579223633, "learning_rate": 2e-05, "loss": 0.04603134, "step": 18828 }, { "epoch": 37.658, "grad_norm": 1.6814560890197754, "learning_rate": 2e-05, "loss": 0.05087702, "step": 18829 }, { "epoch": 37.66, "grad_norm": 1.1539851427078247, "learning_rate": 2e-05, "loss": 0.04578172, "step": 18830 }, { "epoch": 37.662, "grad_norm": 1.0317453145980835, "learning_rate": 2e-05, "loss": 0.03530533, "step": 18831 }, { "epoch": 37.664, "grad_norm": 0.9875096678733826, "learning_rate": 2e-05, "loss": 0.03653241, "step": 18832 }, { "epoch": 37.666, "grad_norm": 1.0412888526916504, "learning_rate": 2e-05, "loss": 0.04385642, "step": 18833 }, { "epoch": 37.668, "grad_norm": 1.0991995334625244, "learning_rate": 2e-05, "loss": 0.05635276, "step": 18834 }, { "epoch": 37.67, "grad_norm": 1.0921926498413086, "learning_rate": 2e-05, "loss": 0.04315586, "step": 18835 }, { "epoch": 37.672, "grad_norm": 1.0444475412368774, "learning_rate": 2e-05, "loss": 0.0539056, "step": 18836 }, { "epoch": 37.674, "grad_norm": 1.982426404953003, "learning_rate": 2e-05, "loss": 0.0444312, "step": 18837 }, { "epoch": 37.676, "grad_norm": 1.0868357419967651, "learning_rate": 2e-05, "loss": 0.04363297, "step": 18838 }, { "epoch": 37.678, "grad_norm": 0.9949026107788086, "learning_rate": 2e-05, "loss": 0.03962643, "step": 18839 }, { "epoch": 37.68, "grad_norm": 1.022019624710083, "learning_rate": 2e-05, "loss": 0.04749414, "step": 18840 }, { "epoch": 37.682, "grad_norm": 0.8703010082244873, "learning_rate": 2e-05, "loss": 0.0276827, "step": 18841 }, { "epoch": 37.684, "grad_norm": 1.0120478868484497, "learning_rate": 2e-05, "loss": 0.03630254, "step": 18842 }, { "epoch": 37.686, "grad_norm": 1.2947815656661987, "learning_rate": 2e-05, "loss": 0.04670317, "step": 18843 }, { "epoch": 37.688, "grad_norm": 1.1354161500930786, "learning_rate": 2e-05, "loss": 0.03456123, "step": 18844 }, { "epoch": 37.69, "grad_norm": 1.0470629930496216, "learning_rate": 2e-05, "loss": 0.04884163, "step": 18845 }, { "epoch": 37.692, "grad_norm": 1.013656497001648, "learning_rate": 2e-05, "loss": 0.03474177, "step": 18846 }, { "epoch": 37.694, "grad_norm": 1.2719136476516724, "learning_rate": 2e-05, "loss": 0.0532422, "step": 18847 }, { "epoch": 37.696, "grad_norm": 1.1343333721160889, "learning_rate": 2e-05, "loss": 0.04239897, "step": 18848 }, { "epoch": 37.698, "grad_norm": 1.1942012310028076, "learning_rate": 2e-05, "loss": 0.05409771, "step": 18849 }, { "epoch": 37.7, "grad_norm": 1.0362792015075684, "learning_rate": 2e-05, "loss": 0.03506543, "step": 18850 }, { "epoch": 37.702, "grad_norm": 1.115951657295227, "learning_rate": 2e-05, "loss": 0.0418034, "step": 18851 }, { "epoch": 37.704, "grad_norm": 1.094841718673706, "learning_rate": 2e-05, "loss": 0.04899231, "step": 18852 }, { "epoch": 37.706, "grad_norm": 1.0014265775680542, "learning_rate": 2e-05, "loss": 0.03522962, "step": 18853 }, { "epoch": 37.708, "grad_norm": 1.6350477933883667, "learning_rate": 2e-05, "loss": 0.03969879, "step": 18854 }, { "epoch": 37.71, "grad_norm": 1.0936475992202759, "learning_rate": 2e-05, "loss": 0.04207449, "step": 18855 }, { "epoch": 37.712, "grad_norm": 1.1588548421859741, "learning_rate": 2e-05, "loss": 0.06258174, "step": 18856 }, { "epoch": 37.714, "grad_norm": 1.558176040649414, "learning_rate": 2e-05, "loss": 0.05331915, "step": 18857 }, { "epoch": 37.716, "grad_norm": 1.3440383672714233, "learning_rate": 2e-05, "loss": 0.03128509, "step": 18858 }, { "epoch": 37.718, "grad_norm": 0.9466093182563782, "learning_rate": 2e-05, "loss": 0.03084318, "step": 18859 }, { "epoch": 37.72, "grad_norm": 0.9748710989952087, "learning_rate": 2e-05, "loss": 0.03572369, "step": 18860 }, { "epoch": 37.722, "grad_norm": 1.8796697854995728, "learning_rate": 2e-05, "loss": 0.05052012, "step": 18861 }, { "epoch": 37.724, "grad_norm": 1.4299914836883545, "learning_rate": 2e-05, "loss": 0.04214597, "step": 18862 }, { "epoch": 37.726, "grad_norm": 1.069328784942627, "learning_rate": 2e-05, "loss": 0.04031834, "step": 18863 }, { "epoch": 37.728, "grad_norm": 1.1966696977615356, "learning_rate": 2e-05, "loss": 0.0401395, "step": 18864 }, { "epoch": 37.73, "grad_norm": 2.3898987770080566, "learning_rate": 2e-05, "loss": 0.06085876, "step": 18865 }, { "epoch": 37.732, "grad_norm": 1.8415448665618896, "learning_rate": 2e-05, "loss": 0.05794105, "step": 18866 }, { "epoch": 37.734, "grad_norm": 1.5700851678848267, "learning_rate": 2e-05, "loss": 0.06148114, "step": 18867 }, { "epoch": 37.736, "grad_norm": 1.1868525743484497, "learning_rate": 2e-05, "loss": 0.03793664, "step": 18868 }, { "epoch": 37.738, "grad_norm": 1.386899471282959, "learning_rate": 2e-05, "loss": 0.04799426, "step": 18869 }, { "epoch": 37.74, "grad_norm": 1.1481682062149048, "learning_rate": 2e-05, "loss": 0.05042987, "step": 18870 }, { "epoch": 37.742, "grad_norm": 1.2423620223999023, "learning_rate": 2e-05, "loss": 0.04836147, "step": 18871 }, { "epoch": 37.744, "grad_norm": 2.295675039291382, "learning_rate": 2e-05, "loss": 0.04542322, "step": 18872 }, { "epoch": 37.746, "grad_norm": 1.1619027853012085, "learning_rate": 2e-05, "loss": 0.03707786, "step": 18873 }, { "epoch": 37.748, "grad_norm": 1.4235494136810303, "learning_rate": 2e-05, "loss": 0.04797658, "step": 18874 }, { "epoch": 37.75, "grad_norm": 1.3280595541000366, "learning_rate": 2e-05, "loss": 0.05838828, "step": 18875 }, { "epoch": 37.752, "grad_norm": 0.9920888543128967, "learning_rate": 2e-05, "loss": 0.03158339, "step": 18876 }, { "epoch": 37.754, "grad_norm": 1.1040688753128052, "learning_rate": 2e-05, "loss": 0.03485991, "step": 18877 }, { "epoch": 37.756, "grad_norm": 1.0648990869522095, "learning_rate": 2e-05, "loss": 0.04095337, "step": 18878 }, { "epoch": 37.758, "grad_norm": 1.4326541423797607, "learning_rate": 2e-05, "loss": 0.04586481, "step": 18879 }, { "epoch": 37.76, "grad_norm": 1.2277796268463135, "learning_rate": 2e-05, "loss": 0.04827518, "step": 18880 }, { "epoch": 37.762, "grad_norm": 1.0598303079605103, "learning_rate": 2e-05, "loss": 0.04587974, "step": 18881 }, { "epoch": 37.764, "grad_norm": 2.3544678688049316, "learning_rate": 2e-05, "loss": 0.04076872, "step": 18882 }, { "epoch": 37.766, "grad_norm": 2.961266279220581, "learning_rate": 2e-05, "loss": 0.04413686, "step": 18883 }, { "epoch": 37.768, "grad_norm": 1.1886168718338013, "learning_rate": 2e-05, "loss": 0.05006697, "step": 18884 }, { "epoch": 37.77, "grad_norm": 1.7172924280166626, "learning_rate": 2e-05, "loss": 0.04549586, "step": 18885 }, { "epoch": 37.772, "grad_norm": 2.2110679149627686, "learning_rate": 2e-05, "loss": 0.04849089, "step": 18886 }, { "epoch": 37.774, "grad_norm": 1.4250366687774658, "learning_rate": 2e-05, "loss": 0.05197417, "step": 18887 }, { "epoch": 37.776, "grad_norm": 1.3715035915374756, "learning_rate": 2e-05, "loss": 0.04593984, "step": 18888 }, { "epoch": 37.778, "grad_norm": 1.8877391815185547, "learning_rate": 2e-05, "loss": 0.0439683, "step": 18889 }, { "epoch": 37.78, "grad_norm": 1.0670255422592163, "learning_rate": 2e-05, "loss": 0.03677295, "step": 18890 }, { "epoch": 37.782, "grad_norm": 1.2562309503555298, "learning_rate": 2e-05, "loss": 0.03133457, "step": 18891 }, { "epoch": 37.784, "grad_norm": 1.221799373626709, "learning_rate": 2e-05, "loss": 0.04235502, "step": 18892 }, { "epoch": 37.786, "grad_norm": 1.320099949836731, "learning_rate": 2e-05, "loss": 0.04577269, "step": 18893 }, { "epoch": 37.788, "grad_norm": 1.0770692825317383, "learning_rate": 2e-05, "loss": 0.05014978, "step": 18894 }, { "epoch": 37.79, "grad_norm": 2.9332194328308105, "learning_rate": 2e-05, "loss": 0.03677168, "step": 18895 }, { "epoch": 37.792, "grad_norm": 1.4482650756835938, "learning_rate": 2e-05, "loss": 0.05690682, "step": 18896 }, { "epoch": 37.794, "grad_norm": 3.2924160957336426, "learning_rate": 2e-05, "loss": 0.05873226, "step": 18897 }, { "epoch": 37.796, "grad_norm": 0.8983320593833923, "learning_rate": 2e-05, "loss": 0.02649505, "step": 18898 }, { "epoch": 37.798, "grad_norm": 0.8349385857582092, "learning_rate": 2e-05, "loss": 0.03259555, "step": 18899 }, { "epoch": 37.8, "grad_norm": 3.147834062576294, "learning_rate": 2e-05, "loss": 0.04678661, "step": 18900 }, { "epoch": 37.802, "grad_norm": 1.2380520105361938, "learning_rate": 2e-05, "loss": 0.05397804, "step": 18901 }, { "epoch": 37.804, "grad_norm": 1.055299997329712, "learning_rate": 2e-05, "loss": 0.0371677, "step": 18902 }, { "epoch": 37.806, "grad_norm": 1.2146174907684326, "learning_rate": 2e-05, "loss": 0.04826795, "step": 18903 }, { "epoch": 37.808, "grad_norm": 0.930882453918457, "learning_rate": 2e-05, "loss": 0.03340304, "step": 18904 }, { "epoch": 37.81, "grad_norm": 1.0792516469955444, "learning_rate": 2e-05, "loss": 0.03648348, "step": 18905 }, { "epoch": 37.812, "grad_norm": 0.9146212935447693, "learning_rate": 2e-05, "loss": 0.03475334, "step": 18906 }, { "epoch": 37.814, "grad_norm": 1.1347914934158325, "learning_rate": 2e-05, "loss": 0.04972285, "step": 18907 }, { "epoch": 37.816, "grad_norm": 2.8119237422943115, "learning_rate": 2e-05, "loss": 0.0575778, "step": 18908 }, { "epoch": 37.818, "grad_norm": 1.409567952156067, "learning_rate": 2e-05, "loss": 0.04484772, "step": 18909 }, { "epoch": 37.82, "grad_norm": 1.0711102485656738, "learning_rate": 2e-05, "loss": 0.03806607, "step": 18910 }, { "epoch": 37.822, "grad_norm": 1.15849769115448, "learning_rate": 2e-05, "loss": 0.03112827, "step": 18911 }, { "epoch": 37.824, "grad_norm": 0.9912369847297668, "learning_rate": 2e-05, "loss": 0.03591751, "step": 18912 }, { "epoch": 37.826, "grad_norm": 1.378166675567627, "learning_rate": 2e-05, "loss": 0.0576729, "step": 18913 }, { "epoch": 37.828, "grad_norm": 1.0558768510818481, "learning_rate": 2e-05, "loss": 0.03098297, "step": 18914 }, { "epoch": 37.83, "grad_norm": 1.2733522653579712, "learning_rate": 2e-05, "loss": 0.04115165, "step": 18915 }, { "epoch": 37.832, "grad_norm": 1.300357460975647, "learning_rate": 2e-05, "loss": 0.06739998, "step": 18916 }, { "epoch": 37.834, "grad_norm": 1.0592100620269775, "learning_rate": 2e-05, "loss": 0.05033366, "step": 18917 }, { "epoch": 37.836, "grad_norm": 1.1297162771224976, "learning_rate": 2e-05, "loss": 0.04939591, "step": 18918 }, { "epoch": 37.838, "grad_norm": 1.252231240272522, "learning_rate": 2e-05, "loss": 0.03955444, "step": 18919 }, { "epoch": 37.84, "grad_norm": 1.1079142093658447, "learning_rate": 2e-05, "loss": 0.03638052, "step": 18920 }, { "epoch": 37.842, "grad_norm": 0.8688382506370544, "learning_rate": 2e-05, "loss": 0.02995694, "step": 18921 }, { "epoch": 37.844, "grad_norm": 1.1126800775527954, "learning_rate": 2e-05, "loss": 0.03253857, "step": 18922 }, { "epoch": 37.846, "grad_norm": 0.9654681086540222, "learning_rate": 2e-05, "loss": 0.03584439, "step": 18923 }, { "epoch": 37.848, "grad_norm": 0.9588539600372314, "learning_rate": 2e-05, "loss": 0.03743844, "step": 18924 }, { "epoch": 37.85, "grad_norm": 0.8038519620895386, "learning_rate": 2e-05, "loss": 0.03202536, "step": 18925 }, { "epoch": 37.852, "grad_norm": 1.1769639253616333, "learning_rate": 2e-05, "loss": 0.0550707, "step": 18926 }, { "epoch": 37.854, "grad_norm": 1.3562484979629517, "learning_rate": 2e-05, "loss": 0.04457762, "step": 18927 }, { "epoch": 37.856, "grad_norm": 2.0206151008605957, "learning_rate": 2e-05, "loss": 0.05219372, "step": 18928 }, { "epoch": 37.858, "grad_norm": 0.8258675932884216, "learning_rate": 2e-05, "loss": 0.03097643, "step": 18929 }, { "epoch": 37.86, "grad_norm": 1.0403401851654053, "learning_rate": 2e-05, "loss": 0.0481614, "step": 18930 }, { "epoch": 37.862, "grad_norm": 2.12001895904541, "learning_rate": 2e-05, "loss": 0.04533806, "step": 18931 }, { "epoch": 37.864, "grad_norm": 1.1743680238723755, "learning_rate": 2e-05, "loss": 0.05559888, "step": 18932 }, { "epoch": 37.866, "grad_norm": 1.301820993423462, "learning_rate": 2e-05, "loss": 0.04499183, "step": 18933 }, { "epoch": 37.868, "grad_norm": 0.9837618470191956, "learning_rate": 2e-05, "loss": 0.03871724, "step": 18934 }, { "epoch": 37.87, "grad_norm": 1.5907585620880127, "learning_rate": 2e-05, "loss": 0.04992094, "step": 18935 }, { "epoch": 37.872, "grad_norm": 0.9256614446640015, "learning_rate": 2e-05, "loss": 0.03672835, "step": 18936 }, { "epoch": 37.874, "grad_norm": 1.277227520942688, "learning_rate": 2e-05, "loss": 0.05088023, "step": 18937 }, { "epoch": 37.876, "grad_norm": 1.2029907703399658, "learning_rate": 2e-05, "loss": 0.04554307, "step": 18938 }, { "epoch": 37.878, "grad_norm": 0.8870610594749451, "learning_rate": 2e-05, "loss": 0.02822838, "step": 18939 }, { "epoch": 37.88, "grad_norm": 1.656826138496399, "learning_rate": 2e-05, "loss": 0.04394811, "step": 18940 }, { "epoch": 37.882, "grad_norm": 1.0443203449249268, "learning_rate": 2e-05, "loss": 0.03309946, "step": 18941 }, { "epoch": 37.884, "grad_norm": 0.7729543447494507, "learning_rate": 2e-05, "loss": 0.02462711, "step": 18942 }, { "epoch": 37.886, "grad_norm": 0.9086648225784302, "learning_rate": 2e-05, "loss": 0.03681312, "step": 18943 }, { "epoch": 37.888, "grad_norm": 0.9825101494789124, "learning_rate": 2e-05, "loss": 0.04218232, "step": 18944 }, { "epoch": 37.89, "grad_norm": 0.8920814990997314, "learning_rate": 2e-05, "loss": 0.0277781, "step": 18945 }, { "epoch": 37.892, "grad_norm": 1.037960410118103, "learning_rate": 2e-05, "loss": 0.05137964, "step": 18946 }, { "epoch": 37.894, "grad_norm": 1.0242578983306885, "learning_rate": 2e-05, "loss": 0.03562148, "step": 18947 }, { "epoch": 37.896, "grad_norm": 1.1324876546859741, "learning_rate": 2e-05, "loss": 0.04308298, "step": 18948 }, { "epoch": 37.898, "grad_norm": 1.1029669046401978, "learning_rate": 2e-05, "loss": 0.03178472, "step": 18949 }, { "epoch": 37.9, "grad_norm": 1.2301125526428223, "learning_rate": 2e-05, "loss": 0.04993764, "step": 18950 }, { "epoch": 37.902, "grad_norm": 1.159550666809082, "learning_rate": 2e-05, "loss": 0.03458505, "step": 18951 }, { "epoch": 37.904, "grad_norm": 1.0247794389724731, "learning_rate": 2e-05, "loss": 0.03424925, "step": 18952 }, { "epoch": 37.906, "grad_norm": 0.9856832027435303, "learning_rate": 2e-05, "loss": 0.03638713, "step": 18953 }, { "epoch": 37.908, "grad_norm": 1.0859779119491577, "learning_rate": 2e-05, "loss": 0.03618608, "step": 18954 }, { "epoch": 37.91, "grad_norm": 1.0067310333251953, "learning_rate": 2e-05, "loss": 0.0405248, "step": 18955 }, { "epoch": 37.912, "grad_norm": 1.6460914611816406, "learning_rate": 2e-05, "loss": 0.04224902, "step": 18956 }, { "epoch": 37.914, "grad_norm": 1.3289964199066162, "learning_rate": 2e-05, "loss": 0.06237075, "step": 18957 }, { "epoch": 37.916, "grad_norm": 0.8281854391098022, "learning_rate": 2e-05, "loss": 0.01907051, "step": 18958 }, { "epoch": 37.918, "grad_norm": 1.11221444606781, "learning_rate": 2e-05, "loss": 0.05262002, "step": 18959 }, { "epoch": 37.92, "grad_norm": 1.0773868560791016, "learning_rate": 2e-05, "loss": 0.04675142, "step": 18960 }, { "epoch": 37.922, "grad_norm": 1.2198683023452759, "learning_rate": 2e-05, "loss": 0.05705185, "step": 18961 }, { "epoch": 37.924, "grad_norm": 0.8966087698936462, "learning_rate": 2e-05, "loss": 0.03536968, "step": 18962 }, { "epoch": 37.926, "grad_norm": 1.0335179567337036, "learning_rate": 2e-05, "loss": 0.05837837, "step": 18963 }, { "epoch": 37.928, "grad_norm": 1.2544982433319092, "learning_rate": 2e-05, "loss": 0.04942916, "step": 18964 }, { "epoch": 37.93, "grad_norm": 1.2923914194107056, "learning_rate": 2e-05, "loss": 0.03757299, "step": 18965 }, { "epoch": 37.932, "grad_norm": 1.0922046899795532, "learning_rate": 2e-05, "loss": 0.03501926, "step": 18966 }, { "epoch": 37.934, "grad_norm": 0.925020694732666, "learning_rate": 2e-05, "loss": 0.02777275, "step": 18967 }, { "epoch": 37.936, "grad_norm": 2.9154934883117676, "learning_rate": 2e-05, "loss": 0.06504337, "step": 18968 }, { "epoch": 37.938, "grad_norm": 1.0884032249450684, "learning_rate": 2e-05, "loss": 0.02612902, "step": 18969 }, { "epoch": 37.94, "grad_norm": 1.067151665687561, "learning_rate": 2e-05, "loss": 0.05026908, "step": 18970 }, { "epoch": 37.942, "grad_norm": 1.2798658609390259, "learning_rate": 2e-05, "loss": 0.03574484, "step": 18971 }, { "epoch": 37.944, "grad_norm": 2.670130729675293, "learning_rate": 2e-05, "loss": 0.05875919, "step": 18972 }, { "epoch": 37.946, "grad_norm": 1.0779452323913574, "learning_rate": 2e-05, "loss": 0.04674947, "step": 18973 }, { "epoch": 37.948, "grad_norm": 1.3659930229187012, "learning_rate": 2e-05, "loss": 0.04738132, "step": 18974 }, { "epoch": 37.95, "grad_norm": 1.054917335510254, "learning_rate": 2e-05, "loss": 0.039779, "step": 18975 }, { "epoch": 37.952, "grad_norm": 0.9012338519096375, "learning_rate": 2e-05, "loss": 0.02913295, "step": 18976 }, { "epoch": 37.954, "grad_norm": 1.102645993232727, "learning_rate": 2e-05, "loss": 0.04405007, "step": 18977 }, { "epoch": 37.956, "grad_norm": 1.1853587627410889, "learning_rate": 2e-05, "loss": 0.05324361, "step": 18978 }, { "epoch": 37.958, "grad_norm": 1.0219330787658691, "learning_rate": 2e-05, "loss": 0.04883197, "step": 18979 }, { "epoch": 37.96, "grad_norm": 1.7334797382354736, "learning_rate": 2e-05, "loss": 0.04952811, "step": 18980 }, { "epoch": 37.962, "grad_norm": 1.156711220741272, "learning_rate": 2e-05, "loss": 0.0495377, "step": 18981 }, { "epoch": 37.964, "grad_norm": 1.771813988685608, "learning_rate": 2e-05, "loss": 0.05786481, "step": 18982 }, { "epoch": 37.966, "grad_norm": 1.30844247341156, "learning_rate": 2e-05, "loss": 0.04001062, "step": 18983 }, { "epoch": 37.968, "grad_norm": 1.0746525526046753, "learning_rate": 2e-05, "loss": 0.03619002, "step": 18984 }, { "epoch": 37.97, "grad_norm": 1.0653403997421265, "learning_rate": 2e-05, "loss": 0.03692459, "step": 18985 }, { "epoch": 37.972, "grad_norm": 1.7900402545928955, "learning_rate": 2e-05, "loss": 0.05012682, "step": 18986 }, { "epoch": 37.974, "grad_norm": 1.0227274894714355, "learning_rate": 2e-05, "loss": 0.03166505, "step": 18987 }, { "epoch": 37.976, "grad_norm": 1.1506602764129639, "learning_rate": 2e-05, "loss": 0.04981997, "step": 18988 }, { "epoch": 37.978, "grad_norm": 4.846827507019043, "learning_rate": 2e-05, "loss": 0.04896365, "step": 18989 }, { "epoch": 37.98, "grad_norm": 0.9913517236709595, "learning_rate": 2e-05, "loss": 0.0379057, "step": 18990 }, { "epoch": 37.982, "grad_norm": 1.8025267124176025, "learning_rate": 2e-05, "loss": 0.05586127, "step": 18991 }, { "epoch": 37.984, "grad_norm": 1.132684588432312, "learning_rate": 2e-05, "loss": 0.03268192, "step": 18992 }, { "epoch": 37.986, "grad_norm": 0.9470504522323608, "learning_rate": 2e-05, "loss": 0.04058151, "step": 18993 }, { "epoch": 37.988, "grad_norm": 3.7048418521881104, "learning_rate": 2e-05, "loss": 0.05647293, "step": 18994 }, { "epoch": 37.99, "grad_norm": 1.0128827095031738, "learning_rate": 2e-05, "loss": 0.03746347, "step": 18995 }, { "epoch": 37.992, "grad_norm": 1.6474192142486572, "learning_rate": 2e-05, "loss": 0.08178754, "step": 18996 }, { "epoch": 37.994, "grad_norm": 1.1139341592788696, "learning_rate": 2e-05, "loss": 0.05124307, "step": 18997 }, { "epoch": 37.996, "grad_norm": 1.40438711643219, "learning_rate": 2e-05, "loss": 0.03827235, "step": 18998 }, { "epoch": 37.998, "grad_norm": 1.1550930738449097, "learning_rate": 2e-05, "loss": 0.03912105, "step": 18999 }, { "epoch": 38.0, "grad_norm": 0.9796078205108643, "learning_rate": 2e-05, "loss": 0.03765359, "step": 19000 }, { "epoch": 38.0, "eval_performance": { "AngleClassification_1": 0.998, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9880239520958084, "Equal_1": 0.996, "Equal_2": 0.9760479041916168, "Equal_3": 0.9880239520958084, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 1.0, "Parallel_1": 0.9919839679358717, "Parallel_2": 0.9959919839679359, "Parallel_3": 0.992, "Perpendicular_1": 0.994, "Perpendicular_2": 0.992, "Perpendicular_3": 0.8957915831663327, "PointLiesOnCircle_1": 1.0, "PointLiesOnCircle_2": 1.0, "PointLiesOnCircle_3": 0.99, "PointLiesOnLine_1": 0.9959919839679359, "PointLiesOnLine_2": 0.9979959919839679, "PointLiesOnLine_3": 0.9820359281437125 }, "eval_runtime": 226.8931, "eval_samples_per_second": 46.277, "eval_steps_per_second": 0.926, "step": 19000 }, { "epoch": 38.002, "grad_norm": 1.1857725381851196, "learning_rate": 2e-05, "loss": 0.05068675, "step": 19001 }, { "epoch": 38.004, "grad_norm": 1.8349792957305908, "learning_rate": 2e-05, "loss": 0.03845159, "step": 19002 }, { "epoch": 38.006, "grad_norm": 1.2083112001419067, "learning_rate": 2e-05, "loss": 0.04921532, "step": 19003 }, { "epoch": 38.008, "grad_norm": 1.6150623559951782, "learning_rate": 2e-05, "loss": 0.06955804, "step": 19004 }, { "epoch": 38.01, "grad_norm": 1.1880565881729126, "learning_rate": 2e-05, "loss": 0.0646949, "step": 19005 }, { "epoch": 38.012, "grad_norm": 0.8448614478111267, "learning_rate": 2e-05, "loss": 0.03267739, "step": 19006 }, { "epoch": 38.014, "grad_norm": 0.8860496878623962, "learning_rate": 2e-05, "loss": 0.03163987, "step": 19007 }, { "epoch": 38.016, "grad_norm": 1.1608946323394775, "learning_rate": 2e-05, "loss": 0.05490214, "step": 19008 }, { "epoch": 38.018, "grad_norm": 1.4096230268478394, "learning_rate": 2e-05, "loss": 0.04647225, "step": 19009 }, { "epoch": 38.02, "grad_norm": 2.639594554901123, "learning_rate": 2e-05, "loss": 0.04155675, "step": 19010 }, { "epoch": 38.022, "grad_norm": 1.3461120128631592, "learning_rate": 2e-05, "loss": 0.05609204, "step": 19011 }, { "epoch": 38.024, "grad_norm": 4.138711929321289, "learning_rate": 2e-05, "loss": 0.04268423, "step": 19012 }, { "epoch": 38.026, "grad_norm": 0.9714207649230957, "learning_rate": 2e-05, "loss": 0.04117605, "step": 19013 }, { "epoch": 38.028, "grad_norm": 1.472511887550354, "learning_rate": 2e-05, "loss": 0.04032911, "step": 19014 }, { "epoch": 38.03, "grad_norm": 1.8279123306274414, "learning_rate": 2e-05, "loss": 0.05401701, "step": 19015 }, { "epoch": 38.032, "grad_norm": 1.3704090118408203, "learning_rate": 2e-05, "loss": 0.02831961, "step": 19016 }, { "epoch": 38.034, "grad_norm": 1.1668146848678589, "learning_rate": 2e-05, "loss": 0.05877344, "step": 19017 }, { "epoch": 38.036, "grad_norm": 1.4199328422546387, "learning_rate": 2e-05, "loss": 0.05185028, "step": 19018 }, { "epoch": 38.038, "grad_norm": 1.098205804824829, "learning_rate": 2e-05, "loss": 0.04090238, "step": 19019 }, { "epoch": 38.04, "grad_norm": 1.2233284711837769, "learning_rate": 2e-05, "loss": 0.04066335, "step": 19020 }, { "epoch": 38.042, "grad_norm": 1.3385541439056396, "learning_rate": 2e-05, "loss": 0.03994186, "step": 19021 }, { "epoch": 38.044, "grad_norm": 2.0515246391296387, "learning_rate": 2e-05, "loss": 0.04916276, "step": 19022 }, { "epoch": 38.046, "grad_norm": 1.4565155506134033, "learning_rate": 2e-05, "loss": 0.05642572, "step": 19023 }, { "epoch": 38.048, "grad_norm": 1.4643627405166626, "learning_rate": 2e-05, "loss": 0.05078375, "step": 19024 }, { "epoch": 38.05, "grad_norm": 1.2359614372253418, "learning_rate": 2e-05, "loss": 0.03910499, "step": 19025 }, { "epoch": 38.052, "grad_norm": 1.1790772676467896, "learning_rate": 2e-05, "loss": 0.02929858, "step": 19026 }, { "epoch": 38.054, "grad_norm": 1.1378750801086426, "learning_rate": 2e-05, "loss": 0.04412016, "step": 19027 }, { "epoch": 38.056, "grad_norm": 0.8290085792541504, "learning_rate": 2e-05, "loss": 0.02837101, "step": 19028 }, { "epoch": 38.058, "grad_norm": 0.962614893913269, "learning_rate": 2e-05, "loss": 0.04068268, "step": 19029 }, { "epoch": 38.06, "grad_norm": 0.8717194199562073, "learning_rate": 2e-05, "loss": 0.03346996, "step": 19030 }, { "epoch": 38.062, "grad_norm": 1.9289531707763672, "learning_rate": 2e-05, "loss": 0.04307383, "step": 19031 }, { "epoch": 38.064, "grad_norm": 0.9313304424285889, "learning_rate": 2e-05, "loss": 0.02655231, "step": 19032 }, { "epoch": 38.066, "grad_norm": 1.146187424659729, "learning_rate": 2e-05, "loss": 0.04797999, "step": 19033 }, { "epoch": 38.068, "grad_norm": 2.3054511547088623, "learning_rate": 2e-05, "loss": 0.0509394, "step": 19034 }, { "epoch": 38.07, "grad_norm": 2.425523042678833, "learning_rate": 2e-05, "loss": 0.05437211, "step": 19035 }, { "epoch": 38.072, "grad_norm": 1.0331354141235352, "learning_rate": 2e-05, "loss": 0.04365887, "step": 19036 }, { "epoch": 38.074, "grad_norm": 1.1450347900390625, "learning_rate": 2e-05, "loss": 0.05064712, "step": 19037 }, { "epoch": 38.076, "grad_norm": 0.90910804271698, "learning_rate": 2e-05, "loss": 0.02630668, "step": 19038 }, { "epoch": 38.078, "grad_norm": 1.4607967138290405, "learning_rate": 2e-05, "loss": 0.04745074, "step": 19039 }, { "epoch": 38.08, "grad_norm": 0.8371084928512573, "learning_rate": 2e-05, "loss": 0.0317363, "step": 19040 }, { "epoch": 38.082, "grad_norm": 1.3142812252044678, "learning_rate": 2e-05, "loss": 0.04080697, "step": 19041 }, { "epoch": 38.084, "grad_norm": 1.3879748582839966, "learning_rate": 2e-05, "loss": 0.06016927, "step": 19042 }, { "epoch": 38.086, "grad_norm": 1.0883716344833374, "learning_rate": 2e-05, "loss": 0.03619281, "step": 19043 }, { "epoch": 38.088, "grad_norm": 0.7856574058532715, "learning_rate": 2e-05, "loss": 0.02658314, "step": 19044 }, { "epoch": 38.09, "grad_norm": 1.0672367811203003, "learning_rate": 2e-05, "loss": 0.03681033, "step": 19045 }, { "epoch": 38.092, "grad_norm": 1.0332149267196655, "learning_rate": 2e-05, "loss": 0.03947186, "step": 19046 }, { "epoch": 38.094, "grad_norm": 1.0873006582260132, "learning_rate": 2e-05, "loss": 0.04946952, "step": 19047 }, { "epoch": 38.096, "grad_norm": 3.815877676010132, "learning_rate": 2e-05, "loss": 0.0515066, "step": 19048 }, { "epoch": 38.098, "grad_norm": 1.3976384401321411, "learning_rate": 2e-05, "loss": 0.03494392, "step": 19049 }, { "epoch": 38.1, "grad_norm": 2.8085548877716064, "learning_rate": 2e-05, "loss": 0.06091487, "step": 19050 }, { "epoch": 38.102, "grad_norm": 0.9536729454994202, "learning_rate": 2e-05, "loss": 0.03242748, "step": 19051 }, { "epoch": 38.104, "grad_norm": 1.4789749383926392, "learning_rate": 2e-05, "loss": 0.05357812, "step": 19052 }, { "epoch": 38.106, "grad_norm": 0.9844691157341003, "learning_rate": 2e-05, "loss": 0.04200847, "step": 19053 }, { "epoch": 38.108, "grad_norm": 1.1769919395446777, "learning_rate": 2e-05, "loss": 0.06283616, "step": 19054 }, { "epoch": 38.11, "grad_norm": 1.8569592237472534, "learning_rate": 2e-05, "loss": 0.04965276, "step": 19055 }, { "epoch": 38.112, "grad_norm": 1.0667264461517334, "learning_rate": 2e-05, "loss": 0.05138033, "step": 19056 }, { "epoch": 38.114, "grad_norm": 2.1954941749572754, "learning_rate": 2e-05, "loss": 0.04746103, "step": 19057 }, { "epoch": 38.116, "grad_norm": 1.7568672895431519, "learning_rate": 2e-05, "loss": 0.0384854, "step": 19058 }, { "epoch": 38.118, "grad_norm": 0.9309579133987427, "learning_rate": 2e-05, "loss": 0.0349305, "step": 19059 }, { "epoch": 38.12, "grad_norm": 0.8319924473762512, "learning_rate": 2e-05, "loss": 0.03080974, "step": 19060 }, { "epoch": 38.122, "grad_norm": 1.1133543252944946, "learning_rate": 2e-05, "loss": 0.05004583, "step": 19061 }, { "epoch": 38.124, "grad_norm": 0.9707732796669006, "learning_rate": 2e-05, "loss": 0.03214945, "step": 19062 }, { "epoch": 38.126, "grad_norm": 1.1956777572631836, "learning_rate": 2e-05, "loss": 0.04815499, "step": 19063 }, { "epoch": 38.128, "grad_norm": 1.1991287469863892, "learning_rate": 2e-05, "loss": 0.03495335, "step": 19064 }, { "epoch": 38.13, "grad_norm": 1.692772626876831, "learning_rate": 2e-05, "loss": 0.0484814, "step": 19065 }, { "epoch": 38.132, "grad_norm": 0.9562758803367615, "learning_rate": 2e-05, "loss": 0.03144904, "step": 19066 }, { "epoch": 38.134, "grad_norm": 1.093252182006836, "learning_rate": 2e-05, "loss": 0.05065017, "step": 19067 }, { "epoch": 38.136, "grad_norm": 1.3179384469985962, "learning_rate": 2e-05, "loss": 0.03091011, "step": 19068 }, { "epoch": 38.138, "grad_norm": 1.035415768623352, "learning_rate": 2e-05, "loss": 0.04592619, "step": 19069 }, { "epoch": 38.14, "grad_norm": 3.0472514629364014, "learning_rate": 2e-05, "loss": 0.0514869, "step": 19070 }, { "epoch": 38.142, "grad_norm": 0.9892259836196899, "learning_rate": 2e-05, "loss": 0.05041065, "step": 19071 }, { "epoch": 38.144, "grad_norm": 2.0199320316314697, "learning_rate": 2e-05, "loss": 0.03528522, "step": 19072 }, { "epoch": 38.146, "grad_norm": 1.3646631240844727, "learning_rate": 2e-05, "loss": 0.04432381, "step": 19073 }, { "epoch": 38.148, "grad_norm": 0.9518442749977112, "learning_rate": 2e-05, "loss": 0.03748691, "step": 19074 }, { "epoch": 38.15, "grad_norm": 1.1646300554275513, "learning_rate": 2e-05, "loss": 0.05838945, "step": 19075 }, { "epoch": 38.152, "grad_norm": 1.1369733810424805, "learning_rate": 2e-05, "loss": 0.04239263, "step": 19076 }, { "epoch": 38.154, "grad_norm": 1.321479320526123, "learning_rate": 2e-05, "loss": 0.04643835, "step": 19077 }, { "epoch": 38.156, "grad_norm": 1.0142929553985596, "learning_rate": 2e-05, "loss": 0.04267386, "step": 19078 }, { "epoch": 38.158, "grad_norm": 1.1097346544265747, "learning_rate": 2e-05, "loss": 0.04110869, "step": 19079 }, { "epoch": 38.16, "grad_norm": 1.0397744178771973, "learning_rate": 2e-05, "loss": 0.03614996, "step": 19080 }, { "epoch": 38.162, "grad_norm": 1.4321893453598022, "learning_rate": 2e-05, "loss": 0.05135652, "step": 19081 }, { "epoch": 38.164, "grad_norm": 0.9651837944984436, "learning_rate": 2e-05, "loss": 0.03508379, "step": 19082 }, { "epoch": 38.166, "grad_norm": 1.5180799961090088, "learning_rate": 2e-05, "loss": 0.04553884, "step": 19083 }, { "epoch": 38.168, "grad_norm": 0.9238044023513794, "learning_rate": 2e-05, "loss": 0.03308198, "step": 19084 }, { "epoch": 38.17, "grad_norm": 1.0907282829284668, "learning_rate": 2e-05, "loss": 0.03917833, "step": 19085 }, { "epoch": 38.172, "grad_norm": 1.3067773580551147, "learning_rate": 2e-05, "loss": 0.04263156, "step": 19086 }, { "epoch": 38.174, "grad_norm": 1.8923944234848022, "learning_rate": 2e-05, "loss": 0.04515216, "step": 19087 }, { "epoch": 38.176, "grad_norm": 2.090118646621704, "learning_rate": 2e-05, "loss": 0.03313391, "step": 19088 }, { "epoch": 38.178, "grad_norm": 1.2880207300186157, "learning_rate": 2e-05, "loss": 0.04901484, "step": 19089 }, { "epoch": 38.18, "grad_norm": 0.9398374557495117, "learning_rate": 2e-05, "loss": 0.03521935, "step": 19090 }, { "epoch": 38.182, "grad_norm": 1.1691851615905762, "learning_rate": 2e-05, "loss": 0.05414864, "step": 19091 }, { "epoch": 38.184, "grad_norm": 1.120355486869812, "learning_rate": 2e-05, "loss": 0.04552022, "step": 19092 }, { "epoch": 38.186, "grad_norm": 1.0435969829559326, "learning_rate": 2e-05, "loss": 0.02810922, "step": 19093 }, { "epoch": 38.188, "grad_norm": 1.0278534889221191, "learning_rate": 2e-05, "loss": 0.04383072, "step": 19094 }, { "epoch": 38.19, "grad_norm": 1.0870497226715088, "learning_rate": 2e-05, "loss": 0.03720466, "step": 19095 }, { "epoch": 38.192, "grad_norm": 1.2354259490966797, "learning_rate": 2e-05, "loss": 0.05530258, "step": 19096 }, { "epoch": 38.194, "grad_norm": 1.5438313484191895, "learning_rate": 2e-05, "loss": 0.04958253, "step": 19097 }, { "epoch": 38.196, "grad_norm": 3.513209581375122, "learning_rate": 2e-05, "loss": 0.07620174, "step": 19098 }, { "epoch": 38.198, "grad_norm": 1.4860045909881592, "learning_rate": 2e-05, "loss": 0.04516128, "step": 19099 }, { "epoch": 38.2, "grad_norm": 1.0657498836517334, "learning_rate": 2e-05, "loss": 0.0443478, "step": 19100 }, { "epoch": 38.202, "grad_norm": 1.2662476301193237, "learning_rate": 2e-05, "loss": 0.05510429, "step": 19101 }, { "epoch": 38.204, "grad_norm": 1.635820984840393, "learning_rate": 2e-05, "loss": 0.05987892, "step": 19102 }, { "epoch": 38.206, "grad_norm": 1.7935552597045898, "learning_rate": 2e-05, "loss": 0.04547988, "step": 19103 }, { "epoch": 38.208, "grad_norm": 1.1721686124801636, "learning_rate": 2e-05, "loss": 0.05176605, "step": 19104 }, { "epoch": 38.21, "grad_norm": 1.1774653196334839, "learning_rate": 2e-05, "loss": 0.02311687, "step": 19105 }, { "epoch": 38.212, "grad_norm": 1.0064268112182617, "learning_rate": 2e-05, "loss": 0.04805762, "step": 19106 }, { "epoch": 38.214, "grad_norm": 2.3554165363311768, "learning_rate": 2e-05, "loss": 0.06185, "step": 19107 }, { "epoch": 38.216, "grad_norm": 1.0584416389465332, "learning_rate": 2e-05, "loss": 0.04451058, "step": 19108 }, { "epoch": 38.218, "grad_norm": 1.0596531629562378, "learning_rate": 2e-05, "loss": 0.0459378, "step": 19109 }, { "epoch": 38.22, "grad_norm": 0.9666985273361206, "learning_rate": 2e-05, "loss": 0.03244024, "step": 19110 }, { "epoch": 38.222, "grad_norm": 1.5508886575698853, "learning_rate": 2e-05, "loss": 0.05416223, "step": 19111 }, { "epoch": 38.224, "grad_norm": 1.0764144659042358, "learning_rate": 2e-05, "loss": 0.03418811, "step": 19112 }, { "epoch": 38.226, "grad_norm": 1.7086256742477417, "learning_rate": 2e-05, "loss": 0.04951385, "step": 19113 }, { "epoch": 38.228, "grad_norm": 1.8031891584396362, "learning_rate": 2e-05, "loss": 0.05957458, "step": 19114 }, { "epoch": 38.23, "grad_norm": 1.714905023574829, "learning_rate": 2e-05, "loss": 0.05376598, "step": 19115 }, { "epoch": 38.232, "grad_norm": 1.2457348108291626, "learning_rate": 2e-05, "loss": 0.05603325, "step": 19116 }, { "epoch": 38.234, "grad_norm": 0.8834710717201233, "learning_rate": 2e-05, "loss": 0.02934789, "step": 19117 }, { "epoch": 38.236, "grad_norm": 6.667577266693115, "learning_rate": 2e-05, "loss": 0.04243184, "step": 19118 }, { "epoch": 38.238, "grad_norm": 0.9107360243797302, "learning_rate": 2e-05, "loss": 0.03569074, "step": 19119 }, { "epoch": 38.24, "grad_norm": 2.6655824184417725, "learning_rate": 2e-05, "loss": 0.02509627, "step": 19120 }, { "epoch": 38.242, "grad_norm": 1.613572597503662, "learning_rate": 2e-05, "loss": 0.05840549, "step": 19121 }, { "epoch": 38.244, "grad_norm": 0.9234614968299866, "learning_rate": 2e-05, "loss": 0.03753355, "step": 19122 }, { "epoch": 38.246, "grad_norm": 0.9767255783081055, "learning_rate": 2e-05, "loss": 0.03626244, "step": 19123 }, { "epoch": 38.248, "grad_norm": 1.0804781913757324, "learning_rate": 2e-05, "loss": 0.04236784, "step": 19124 }, { "epoch": 38.25, "grad_norm": 1.057296633720398, "learning_rate": 2e-05, "loss": 0.02701549, "step": 19125 }, { "epoch": 38.252, "grad_norm": 1.3602415323257446, "learning_rate": 2e-05, "loss": 0.04637405, "step": 19126 }, { "epoch": 38.254, "grad_norm": 2.341430425643921, "learning_rate": 2e-05, "loss": 0.06888908, "step": 19127 }, { "epoch": 38.256, "grad_norm": 0.8171716332435608, "learning_rate": 2e-05, "loss": 0.02416177, "step": 19128 }, { "epoch": 38.258, "grad_norm": 1.1263364553451538, "learning_rate": 2e-05, "loss": 0.04857118, "step": 19129 }, { "epoch": 38.26, "grad_norm": 1.1870951652526855, "learning_rate": 2e-05, "loss": 0.05540338, "step": 19130 }, { "epoch": 38.262, "grad_norm": 1.401424527168274, "learning_rate": 2e-05, "loss": 0.04119574, "step": 19131 }, { "epoch": 38.264, "grad_norm": 1.281165361404419, "learning_rate": 2e-05, "loss": 0.05366011, "step": 19132 }, { "epoch": 38.266, "grad_norm": 1.1923267841339111, "learning_rate": 2e-05, "loss": 0.0436106, "step": 19133 }, { "epoch": 38.268, "grad_norm": 0.9838460087776184, "learning_rate": 2e-05, "loss": 0.04238772, "step": 19134 }, { "epoch": 38.27, "grad_norm": 1.0594916343688965, "learning_rate": 2e-05, "loss": 0.04981911, "step": 19135 }, { "epoch": 38.272, "grad_norm": 1.1587008237838745, "learning_rate": 2e-05, "loss": 0.05313203, "step": 19136 }, { "epoch": 38.274, "grad_norm": 1.3160158395767212, "learning_rate": 2e-05, "loss": 0.04662284, "step": 19137 }, { "epoch": 38.276, "grad_norm": 1.1079109907150269, "learning_rate": 2e-05, "loss": 0.05073427, "step": 19138 }, { "epoch": 38.278, "grad_norm": 1.004639983177185, "learning_rate": 2e-05, "loss": 0.03746736, "step": 19139 }, { "epoch": 38.28, "grad_norm": 1.2487540245056152, "learning_rate": 2e-05, "loss": 0.04753603, "step": 19140 }, { "epoch": 38.282, "grad_norm": 1.196237325668335, "learning_rate": 2e-05, "loss": 0.04334036, "step": 19141 }, { "epoch": 38.284, "grad_norm": 1.0311989784240723, "learning_rate": 2e-05, "loss": 0.03883181, "step": 19142 }, { "epoch": 38.286, "grad_norm": 1.0751636028289795, "learning_rate": 2e-05, "loss": 0.03886398, "step": 19143 }, { "epoch": 38.288, "grad_norm": 1.1703816652297974, "learning_rate": 2e-05, "loss": 0.04902594, "step": 19144 }, { "epoch": 38.29, "grad_norm": 1.1070494651794434, "learning_rate": 2e-05, "loss": 0.04704769, "step": 19145 }, { "epoch": 38.292, "grad_norm": 1.5904055833816528, "learning_rate": 2e-05, "loss": 0.0544263, "step": 19146 }, { "epoch": 38.294, "grad_norm": 1.0136159658432007, "learning_rate": 2e-05, "loss": 0.04256488, "step": 19147 }, { "epoch": 38.296, "grad_norm": 1.0528347492218018, "learning_rate": 2e-05, "loss": 0.04483087, "step": 19148 }, { "epoch": 38.298, "grad_norm": 0.9037364721298218, "learning_rate": 2e-05, "loss": 0.03900316, "step": 19149 }, { "epoch": 38.3, "grad_norm": 1.1031533479690552, "learning_rate": 2e-05, "loss": 0.04978066, "step": 19150 }, { "epoch": 38.302, "grad_norm": 0.959299623966217, "learning_rate": 2e-05, "loss": 0.04234768, "step": 19151 }, { "epoch": 38.304, "grad_norm": 1.028848648071289, "learning_rate": 2e-05, "loss": 0.0389412, "step": 19152 }, { "epoch": 38.306, "grad_norm": 0.8427807092666626, "learning_rate": 2e-05, "loss": 0.03051137, "step": 19153 }, { "epoch": 38.308, "grad_norm": 1.2843321561813354, "learning_rate": 2e-05, "loss": 0.04645744, "step": 19154 }, { "epoch": 38.31, "grad_norm": 0.9856249094009399, "learning_rate": 2e-05, "loss": 0.04210236, "step": 19155 }, { "epoch": 38.312, "grad_norm": 1.2271077632904053, "learning_rate": 2e-05, "loss": 0.03719503, "step": 19156 }, { "epoch": 38.314, "grad_norm": 1.1385163068771362, "learning_rate": 2e-05, "loss": 0.0315531, "step": 19157 }, { "epoch": 38.316, "grad_norm": 4.996722221374512, "learning_rate": 2e-05, "loss": 0.04684191, "step": 19158 }, { "epoch": 38.318, "grad_norm": 1.3988193273544312, "learning_rate": 2e-05, "loss": 0.04813611, "step": 19159 }, { "epoch": 38.32, "grad_norm": 1.4559175968170166, "learning_rate": 2e-05, "loss": 0.05458013, "step": 19160 }, { "epoch": 38.322, "grad_norm": 1.15451180934906, "learning_rate": 2e-05, "loss": 0.05900109, "step": 19161 }, { "epoch": 38.324, "grad_norm": 1.0646992921829224, "learning_rate": 2e-05, "loss": 0.04320752, "step": 19162 }, { "epoch": 38.326, "grad_norm": 1.0288231372833252, "learning_rate": 2e-05, "loss": 0.04763383, "step": 19163 }, { "epoch": 38.328, "grad_norm": 1.3107750415802002, "learning_rate": 2e-05, "loss": 0.05102226, "step": 19164 }, { "epoch": 38.33, "grad_norm": 1.2626296281814575, "learning_rate": 2e-05, "loss": 0.0344701, "step": 19165 }, { "epoch": 38.332, "grad_norm": 0.9309131503105164, "learning_rate": 2e-05, "loss": 0.03272251, "step": 19166 }, { "epoch": 38.334, "grad_norm": 1.373098611831665, "learning_rate": 2e-05, "loss": 0.05390111, "step": 19167 }, { "epoch": 38.336, "grad_norm": 1.0912175178527832, "learning_rate": 2e-05, "loss": 0.04514299, "step": 19168 }, { "epoch": 38.338, "grad_norm": 1.0551931858062744, "learning_rate": 2e-05, "loss": 0.03682271, "step": 19169 }, { "epoch": 38.34, "grad_norm": 1.3323588371276855, "learning_rate": 2e-05, "loss": 0.04844888, "step": 19170 }, { "epoch": 38.342, "grad_norm": 1.0403633117675781, "learning_rate": 2e-05, "loss": 0.04522989, "step": 19171 }, { "epoch": 38.344, "grad_norm": 1.381260633468628, "learning_rate": 2e-05, "loss": 0.03899246, "step": 19172 }, { "epoch": 38.346, "grad_norm": 1.0804755687713623, "learning_rate": 2e-05, "loss": 0.04848024, "step": 19173 }, { "epoch": 38.348, "grad_norm": 1.0129166841506958, "learning_rate": 2e-05, "loss": 0.03700143, "step": 19174 }, { "epoch": 38.35, "grad_norm": 1.1090625524520874, "learning_rate": 2e-05, "loss": 0.02861697, "step": 19175 }, { "epoch": 38.352, "grad_norm": 1.074386477470398, "learning_rate": 2e-05, "loss": 0.03702103, "step": 19176 }, { "epoch": 38.354, "grad_norm": 1.0405902862548828, "learning_rate": 2e-05, "loss": 0.04739942, "step": 19177 }, { "epoch": 38.356, "grad_norm": 1.0555291175842285, "learning_rate": 2e-05, "loss": 0.04706403, "step": 19178 }, { "epoch": 38.358, "grad_norm": 0.9382142424583435, "learning_rate": 2e-05, "loss": 0.03866369, "step": 19179 }, { "epoch": 38.36, "grad_norm": 1.841883659362793, "learning_rate": 2e-05, "loss": 0.05673234, "step": 19180 }, { "epoch": 38.362, "grad_norm": 1.0642403364181519, "learning_rate": 2e-05, "loss": 0.05661555, "step": 19181 }, { "epoch": 38.364, "grad_norm": 1.0852385759353638, "learning_rate": 2e-05, "loss": 0.05863529, "step": 19182 }, { "epoch": 38.366, "grad_norm": 1.2981977462768555, "learning_rate": 2e-05, "loss": 0.05276012, "step": 19183 }, { "epoch": 38.368, "grad_norm": 1.2053310871124268, "learning_rate": 2e-05, "loss": 0.04411381, "step": 19184 }, { "epoch": 38.37, "grad_norm": 0.9720830917358398, "learning_rate": 2e-05, "loss": 0.03769328, "step": 19185 }, { "epoch": 38.372, "grad_norm": 0.8741254806518555, "learning_rate": 2e-05, "loss": 0.02743982, "step": 19186 }, { "epoch": 38.374, "grad_norm": 1.211116075515747, "learning_rate": 2e-05, "loss": 0.06047737, "step": 19187 }, { "epoch": 38.376, "grad_norm": 1.4047949314117432, "learning_rate": 2e-05, "loss": 0.03221051, "step": 19188 }, { "epoch": 38.378, "grad_norm": 1.1005504131317139, "learning_rate": 2e-05, "loss": 0.05221452, "step": 19189 }, { "epoch": 38.38, "grad_norm": 1.358961820602417, "learning_rate": 2e-05, "loss": 0.0560139, "step": 19190 }, { "epoch": 38.382, "grad_norm": 1.1197632551193237, "learning_rate": 2e-05, "loss": 0.03271473, "step": 19191 }, { "epoch": 38.384, "grad_norm": 1.2021021842956543, "learning_rate": 2e-05, "loss": 0.0614103, "step": 19192 }, { "epoch": 38.386, "grad_norm": 1.114485502243042, "learning_rate": 2e-05, "loss": 0.04767892, "step": 19193 }, { "epoch": 38.388, "grad_norm": 1.7689048051834106, "learning_rate": 2e-05, "loss": 0.06458927, "step": 19194 }, { "epoch": 38.39, "grad_norm": 1.4363694190979004, "learning_rate": 2e-05, "loss": 0.03300614, "step": 19195 }, { "epoch": 38.392, "grad_norm": 1.0873254537582397, "learning_rate": 2e-05, "loss": 0.04869257, "step": 19196 }, { "epoch": 38.394, "grad_norm": 1.2860513925552368, "learning_rate": 2e-05, "loss": 0.0411692, "step": 19197 }, { "epoch": 38.396, "grad_norm": 1.0096582174301147, "learning_rate": 2e-05, "loss": 0.03667551, "step": 19198 }, { "epoch": 38.398, "grad_norm": 1.5062832832336426, "learning_rate": 2e-05, "loss": 0.0409006, "step": 19199 }, { "epoch": 38.4, "grad_norm": 1.0778000354766846, "learning_rate": 2e-05, "loss": 0.04353277, "step": 19200 }, { "epoch": 38.402, "grad_norm": 1.2125732898712158, "learning_rate": 2e-05, "loss": 0.06344277, "step": 19201 }, { "epoch": 38.404, "grad_norm": 0.9937122464179993, "learning_rate": 2e-05, "loss": 0.05219335, "step": 19202 }, { "epoch": 38.406, "grad_norm": 1.0112395286560059, "learning_rate": 2e-05, "loss": 0.04649001, "step": 19203 }, { "epoch": 38.408, "grad_norm": 0.9232699275016785, "learning_rate": 2e-05, "loss": 0.03620752, "step": 19204 }, { "epoch": 38.41, "grad_norm": 4.602518081665039, "learning_rate": 2e-05, "loss": 0.05323177, "step": 19205 }, { "epoch": 38.412, "grad_norm": 1.2254105806350708, "learning_rate": 2e-05, "loss": 0.04876372, "step": 19206 }, { "epoch": 38.414, "grad_norm": 1.1117303371429443, "learning_rate": 2e-05, "loss": 0.0327386, "step": 19207 }, { "epoch": 38.416, "grad_norm": 1.401432752609253, "learning_rate": 2e-05, "loss": 0.05563305, "step": 19208 }, { "epoch": 38.418, "grad_norm": 1.1136245727539062, "learning_rate": 2e-05, "loss": 0.03039629, "step": 19209 }, { "epoch": 38.42, "grad_norm": 1.275688648223877, "learning_rate": 2e-05, "loss": 0.03978648, "step": 19210 }, { "epoch": 38.422, "grad_norm": 1.046920657157898, "learning_rate": 2e-05, "loss": 0.04119326, "step": 19211 }, { "epoch": 38.424, "grad_norm": 1.376677393913269, "learning_rate": 2e-05, "loss": 0.03641224, "step": 19212 }, { "epoch": 38.426, "grad_norm": 0.9722734689712524, "learning_rate": 2e-05, "loss": 0.03288822, "step": 19213 }, { "epoch": 38.428, "grad_norm": 1.3402529954910278, "learning_rate": 2e-05, "loss": 0.05356029, "step": 19214 }, { "epoch": 38.43, "grad_norm": 0.9656422138214111, "learning_rate": 2e-05, "loss": 0.03554551, "step": 19215 }, { "epoch": 38.432, "grad_norm": 0.9836605787277222, "learning_rate": 2e-05, "loss": 0.04253962, "step": 19216 }, { "epoch": 38.434, "grad_norm": 1.190706729888916, "learning_rate": 2e-05, "loss": 0.05271694, "step": 19217 }, { "epoch": 38.436, "grad_norm": 1.7680084705352783, "learning_rate": 2e-05, "loss": 0.04986738, "step": 19218 }, { "epoch": 38.438, "grad_norm": 1.0549100637435913, "learning_rate": 2e-05, "loss": 0.03742786, "step": 19219 }, { "epoch": 38.44, "grad_norm": 1.2387006282806396, "learning_rate": 2e-05, "loss": 0.03565872, "step": 19220 }, { "epoch": 38.442, "grad_norm": 1.1111475229263306, "learning_rate": 2e-05, "loss": 0.04186115, "step": 19221 }, { "epoch": 38.444, "grad_norm": 9.5558443069458, "learning_rate": 2e-05, "loss": 0.06209987, "step": 19222 }, { "epoch": 38.446, "grad_norm": 1.3623991012573242, "learning_rate": 2e-05, "loss": 0.0497506, "step": 19223 }, { "epoch": 38.448, "grad_norm": 1.0689963102340698, "learning_rate": 2e-05, "loss": 0.02821073, "step": 19224 }, { "epoch": 38.45, "grad_norm": 1.1239415407180786, "learning_rate": 2e-05, "loss": 0.05390476, "step": 19225 }, { "epoch": 38.452, "grad_norm": 1.7626467943191528, "learning_rate": 2e-05, "loss": 0.03833911, "step": 19226 }, { "epoch": 38.454, "grad_norm": 1.1680176258087158, "learning_rate": 2e-05, "loss": 0.04566617, "step": 19227 }, { "epoch": 38.456, "grad_norm": 1.240129828453064, "learning_rate": 2e-05, "loss": 0.04589807, "step": 19228 }, { "epoch": 38.458, "grad_norm": 1.087622880935669, "learning_rate": 2e-05, "loss": 0.03765169, "step": 19229 }, { "epoch": 38.46, "grad_norm": 1.124113917350769, "learning_rate": 2e-05, "loss": 0.0225396, "step": 19230 }, { "epoch": 38.462, "grad_norm": 0.950320839881897, "learning_rate": 2e-05, "loss": 0.03844571, "step": 19231 }, { "epoch": 38.464, "grad_norm": 1.507038950920105, "learning_rate": 2e-05, "loss": 0.04348389, "step": 19232 }, { "epoch": 38.466, "grad_norm": 0.9418063163757324, "learning_rate": 2e-05, "loss": 0.03322322, "step": 19233 }, { "epoch": 38.468, "grad_norm": 0.9347032308578491, "learning_rate": 2e-05, "loss": 0.03517582, "step": 19234 }, { "epoch": 38.47, "grad_norm": 1.4615377187728882, "learning_rate": 2e-05, "loss": 0.07190785, "step": 19235 }, { "epoch": 38.472, "grad_norm": 1.104866862297058, "learning_rate": 2e-05, "loss": 0.04152955, "step": 19236 }, { "epoch": 38.474, "grad_norm": 1.4071221351623535, "learning_rate": 2e-05, "loss": 0.03209016, "step": 19237 }, { "epoch": 38.476, "grad_norm": 1.7087613344192505, "learning_rate": 2e-05, "loss": 0.04162972, "step": 19238 }, { "epoch": 38.478, "grad_norm": 1.0330499410629272, "learning_rate": 2e-05, "loss": 0.04459159, "step": 19239 }, { "epoch": 38.48, "grad_norm": 0.9131955504417419, "learning_rate": 2e-05, "loss": 0.03749834, "step": 19240 }, { "epoch": 38.482, "grad_norm": 1.7954738140106201, "learning_rate": 2e-05, "loss": 0.04577599, "step": 19241 }, { "epoch": 38.484, "grad_norm": 1.0219485759735107, "learning_rate": 2e-05, "loss": 0.03506359, "step": 19242 }, { "epoch": 38.486, "grad_norm": 1.2258442640304565, "learning_rate": 2e-05, "loss": 0.0607101, "step": 19243 }, { "epoch": 38.488, "grad_norm": 1.0903128385543823, "learning_rate": 2e-05, "loss": 0.04742853, "step": 19244 }, { "epoch": 38.49, "grad_norm": 2.0555195808410645, "learning_rate": 2e-05, "loss": 0.03940747, "step": 19245 }, { "epoch": 38.492, "grad_norm": 1.930128574371338, "learning_rate": 2e-05, "loss": 0.06401127, "step": 19246 }, { "epoch": 38.494, "grad_norm": 1.1401786804199219, "learning_rate": 2e-05, "loss": 0.04158929, "step": 19247 }, { "epoch": 38.496, "grad_norm": 1.4530723094940186, "learning_rate": 2e-05, "loss": 0.05882206, "step": 19248 }, { "epoch": 38.498, "grad_norm": 0.9751104712486267, "learning_rate": 2e-05, "loss": 0.03726014, "step": 19249 }, { "epoch": 38.5, "grad_norm": 1.104914665222168, "learning_rate": 2e-05, "loss": 0.03955013, "step": 19250 }, { "epoch": 38.502, "grad_norm": 2.18229079246521, "learning_rate": 2e-05, "loss": 0.04199912, "step": 19251 }, { "epoch": 38.504, "grad_norm": 1.0920944213867188, "learning_rate": 2e-05, "loss": 0.04668424, "step": 19252 }, { "epoch": 38.506, "grad_norm": 1.4244221448898315, "learning_rate": 2e-05, "loss": 0.06517753, "step": 19253 }, { "epoch": 38.508, "grad_norm": 1.1719672679901123, "learning_rate": 2e-05, "loss": 0.05286897, "step": 19254 }, { "epoch": 38.51, "grad_norm": 2.209493637084961, "learning_rate": 2e-05, "loss": 0.04230143, "step": 19255 }, { "epoch": 38.512, "grad_norm": 1.3662041425704956, "learning_rate": 2e-05, "loss": 0.04110573, "step": 19256 }, { "epoch": 38.514, "grad_norm": 1.0804013013839722, "learning_rate": 2e-05, "loss": 0.04357454, "step": 19257 }, { "epoch": 38.516, "grad_norm": 1.322277307510376, "learning_rate": 2e-05, "loss": 0.04062482, "step": 19258 }, { "epoch": 38.518, "grad_norm": 8.24512004852295, "learning_rate": 2e-05, "loss": 0.05738362, "step": 19259 }, { "epoch": 38.52, "grad_norm": 1.2517683506011963, "learning_rate": 2e-05, "loss": 0.03990499, "step": 19260 }, { "epoch": 38.522, "grad_norm": 2.800863742828369, "learning_rate": 2e-05, "loss": 0.06731629, "step": 19261 }, { "epoch": 38.524, "grad_norm": 3.824667453765869, "learning_rate": 2e-05, "loss": 0.04885689, "step": 19262 }, { "epoch": 38.526, "grad_norm": 1.0599770545959473, "learning_rate": 2e-05, "loss": 0.04575955, "step": 19263 }, { "epoch": 38.528, "grad_norm": 1.1159656047821045, "learning_rate": 2e-05, "loss": 0.03959143, "step": 19264 }, { "epoch": 38.53, "grad_norm": 2.2125890254974365, "learning_rate": 2e-05, "loss": 0.04539743, "step": 19265 }, { "epoch": 38.532, "grad_norm": 1.108213186264038, "learning_rate": 2e-05, "loss": 0.04087426, "step": 19266 }, { "epoch": 38.534, "grad_norm": 1.079241156578064, "learning_rate": 2e-05, "loss": 0.04023705, "step": 19267 }, { "epoch": 38.536, "grad_norm": 2.5769498348236084, "learning_rate": 2e-05, "loss": 0.07000919, "step": 19268 }, { "epoch": 38.538, "grad_norm": 0.9308518171310425, "learning_rate": 2e-05, "loss": 0.03270379, "step": 19269 }, { "epoch": 38.54, "grad_norm": 2.1856353282928467, "learning_rate": 2e-05, "loss": 0.03065138, "step": 19270 }, { "epoch": 38.542, "grad_norm": 1.101056456565857, "learning_rate": 2e-05, "loss": 0.04193765, "step": 19271 }, { "epoch": 38.544, "grad_norm": 3.87975811958313, "learning_rate": 2e-05, "loss": 0.03239565, "step": 19272 }, { "epoch": 38.546, "grad_norm": 1.1910526752471924, "learning_rate": 2e-05, "loss": 0.03818787, "step": 19273 }, { "epoch": 38.548, "grad_norm": 0.8963406682014465, "learning_rate": 2e-05, "loss": 0.03271915, "step": 19274 }, { "epoch": 38.55, "grad_norm": 1.1577045917510986, "learning_rate": 2e-05, "loss": 0.04144982, "step": 19275 }, { "epoch": 38.552, "grad_norm": 1.4525401592254639, "learning_rate": 2e-05, "loss": 0.03169891, "step": 19276 }, { "epoch": 38.554, "grad_norm": 1.1017416715621948, "learning_rate": 2e-05, "loss": 0.04051752, "step": 19277 }, { "epoch": 38.556, "grad_norm": 1.1000051498413086, "learning_rate": 2e-05, "loss": 0.04704643, "step": 19278 }, { "epoch": 38.558, "grad_norm": 0.9752750396728516, "learning_rate": 2e-05, "loss": 0.03376535, "step": 19279 }, { "epoch": 38.56, "grad_norm": 1.38149893283844, "learning_rate": 2e-05, "loss": 0.0357708, "step": 19280 }, { "epoch": 38.562, "grad_norm": 1.0385102033615112, "learning_rate": 2e-05, "loss": 0.03646257, "step": 19281 }, { "epoch": 38.564, "grad_norm": 1.1683831214904785, "learning_rate": 2e-05, "loss": 0.05412796, "step": 19282 }, { "epoch": 38.566, "grad_norm": 1.094072699546814, "learning_rate": 2e-05, "loss": 0.04253392, "step": 19283 }, { "epoch": 38.568, "grad_norm": 0.9641970992088318, "learning_rate": 2e-05, "loss": 0.03569086, "step": 19284 }, { "epoch": 38.57, "grad_norm": 1.238326907157898, "learning_rate": 2e-05, "loss": 0.04633553, "step": 19285 }, { "epoch": 38.572, "grad_norm": 1.2132995128631592, "learning_rate": 2e-05, "loss": 0.04717588, "step": 19286 }, { "epoch": 38.574, "grad_norm": 1.0183780193328857, "learning_rate": 2e-05, "loss": 0.03563679, "step": 19287 }, { "epoch": 38.576, "grad_norm": 1.1055958271026611, "learning_rate": 2e-05, "loss": 0.04070506, "step": 19288 }, { "epoch": 38.578, "grad_norm": 1.331041693687439, "learning_rate": 2e-05, "loss": 0.05952827, "step": 19289 }, { "epoch": 38.58, "grad_norm": 1.5132273435592651, "learning_rate": 2e-05, "loss": 0.04813229, "step": 19290 }, { "epoch": 38.582, "grad_norm": 1.1615391969680786, "learning_rate": 2e-05, "loss": 0.05611588, "step": 19291 }, { "epoch": 38.584, "grad_norm": 1.071731686592102, "learning_rate": 2e-05, "loss": 0.03907536, "step": 19292 }, { "epoch": 38.586, "grad_norm": 1.0156368017196655, "learning_rate": 2e-05, "loss": 0.05412081, "step": 19293 }, { "epoch": 38.588, "grad_norm": 0.9692487120628357, "learning_rate": 2e-05, "loss": 0.03509232, "step": 19294 }, { "epoch": 38.59, "grad_norm": 1.601866364479065, "learning_rate": 2e-05, "loss": 0.05440705, "step": 19295 }, { "epoch": 38.592, "grad_norm": 1.373369812965393, "learning_rate": 2e-05, "loss": 0.05564079, "step": 19296 }, { "epoch": 38.594, "grad_norm": 1.95132577419281, "learning_rate": 2e-05, "loss": 0.0463981, "step": 19297 }, { "epoch": 38.596, "grad_norm": 0.8890618085861206, "learning_rate": 2e-05, "loss": 0.03463374, "step": 19298 }, { "epoch": 38.598, "grad_norm": 1.0668176412582397, "learning_rate": 2e-05, "loss": 0.04874559, "step": 19299 }, { "epoch": 38.6, "grad_norm": 0.9995484352111816, "learning_rate": 2e-05, "loss": 0.03593427, "step": 19300 }, { "epoch": 38.602, "grad_norm": 2.385881185531616, "learning_rate": 2e-05, "loss": 0.05592653, "step": 19301 }, { "epoch": 38.604, "grad_norm": 1.670006513595581, "learning_rate": 2e-05, "loss": 0.04213461, "step": 19302 }, { "epoch": 38.606, "grad_norm": 2.2803831100463867, "learning_rate": 2e-05, "loss": 0.05723564, "step": 19303 }, { "epoch": 38.608, "grad_norm": 1.0279027223587036, "learning_rate": 2e-05, "loss": 0.04811311, "step": 19304 }, { "epoch": 38.61, "grad_norm": 1.0963549613952637, "learning_rate": 2e-05, "loss": 0.04854143, "step": 19305 }, { "epoch": 38.612, "grad_norm": 1.0733052492141724, "learning_rate": 2e-05, "loss": 0.04882627, "step": 19306 }, { "epoch": 38.614, "grad_norm": 1.1822338104248047, "learning_rate": 2e-05, "loss": 0.04812266, "step": 19307 }, { "epoch": 38.616, "grad_norm": 1.0602601766586304, "learning_rate": 2e-05, "loss": 0.03608115, "step": 19308 }, { "epoch": 38.618, "grad_norm": 1.0106534957885742, "learning_rate": 2e-05, "loss": 0.03691368, "step": 19309 }, { "epoch": 38.62, "grad_norm": 1.0338962078094482, "learning_rate": 2e-05, "loss": 0.04008189, "step": 19310 }, { "epoch": 38.622, "grad_norm": 1.4804115295410156, "learning_rate": 2e-05, "loss": 0.06714594, "step": 19311 }, { "epoch": 38.624, "grad_norm": 0.882082998752594, "learning_rate": 2e-05, "loss": 0.03298275, "step": 19312 }, { "epoch": 38.626, "grad_norm": 0.9980672597885132, "learning_rate": 2e-05, "loss": 0.03561826, "step": 19313 }, { "epoch": 38.628, "grad_norm": 1.6455590724945068, "learning_rate": 2e-05, "loss": 0.05348264, "step": 19314 }, { "epoch": 38.63, "grad_norm": 0.9079676866531372, "learning_rate": 2e-05, "loss": 0.02981893, "step": 19315 }, { "epoch": 38.632, "grad_norm": 1.0559523105621338, "learning_rate": 2e-05, "loss": 0.05456626, "step": 19316 }, { "epoch": 38.634, "grad_norm": 0.9638671278953552, "learning_rate": 2e-05, "loss": 0.04093846, "step": 19317 }, { "epoch": 38.636, "grad_norm": 0.8385263681411743, "learning_rate": 2e-05, "loss": 0.0245842, "step": 19318 }, { "epoch": 38.638, "grad_norm": 1.0724859237670898, "learning_rate": 2e-05, "loss": 0.04252134, "step": 19319 }, { "epoch": 38.64, "grad_norm": 1.0646686553955078, "learning_rate": 2e-05, "loss": 0.03766834, "step": 19320 }, { "epoch": 38.642, "grad_norm": 0.9998976588249207, "learning_rate": 2e-05, "loss": 0.04044482, "step": 19321 }, { "epoch": 38.644, "grad_norm": 0.8522233963012695, "learning_rate": 2e-05, "loss": 0.03234659, "step": 19322 }, { "epoch": 38.646, "grad_norm": 1.3116410970687866, "learning_rate": 2e-05, "loss": 0.0679877, "step": 19323 }, { "epoch": 38.648, "grad_norm": 1.1549289226531982, "learning_rate": 2e-05, "loss": 0.03547323, "step": 19324 }, { "epoch": 38.65, "grad_norm": 1.051229476928711, "learning_rate": 2e-05, "loss": 0.04643082, "step": 19325 }, { "epoch": 38.652, "grad_norm": 1.4226535558700562, "learning_rate": 2e-05, "loss": 0.04567163, "step": 19326 }, { "epoch": 38.654, "grad_norm": 1.1238269805908203, "learning_rate": 2e-05, "loss": 0.04612422, "step": 19327 }, { "epoch": 38.656, "grad_norm": 0.9038845896720886, "learning_rate": 2e-05, "loss": 0.03385383, "step": 19328 }, { "epoch": 38.658, "grad_norm": 1.3185770511627197, "learning_rate": 2e-05, "loss": 0.04904406, "step": 19329 }, { "epoch": 38.66, "grad_norm": 1.009308099746704, "learning_rate": 2e-05, "loss": 0.03309182, "step": 19330 }, { "epoch": 38.662, "grad_norm": 1.0907920598983765, "learning_rate": 2e-05, "loss": 0.04859975, "step": 19331 }, { "epoch": 38.664, "grad_norm": 1.6204431056976318, "learning_rate": 2e-05, "loss": 0.06423303, "step": 19332 }, { "epoch": 38.666, "grad_norm": 1.0900827646255493, "learning_rate": 2e-05, "loss": 0.03651099, "step": 19333 }, { "epoch": 38.668, "grad_norm": 1.0801312923431396, "learning_rate": 2e-05, "loss": 0.04283509, "step": 19334 }, { "epoch": 38.67, "grad_norm": 1.1148172616958618, "learning_rate": 2e-05, "loss": 0.04175083, "step": 19335 }, { "epoch": 38.672, "grad_norm": 1.3437423706054688, "learning_rate": 2e-05, "loss": 0.05015115, "step": 19336 }, { "epoch": 38.674, "grad_norm": 4.181219577789307, "learning_rate": 2e-05, "loss": 0.04686987, "step": 19337 }, { "epoch": 38.676, "grad_norm": 1.5109001398086548, "learning_rate": 2e-05, "loss": 0.06039061, "step": 19338 }, { "epoch": 38.678, "grad_norm": 1.0285981893539429, "learning_rate": 2e-05, "loss": 0.04697583, "step": 19339 }, { "epoch": 38.68, "grad_norm": 1.3518024682998657, "learning_rate": 2e-05, "loss": 0.04377265, "step": 19340 }, { "epoch": 38.682, "grad_norm": 0.9568590521812439, "learning_rate": 2e-05, "loss": 0.03999636, "step": 19341 }, { "epoch": 38.684, "grad_norm": 1.028005838394165, "learning_rate": 2e-05, "loss": 0.03706386, "step": 19342 }, { "epoch": 38.686, "grad_norm": 1.0662776231765747, "learning_rate": 2e-05, "loss": 0.0290199, "step": 19343 }, { "epoch": 38.688, "grad_norm": 1.1227283477783203, "learning_rate": 2e-05, "loss": 0.04309115, "step": 19344 }, { "epoch": 38.69, "grad_norm": 2.102891445159912, "learning_rate": 2e-05, "loss": 0.05266853, "step": 19345 }, { "epoch": 38.692, "grad_norm": 1.332419991493225, "learning_rate": 2e-05, "loss": 0.05375844, "step": 19346 }, { "epoch": 38.694, "grad_norm": 1.225123643875122, "learning_rate": 2e-05, "loss": 0.06593364, "step": 19347 }, { "epoch": 38.696, "grad_norm": 1.0494664907455444, "learning_rate": 2e-05, "loss": 0.03990672, "step": 19348 }, { "epoch": 38.698, "grad_norm": 1.0080347061157227, "learning_rate": 2e-05, "loss": 0.030892, "step": 19349 }, { "epoch": 38.7, "grad_norm": 1.097760558128357, "learning_rate": 2e-05, "loss": 0.04047134, "step": 19350 }, { "epoch": 38.702, "grad_norm": 1.0847402811050415, "learning_rate": 2e-05, "loss": 0.04488046, "step": 19351 }, { "epoch": 38.704, "grad_norm": 1.0249265432357788, "learning_rate": 2e-05, "loss": 0.04965389, "step": 19352 }, { "epoch": 38.706, "grad_norm": 1.2041456699371338, "learning_rate": 2e-05, "loss": 0.05090973, "step": 19353 }, { "epoch": 38.708, "grad_norm": 1.1000607013702393, "learning_rate": 2e-05, "loss": 0.05245518, "step": 19354 }, { "epoch": 38.71, "grad_norm": 1.2249890565872192, "learning_rate": 2e-05, "loss": 0.05339778, "step": 19355 }, { "epoch": 38.712, "grad_norm": 1.2645611763000488, "learning_rate": 2e-05, "loss": 0.05283605, "step": 19356 }, { "epoch": 38.714, "grad_norm": 1.1105334758758545, "learning_rate": 2e-05, "loss": 0.03593569, "step": 19357 }, { "epoch": 38.716, "grad_norm": 1.3287653923034668, "learning_rate": 2e-05, "loss": 0.0488084, "step": 19358 }, { "epoch": 38.718, "grad_norm": 1.0509681701660156, "learning_rate": 2e-05, "loss": 0.03942019, "step": 19359 }, { "epoch": 38.72, "grad_norm": 1.7722209692001343, "learning_rate": 2e-05, "loss": 0.03066523, "step": 19360 }, { "epoch": 38.722, "grad_norm": 0.9916300773620605, "learning_rate": 2e-05, "loss": 0.0324635, "step": 19361 }, { "epoch": 38.724, "grad_norm": 1.1782196760177612, "learning_rate": 2e-05, "loss": 0.04620419, "step": 19362 }, { "epoch": 38.726, "grad_norm": 1.0661208629608154, "learning_rate": 2e-05, "loss": 0.03896091, "step": 19363 }, { "epoch": 38.728, "grad_norm": 1.0972884893417358, "learning_rate": 2e-05, "loss": 0.04361415, "step": 19364 }, { "epoch": 38.73, "grad_norm": 1.0587952136993408, "learning_rate": 2e-05, "loss": 0.05250075, "step": 19365 }, { "epoch": 38.732, "grad_norm": 0.9640065431594849, "learning_rate": 2e-05, "loss": 0.0490102, "step": 19366 }, { "epoch": 38.734, "grad_norm": 0.9679675102233887, "learning_rate": 2e-05, "loss": 0.0485023, "step": 19367 }, { "epoch": 38.736, "grad_norm": 0.9601588249206543, "learning_rate": 2e-05, "loss": 0.03191249, "step": 19368 }, { "epoch": 38.738, "grad_norm": 0.9147812128067017, "learning_rate": 2e-05, "loss": 0.03051454, "step": 19369 }, { "epoch": 38.74, "grad_norm": 1.3279993534088135, "learning_rate": 2e-05, "loss": 0.04855303, "step": 19370 }, { "epoch": 38.742, "grad_norm": 1.1139447689056396, "learning_rate": 2e-05, "loss": 0.0379698, "step": 19371 }, { "epoch": 38.744, "grad_norm": 3.099407911300659, "learning_rate": 2e-05, "loss": 0.05739495, "step": 19372 }, { "epoch": 38.746, "grad_norm": 1.2643882036209106, "learning_rate": 2e-05, "loss": 0.03910752, "step": 19373 }, { "epoch": 38.748, "grad_norm": 1.5956270694732666, "learning_rate": 2e-05, "loss": 0.03876605, "step": 19374 }, { "epoch": 38.75, "grad_norm": 1.0964267253875732, "learning_rate": 2e-05, "loss": 0.05385233, "step": 19375 }, { "epoch": 38.752, "grad_norm": 1.061686635017395, "learning_rate": 2e-05, "loss": 0.04640777, "step": 19376 }, { "epoch": 38.754, "grad_norm": 1.0934256315231323, "learning_rate": 2e-05, "loss": 0.0477779, "step": 19377 }, { "epoch": 38.756, "grad_norm": 1.264344573020935, "learning_rate": 2e-05, "loss": 0.04320396, "step": 19378 }, { "epoch": 38.758, "grad_norm": 0.8908719420433044, "learning_rate": 2e-05, "loss": 0.03581595, "step": 19379 }, { "epoch": 38.76, "grad_norm": 0.8562000393867493, "learning_rate": 2e-05, "loss": 0.03314196, "step": 19380 }, { "epoch": 38.762, "grad_norm": 1.3381973505020142, "learning_rate": 2e-05, "loss": 0.0530773, "step": 19381 }, { "epoch": 38.764, "grad_norm": 1.1104871034622192, "learning_rate": 2e-05, "loss": 0.03964019, "step": 19382 }, { "epoch": 38.766, "grad_norm": 0.8642142415046692, "learning_rate": 2e-05, "loss": 0.02890992, "step": 19383 }, { "epoch": 38.768, "grad_norm": 1.1693055629730225, "learning_rate": 2e-05, "loss": 0.05643238, "step": 19384 }, { "epoch": 38.77, "grad_norm": 1.1054975986480713, "learning_rate": 2e-05, "loss": 0.04296981, "step": 19385 }, { "epoch": 38.772, "grad_norm": 0.9312968850135803, "learning_rate": 2e-05, "loss": 0.03903844, "step": 19386 }, { "epoch": 38.774, "grad_norm": 0.9444169998168945, "learning_rate": 2e-05, "loss": 0.03578991, "step": 19387 }, { "epoch": 38.776, "grad_norm": 1.4450087547302246, "learning_rate": 2e-05, "loss": 0.05501417, "step": 19388 }, { "epoch": 38.778, "grad_norm": 0.8602413535118103, "learning_rate": 2e-05, "loss": 0.02327504, "step": 19389 }, { "epoch": 38.78, "grad_norm": 0.9514860510826111, "learning_rate": 2e-05, "loss": 0.03314387, "step": 19390 }, { "epoch": 38.782, "grad_norm": 1.8164118528366089, "learning_rate": 2e-05, "loss": 0.05498244, "step": 19391 }, { "epoch": 38.784, "grad_norm": 1.0155646800994873, "learning_rate": 2e-05, "loss": 0.03623019, "step": 19392 }, { "epoch": 38.786, "grad_norm": 1.072582483291626, "learning_rate": 2e-05, "loss": 0.04183128, "step": 19393 }, { "epoch": 38.788, "grad_norm": 1.230427622795105, "learning_rate": 2e-05, "loss": 0.05106575, "step": 19394 }, { "epoch": 38.79, "grad_norm": 1.2082676887512207, "learning_rate": 2e-05, "loss": 0.04019454, "step": 19395 }, { "epoch": 38.792, "grad_norm": 0.9523348212242126, "learning_rate": 2e-05, "loss": 0.03712245, "step": 19396 }, { "epoch": 38.794, "grad_norm": 1.1635305881500244, "learning_rate": 2e-05, "loss": 0.06478782, "step": 19397 }, { "epoch": 38.796, "grad_norm": 1.0057049989700317, "learning_rate": 2e-05, "loss": 0.0395334, "step": 19398 }, { "epoch": 38.798, "grad_norm": 4.972280502319336, "learning_rate": 2e-05, "loss": 0.06964958, "step": 19399 }, { "epoch": 38.8, "grad_norm": 1.0035690069198608, "learning_rate": 2e-05, "loss": 0.0374224, "step": 19400 }, { "epoch": 38.802, "grad_norm": 1.2331874370574951, "learning_rate": 2e-05, "loss": 0.06056982, "step": 19401 }, { "epoch": 38.804, "grad_norm": 0.9898601770401001, "learning_rate": 2e-05, "loss": 0.0408797, "step": 19402 }, { "epoch": 38.806, "grad_norm": 1.6647192239761353, "learning_rate": 2e-05, "loss": 0.05810039, "step": 19403 }, { "epoch": 38.808, "grad_norm": 1.5244585275650024, "learning_rate": 2e-05, "loss": 0.04567143, "step": 19404 }, { "epoch": 38.81, "grad_norm": 1.7174359560012817, "learning_rate": 2e-05, "loss": 0.04539747, "step": 19405 }, { "epoch": 38.812, "grad_norm": 1.0813353061676025, "learning_rate": 2e-05, "loss": 0.05624075, "step": 19406 }, { "epoch": 38.814, "grad_norm": 1.0967518091201782, "learning_rate": 2e-05, "loss": 0.04889642, "step": 19407 }, { "epoch": 38.816, "grad_norm": 1.3253763914108276, "learning_rate": 2e-05, "loss": 0.04598528, "step": 19408 }, { "epoch": 38.818, "grad_norm": 1.9320805072784424, "learning_rate": 2e-05, "loss": 0.044526, "step": 19409 }, { "epoch": 38.82, "grad_norm": 1.460055947303772, "learning_rate": 2e-05, "loss": 0.04301167, "step": 19410 }, { "epoch": 38.822, "grad_norm": 0.9948683381080627, "learning_rate": 2e-05, "loss": 0.03881852, "step": 19411 }, { "epoch": 38.824, "grad_norm": 1.8715769052505493, "learning_rate": 2e-05, "loss": 0.06176414, "step": 19412 }, { "epoch": 38.826, "grad_norm": 0.9490826725959778, "learning_rate": 2e-05, "loss": 0.03033833, "step": 19413 }, { "epoch": 38.828, "grad_norm": 1.244544267654419, "learning_rate": 2e-05, "loss": 0.03608648, "step": 19414 }, { "epoch": 38.83, "grad_norm": 1.0204013586044312, "learning_rate": 2e-05, "loss": 0.0391295, "step": 19415 }, { "epoch": 38.832, "grad_norm": 1.1057021617889404, "learning_rate": 2e-05, "loss": 0.03831366, "step": 19416 }, { "epoch": 38.834, "grad_norm": 0.8360907435417175, "learning_rate": 2e-05, "loss": 0.03464265, "step": 19417 }, { "epoch": 38.836, "grad_norm": 1.1959789991378784, "learning_rate": 2e-05, "loss": 0.0426341, "step": 19418 }, { "epoch": 38.838, "grad_norm": 1.7435823678970337, "learning_rate": 2e-05, "loss": 0.03919384, "step": 19419 }, { "epoch": 38.84, "grad_norm": 1.206491470336914, "learning_rate": 2e-05, "loss": 0.05015547, "step": 19420 }, { "epoch": 38.842, "grad_norm": 1.2069532871246338, "learning_rate": 2e-05, "loss": 0.05170257, "step": 19421 }, { "epoch": 38.844, "grad_norm": 0.9320435523986816, "learning_rate": 2e-05, "loss": 0.03840875, "step": 19422 }, { "epoch": 38.846, "grad_norm": 1.1888455152511597, "learning_rate": 2e-05, "loss": 0.03450851, "step": 19423 }, { "epoch": 38.848, "grad_norm": 1.221474051475525, "learning_rate": 2e-05, "loss": 0.03705349, "step": 19424 }, { "epoch": 38.85, "grad_norm": 1.1275914907455444, "learning_rate": 2e-05, "loss": 0.02588169, "step": 19425 }, { "epoch": 38.852, "grad_norm": 1.041494369506836, "learning_rate": 2e-05, "loss": 0.03554275, "step": 19426 }, { "epoch": 38.854, "grad_norm": 1.9732393026351929, "learning_rate": 2e-05, "loss": 0.0430849, "step": 19427 }, { "epoch": 38.856, "grad_norm": 0.9454289078712463, "learning_rate": 2e-05, "loss": 0.04227802, "step": 19428 }, { "epoch": 38.858, "grad_norm": 3.304931402206421, "learning_rate": 2e-05, "loss": 0.0448536, "step": 19429 }, { "epoch": 38.86, "grad_norm": 1.2048399448394775, "learning_rate": 2e-05, "loss": 0.0384104, "step": 19430 }, { "epoch": 38.862, "grad_norm": 0.9292692542076111, "learning_rate": 2e-05, "loss": 0.02343753, "step": 19431 }, { "epoch": 38.864, "grad_norm": 0.9677687287330627, "learning_rate": 2e-05, "loss": 0.03706508, "step": 19432 }, { "epoch": 38.866, "grad_norm": 1.823501467704773, "learning_rate": 2e-05, "loss": 0.0490564, "step": 19433 }, { "epoch": 38.868, "grad_norm": 1.0649175643920898, "learning_rate": 2e-05, "loss": 0.04258116, "step": 19434 }, { "epoch": 38.87, "grad_norm": 1.1792458295822144, "learning_rate": 2e-05, "loss": 0.04708882, "step": 19435 }, { "epoch": 38.872, "grad_norm": 2.8755428791046143, "learning_rate": 2e-05, "loss": 0.0499303, "step": 19436 }, { "epoch": 38.874, "grad_norm": 1.249969244003296, "learning_rate": 2e-05, "loss": 0.05888974, "step": 19437 }, { "epoch": 38.876, "grad_norm": 3.622722625732422, "learning_rate": 2e-05, "loss": 0.05942838, "step": 19438 }, { "epoch": 38.878, "grad_norm": 0.9560860991477966, "learning_rate": 2e-05, "loss": 0.02706887, "step": 19439 }, { "epoch": 38.88, "grad_norm": 1.1858021020889282, "learning_rate": 2e-05, "loss": 0.04468223, "step": 19440 }, { "epoch": 38.882, "grad_norm": 1.09776771068573, "learning_rate": 2e-05, "loss": 0.03100686, "step": 19441 }, { "epoch": 38.884, "grad_norm": 1.03327476978302, "learning_rate": 2e-05, "loss": 0.03282113, "step": 19442 }, { "epoch": 38.886, "grad_norm": 0.8793101906776428, "learning_rate": 2e-05, "loss": 0.03897075, "step": 19443 }, { "epoch": 38.888, "grad_norm": 0.9808013439178467, "learning_rate": 2e-05, "loss": 0.03626175, "step": 19444 }, { "epoch": 38.89, "grad_norm": 1.9631421566009521, "learning_rate": 2e-05, "loss": 0.03005378, "step": 19445 }, { "epoch": 38.892, "grad_norm": 1.3513725996017456, "learning_rate": 2e-05, "loss": 0.06195151, "step": 19446 }, { "epoch": 38.894, "grad_norm": 1.0996699333190918, "learning_rate": 2e-05, "loss": 0.03472177, "step": 19447 }, { "epoch": 38.896, "grad_norm": 1.6526262760162354, "learning_rate": 2e-05, "loss": 0.0470412, "step": 19448 }, { "epoch": 38.898, "grad_norm": 1.188834309577942, "learning_rate": 2e-05, "loss": 0.05335227, "step": 19449 }, { "epoch": 38.9, "grad_norm": 0.89872145652771, "learning_rate": 2e-05, "loss": 0.03062792, "step": 19450 }, { "epoch": 38.902, "grad_norm": 1.1985739469528198, "learning_rate": 2e-05, "loss": 0.05644695, "step": 19451 }, { "epoch": 38.904, "grad_norm": 1.271598219871521, "learning_rate": 2e-05, "loss": 0.04005952, "step": 19452 }, { "epoch": 38.906, "grad_norm": 1.0948083400726318, "learning_rate": 2e-05, "loss": 0.04059462, "step": 19453 }, { "epoch": 38.908, "grad_norm": 1.0310593843460083, "learning_rate": 2e-05, "loss": 0.0409682, "step": 19454 }, { "epoch": 38.91, "grad_norm": 1.0133187770843506, "learning_rate": 2e-05, "loss": 0.03088975, "step": 19455 }, { "epoch": 38.912, "grad_norm": 1.0480992794036865, "learning_rate": 2e-05, "loss": 0.03860742, "step": 19456 }, { "epoch": 38.914, "grad_norm": 1.1045564413070679, "learning_rate": 2e-05, "loss": 0.05509812, "step": 19457 }, { "epoch": 38.916, "grad_norm": 1.1643575429916382, "learning_rate": 2e-05, "loss": 0.04056837, "step": 19458 }, { "epoch": 38.918, "grad_norm": 0.9490357637405396, "learning_rate": 2e-05, "loss": 0.0363201, "step": 19459 }, { "epoch": 38.92, "grad_norm": 2.307276964187622, "learning_rate": 2e-05, "loss": 0.0493319, "step": 19460 }, { "epoch": 38.922, "grad_norm": 0.9341371655464172, "learning_rate": 2e-05, "loss": 0.03447349, "step": 19461 }, { "epoch": 38.924, "grad_norm": 1.2869606018066406, "learning_rate": 2e-05, "loss": 0.06975304, "step": 19462 }, { "epoch": 38.926, "grad_norm": 1.735102891921997, "learning_rate": 2e-05, "loss": 0.03511361, "step": 19463 }, { "epoch": 38.928, "grad_norm": 0.8264475464820862, "learning_rate": 2e-05, "loss": 0.02556785, "step": 19464 }, { "epoch": 38.93, "grad_norm": 1.0930505990982056, "learning_rate": 2e-05, "loss": 0.03736534, "step": 19465 }, { "epoch": 38.932, "grad_norm": 1.5734564065933228, "learning_rate": 2e-05, "loss": 0.05633171, "step": 19466 }, { "epoch": 38.934, "grad_norm": 2.77915358543396, "learning_rate": 2e-05, "loss": 0.04652165, "step": 19467 }, { "epoch": 38.936, "grad_norm": 1.1311591863632202, "learning_rate": 2e-05, "loss": 0.03719268, "step": 19468 }, { "epoch": 38.938, "grad_norm": 0.9112476706504822, "learning_rate": 2e-05, "loss": 0.03151006, "step": 19469 }, { "epoch": 38.94, "grad_norm": 1.0156446695327759, "learning_rate": 2e-05, "loss": 0.035737, "step": 19470 }, { "epoch": 38.942, "grad_norm": 0.9524457454681396, "learning_rate": 2e-05, "loss": 0.03295144, "step": 19471 }, { "epoch": 38.944, "grad_norm": 1.012307047843933, "learning_rate": 2e-05, "loss": 0.04285971, "step": 19472 }, { "epoch": 38.946, "grad_norm": 1.062119483947754, "learning_rate": 2e-05, "loss": 0.04979744, "step": 19473 }, { "epoch": 38.948, "grad_norm": 2.3729474544525146, "learning_rate": 2e-05, "loss": 0.04409181, "step": 19474 }, { "epoch": 38.95, "grad_norm": 1.2182929515838623, "learning_rate": 2e-05, "loss": 0.04355058, "step": 19475 }, { "epoch": 38.952, "grad_norm": 1.1094915866851807, "learning_rate": 2e-05, "loss": 0.03414284, "step": 19476 }, { "epoch": 38.954, "grad_norm": 0.8603318333625793, "learning_rate": 2e-05, "loss": 0.03019584, "step": 19477 }, { "epoch": 38.956, "grad_norm": 0.9269896745681763, "learning_rate": 2e-05, "loss": 0.03276512, "step": 19478 }, { "epoch": 38.958, "grad_norm": 1.4540586471557617, "learning_rate": 2e-05, "loss": 0.06042841, "step": 19479 }, { "epoch": 38.96, "grad_norm": 2.0777792930603027, "learning_rate": 2e-05, "loss": 0.04476339, "step": 19480 }, { "epoch": 38.962, "grad_norm": 1.1270149946212769, "learning_rate": 2e-05, "loss": 0.04692201, "step": 19481 }, { "epoch": 38.964, "grad_norm": 1.1485213041305542, "learning_rate": 2e-05, "loss": 0.03909953, "step": 19482 }, { "epoch": 38.966, "grad_norm": 1.2533081769943237, "learning_rate": 2e-05, "loss": 0.05358913, "step": 19483 }, { "epoch": 38.968, "grad_norm": 2.505784273147583, "learning_rate": 2e-05, "loss": 0.05908766, "step": 19484 }, { "epoch": 38.97, "grad_norm": 1.1840784549713135, "learning_rate": 2e-05, "loss": 0.05253317, "step": 19485 }, { "epoch": 38.972, "grad_norm": 1.205790400505066, "learning_rate": 2e-05, "loss": 0.04245798, "step": 19486 }, { "epoch": 38.974, "grad_norm": 1.2023859024047852, "learning_rate": 2e-05, "loss": 0.05430716, "step": 19487 }, { "epoch": 38.976, "grad_norm": 1.0986604690551758, "learning_rate": 2e-05, "loss": 0.0496498, "step": 19488 }, { "epoch": 38.978, "grad_norm": 1.2010287046432495, "learning_rate": 2e-05, "loss": 0.03882793, "step": 19489 }, { "epoch": 38.98, "grad_norm": 1.0033947229385376, "learning_rate": 2e-05, "loss": 0.03239113, "step": 19490 }, { "epoch": 38.982, "grad_norm": 1.1096763610839844, "learning_rate": 2e-05, "loss": 0.04537747, "step": 19491 }, { "epoch": 38.984, "grad_norm": 2.3018782138824463, "learning_rate": 2e-05, "loss": 0.07274872, "step": 19492 }, { "epoch": 38.986, "grad_norm": 1.8061745166778564, "learning_rate": 2e-05, "loss": 0.06697616, "step": 19493 }, { "epoch": 38.988, "grad_norm": 1.0990080833435059, "learning_rate": 2e-05, "loss": 0.04230041, "step": 19494 }, { "epoch": 38.99, "grad_norm": 1.3976279497146606, "learning_rate": 2e-05, "loss": 0.04429794, "step": 19495 }, { "epoch": 38.992, "grad_norm": 1.0603405237197876, "learning_rate": 2e-05, "loss": 0.04230534, "step": 19496 }, { "epoch": 38.994, "grad_norm": 1.284908413887024, "learning_rate": 2e-05, "loss": 0.04383961, "step": 19497 }, { "epoch": 38.996, "grad_norm": 3.1431775093078613, "learning_rate": 2e-05, "loss": 0.05683126, "step": 19498 }, { "epoch": 38.998, "grad_norm": 0.9128478169441223, "learning_rate": 2e-05, "loss": 0.03710816, "step": 19499 }, { "epoch": 39.0, "grad_norm": 1.276460886001587, "learning_rate": 2e-05, "loss": 0.04558346, "step": 19500 }, { "epoch": 39.0, "eval_performance": { "AngleClassification_1": 1.0, "AngleClassification_2": 0.998, "AngleClassification_3": 0.9780439121756487, "Equal_1": 0.998, "Equal_2": 0.9760479041916168, "Equal_3": 0.9840319361277445, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.9940119760479041, "Parallel_1": 0.9899799599198397, "Parallel_2": 0.9959919839679359, "Parallel_3": 0.994, "Perpendicular_1": 0.996, "Perpendicular_2": 0.996, "Perpendicular_3": 0.875751503006012, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.994, "PointLiesOnCircle_3": 0.984, "PointLiesOnLine_1": 0.9939879759519038, "PointLiesOnLine_2": 0.9979959919839679, "PointLiesOnLine_3": 0.9840319361277445 }, "eval_runtime": 227.9955, "eval_samples_per_second": 46.054, "eval_steps_per_second": 0.921, "step": 19500 }, { "epoch": 39.002, "grad_norm": 1.2714347839355469, "learning_rate": 2e-05, "loss": 0.04300114, "step": 19501 }, { "epoch": 39.004, "grad_norm": 1.0751265287399292, "learning_rate": 2e-05, "loss": 0.03841715, "step": 19502 }, { "epoch": 39.006, "grad_norm": 1.1400203704833984, "learning_rate": 2e-05, "loss": 0.04756, "step": 19503 }, { "epoch": 39.008, "grad_norm": 1.38941490650177, "learning_rate": 2e-05, "loss": 0.0549323, "step": 19504 }, { "epoch": 39.01, "grad_norm": 1.1502317190170288, "learning_rate": 2e-05, "loss": 0.03302306, "step": 19505 }, { "epoch": 39.012, "grad_norm": 2.8259105682373047, "learning_rate": 2e-05, "loss": 0.05374134, "step": 19506 }, { "epoch": 39.014, "grad_norm": 1.0943992137908936, "learning_rate": 2e-05, "loss": 0.04286569, "step": 19507 }, { "epoch": 39.016, "grad_norm": 1.1421023607254028, "learning_rate": 2e-05, "loss": 0.03799384, "step": 19508 }, { "epoch": 39.018, "grad_norm": 1.3032474517822266, "learning_rate": 2e-05, "loss": 0.04835647, "step": 19509 }, { "epoch": 39.02, "grad_norm": 0.9900496602058411, "learning_rate": 2e-05, "loss": 0.03727106, "step": 19510 }, { "epoch": 39.022, "grad_norm": 1.1914006471633911, "learning_rate": 2e-05, "loss": 0.04466703, "step": 19511 }, { "epoch": 39.024, "grad_norm": 1.3005322217941284, "learning_rate": 2e-05, "loss": 0.04008747, "step": 19512 }, { "epoch": 39.026, "grad_norm": 1.909360647201538, "learning_rate": 2e-05, "loss": 0.073837, "step": 19513 }, { "epoch": 39.028, "grad_norm": 1.0366175174713135, "learning_rate": 2e-05, "loss": 0.05594283, "step": 19514 }, { "epoch": 39.03, "grad_norm": 1.0882703065872192, "learning_rate": 2e-05, "loss": 0.04728037, "step": 19515 }, { "epoch": 39.032, "grad_norm": 0.8995373845100403, "learning_rate": 2e-05, "loss": 0.03831483, "step": 19516 }, { "epoch": 39.034, "grad_norm": 1.3030656576156616, "learning_rate": 2e-05, "loss": 0.04437663, "step": 19517 }, { "epoch": 39.036, "grad_norm": 1.0662935972213745, "learning_rate": 2e-05, "loss": 0.0328241, "step": 19518 }, { "epoch": 39.038, "grad_norm": 0.8671280145645142, "learning_rate": 2e-05, "loss": 0.0283519, "step": 19519 }, { "epoch": 39.04, "grad_norm": 1.1673743724822998, "learning_rate": 2e-05, "loss": 0.0414161, "step": 19520 }, { "epoch": 39.042, "grad_norm": 1.2139179706573486, "learning_rate": 2e-05, "loss": 0.04635594, "step": 19521 }, { "epoch": 39.044, "grad_norm": 0.9512651562690735, "learning_rate": 2e-05, "loss": 0.03813294, "step": 19522 }, { "epoch": 39.046, "grad_norm": 1.8997867107391357, "learning_rate": 2e-05, "loss": 0.05606607, "step": 19523 }, { "epoch": 39.048, "grad_norm": 1.0596754550933838, "learning_rate": 2e-05, "loss": 0.04884341, "step": 19524 }, { "epoch": 39.05, "grad_norm": 1.1825029850006104, "learning_rate": 2e-05, "loss": 0.03288661, "step": 19525 }, { "epoch": 39.052, "grad_norm": 1.3473883867263794, "learning_rate": 2e-05, "loss": 0.0437719, "step": 19526 }, { "epoch": 39.054, "grad_norm": 0.9618358612060547, "learning_rate": 2e-05, "loss": 0.04271593, "step": 19527 }, { "epoch": 39.056, "grad_norm": 1.1144673824310303, "learning_rate": 2e-05, "loss": 0.04895557, "step": 19528 }, { "epoch": 39.058, "grad_norm": 3.2009544372558594, "learning_rate": 2e-05, "loss": 0.05361964, "step": 19529 }, { "epoch": 39.06, "grad_norm": 1.7477309703826904, "learning_rate": 2e-05, "loss": 0.03844352, "step": 19530 }, { "epoch": 39.062, "grad_norm": 1.0468238592147827, "learning_rate": 2e-05, "loss": 0.0461535, "step": 19531 }, { "epoch": 39.064, "grad_norm": 0.9263973832130432, "learning_rate": 2e-05, "loss": 0.02911148, "step": 19532 }, { "epoch": 39.066, "grad_norm": 1.0982331037521362, "learning_rate": 2e-05, "loss": 0.04957676, "step": 19533 }, { "epoch": 39.068, "grad_norm": 1.115716576576233, "learning_rate": 2e-05, "loss": 0.04164068, "step": 19534 }, { "epoch": 39.07, "grad_norm": 0.9107357263565063, "learning_rate": 2e-05, "loss": 0.03207695, "step": 19535 }, { "epoch": 39.072, "grad_norm": 1.0058120489120483, "learning_rate": 2e-05, "loss": 0.0498001, "step": 19536 }, { "epoch": 39.074, "grad_norm": 1.0123580694198608, "learning_rate": 2e-05, "loss": 0.04986209, "step": 19537 }, { "epoch": 39.076, "grad_norm": 0.9739790558815002, "learning_rate": 2e-05, "loss": 0.03777283, "step": 19538 }, { "epoch": 39.078, "grad_norm": 0.8499792218208313, "learning_rate": 2e-05, "loss": 0.03196893, "step": 19539 }, { "epoch": 39.08, "grad_norm": 1.4085592031478882, "learning_rate": 2e-05, "loss": 0.04340118, "step": 19540 }, { "epoch": 39.082, "grad_norm": 1.3276405334472656, "learning_rate": 2e-05, "loss": 0.07472995, "step": 19541 }, { "epoch": 39.084, "grad_norm": 0.946808934211731, "learning_rate": 2e-05, "loss": 0.03309186, "step": 19542 }, { "epoch": 39.086, "grad_norm": 1.3628284931182861, "learning_rate": 2e-05, "loss": 0.04011816, "step": 19543 }, { "epoch": 39.088, "grad_norm": 1.0845359563827515, "learning_rate": 2e-05, "loss": 0.03995037, "step": 19544 }, { "epoch": 39.09, "grad_norm": 1.0413905382156372, "learning_rate": 2e-05, "loss": 0.0328539, "step": 19545 }, { "epoch": 39.092, "grad_norm": 1.0118858814239502, "learning_rate": 2e-05, "loss": 0.03670184, "step": 19546 }, { "epoch": 39.094, "grad_norm": 1.3958373069763184, "learning_rate": 2e-05, "loss": 0.05120326, "step": 19547 }, { "epoch": 39.096, "grad_norm": 0.9187886714935303, "learning_rate": 2e-05, "loss": 0.03461922, "step": 19548 }, { "epoch": 39.098, "grad_norm": 0.9393334984779358, "learning_rate": 2e-05, "loss": 0.03134007, "step": 19549 }, { "epoch": 39.1, "grad_norm": 4.266992568969727, "learning_rate": 2e-05, "loss": 0.04023593, "step": 19550 }, { "epoch": 39.102, "grad_norm": 1.163954496383667, "learning_rate": 2e-05, "loss": 0.04735773, "step": 19551 }, { "epoch": 39.104, "grad_norm": 1.1031147241592407, "learning_rate": 2e-05, "loss": 0.03149136, "step": 19552 }, { "epoch": 39.106, "grad_norm": 1.1919732093811035, "learning_rate": 2e-05, "loss": 0.04811309, "step": 19553 }, { "epoch": 39.108, "grad_norm": 1.4298028945922852, "learning_rate": 2e-05, "loss": 0.04341234, "step": 19554 }, { "epoch": 39.11, "grad_norm": 1.0870345830917358, "learning_rate": 2e-05, "loss": 0.04019815, "step": 19555 }, { "epoch": 39.112, "grad_norm": 1.4308141469955444, "learning_rate": 2e-05, "loss": 0.04626186, "step": 19556 }, { "epoch": 39.114, "grad_norm": 1.073665738105774, "learning_rate": 2e-05, "loss": 0.0351837, "step": 19557 }, { "epoch": 39.116, "grad_norm": 1.3399205207824707, "learning_rate": 2e-05, "loss": 0.05304701, "step": 19558 }, { "epoch": 39.118, "grad_norm": 1.2672563791275024, "learning_rate": 2e-05, "loss": 0.0569851, "step": 19559 }, { "epoch": 39.12, "grad_norm": 0.8453909158706665, "learning_rate": 2e-05, "loss": 0.03032672, "step": 19560 }, { "epoch": 39.122, "grad_norm": 1.124037504196167, "learning_rate": 2e-05, "loss": 0.03775398, "step": 19561 }, { "epoch": 39.124, "grad_norm": 0.9561428427696228, "learning_rate": 2e-05, "loss": 0.04049622, "step": 19562 }, { "epoch": 39.126, "grad_norm": 1.1908433437347412, "learning_rate": 2e-05, "loss": 0.05023205, "step": 19563 }, { "epoch": 39.128, "grad_norm": 1.2511805295944214, "learning_rate": 2e-05, "loss": 0.05918074, "step": 19564 }, { "epoch": 39.13, "grad_norm": 1.2420400381088257, "learning_rate": 2e-05, "loss": 0.0524884, "step": 19565 }, { "epoch": 39.132, "grad_norm": 1.3621375560760498, "learning_rate": 2e-05, "loss": 0.04135905, "step": 19566 }, { "epoch": 39.134, "grad_norm": 1.0503735542297363, "learning_rate": 2e-05, "loss": 0.04336233, "step": 19567 }, { "epoch": 39.136, "grad_norm": 1.0883862972259521, "learning_rate": 2e-05, "loss": 0.04361054, "step": 19568 }, { "epoch": 39.138, "grad_norm": 1.4211379289627075, "learning_rate": 2e-05, "loss": 0.05795705, "step": 19569 }, { "epoch": 39.14, "grad_norm": 0.8153306841850281, "learning_rate": 2e-05, "loss": 0.02727619, "step": 19570 }, { "epoch": 39.142, "grad_norm": 1.0398224592208862, "learning_rate": 2e-05, "loss": 0.04660147, "step": 19571 }, { "epoch": 39.144, "grad_norm": 1.0585938692092896, "learning_rate": 2e-05, "loss": 0.04393147, "step": 19572 }, { "epoch": 39.146, "grad_norm": 0.8796592950820923, "learning_rate": 2e-05, "loss": 0.03518384, "step": 19573 }, { "epoch": 39.148, "grad_norm": 1.160361647605896, "learning_rate": 2e-05, "loss": 0.03703085, "step": 19574 }, { "epoch": 39.15, "grad_norm": 1.0942624807357788, "learning_rate": 2e-05, "loss": 0.03199557, "step": 19575 }, { "epoch": 39.152, "grad_norm": 1.1546415090560913, "learning_rate": 2e-05, "loss": 0.0371153, "step": 19576 }, { "epoch": 39.154, "grad_norm": 1.41097092628479, "learning_rate": 2e-05, "loss": 0.04051156, "step": 19577 }, { "epoch": 39.156, "grad_norm": 1.2237451076507568, "learning_rate": 2e-05, "loss": 0.04790863, "step": 19578 }, { "epoch": 39.158, "grad_norm": 0.9968982338905334, "learning_rate": 2e-05, "loss": 0.03655846, "step": 19579 }, { "epoch": 39.16, "grad_norm": 1.0683836936950684, "learning_rate": 2e-05, "loss": 0.03897025, "step": 19580 }, { "epoch": 39.162, "grad_norm": 1.1533839702606201, "learning_rate": 2e-05, "loss": 0.05251057, "step": 19581 }, { "epoch": 39.164, "grad_norm": 0.9645445942878723, "learning_rate": 2e-05, "loss": 0.03381518, "step": 19582 }, { "epoch": 39.166, "grad_norm": 1.1317728757858276, "learning_rate": 2e-05, "loss": 0.05725499, "step": 19583 }, { "epoch": 39.168, "grad_norm": 0.7362602353096008, "learning_rate": 2e-05, "loss": 0.02023915, "step": 19584 }, { "epoch": 39.17, "grad_norm": 1.0119396448135376, "learning_rate": 2e-05, "loss": 0.0383563, "step": 19585 }, { "epoch": 39.172, "grad_norm": 1.0122408866882324, "learning_rate": 2e-05, "loss": 0.05013464, "step": 19586 }, { "epoch": 39.174, "grad_norm": 1.0416827201843262, "learning_rate": 2e-05, "loss": 0.03425042, "step": 19587 }, { "epoch": 39.176, "grad_norm": 1.2084214687347412, "learning_rate": 2e-05, "loss": 0.03768454, "step": 19588 }, { "epoch": 39.178, "grad_norm": 1.2101374864578247, "learning_rate": 2e-05, "loss": 0.0475283, "step": 19589 }, { "epoch": 39.18, "grad_norm": 0.9598370790481567, "learning_rate": 2e-05, "loss": 0.03470413, "step": 19590 }, { "epoch": 39.182, "grad_norm": 1.0622376203536987, "learning_rate": 2e-05, "loss": 0.04423476, "step": 19591 }, { "epoch": 39.184, "grad_norm": 1.2616093158721924, "learning_rate": 2e-05, "loss": 0.05125679, "step": 19592 }, { "epoch": 39.186, "grad_norm": 1.1612454652786255, "learning_rate": 2e-05, "loss": 0.04343261, "step": 19593 }, { "epoch": 39.188, "grad_norm": 1.0475002527236938, "learning_rate": 2e-05, "loss": 0.05007475, "step": 19594 }, { "epoch": 39.19, "grad_norm": 1.9500411748886108, "learning_rate": 2e-05, "loss": 0.05376959, "step": 19595 }, { "epoch": 39.192, "grad_norm": 0.9748607277870178, "learning_rate": 2e-05, "loss": 0.04002687, "step": 19596 }, { "epoch": 39.194, "grad_norm": 0.9659441113471985, "learning_rate": 2e-05, "loss": 0.0354165, "step": 19597 }, { "epoch": 39.196, "grad_norm": 1.0485514402389526, "learning_rate": 2e-05, "loss": 0.0383821, "step": 19598 }, { "epoch": 39.198, "grad_norm": 0.896861732006073, "learning_rate": 2e-05, "loss": 0.02712493, "step": 19599 }, { "epoch": 39.2, "grad_norm": 0.9799025058746338, "learning_rate": 2e-05, "loss": 0.0336553, "step": 19600 }, { "epoch": 39.202, "grad_norm": 1.0601696968078613, "learning_rate": 2e-05, "loss": 0.0325969, "step": 19601 }, { "epoch": 39.204, "grad_norm": 0.896075427532196, "learning_rate": 2e-05, "loss": 0.04165824, "step": 19602 }, { "epoch": 39.206, "grad_norm": 3.560518503189087, "learning_rate": 2e-05, "loss": 0.05947834, "step": 19603 }, { "epoch": 39.208, "grad_norm": 6.4517292976379395, "learning_rate": 2e-05, "loss": 0.03291585, "step": 19604 }, { "epoch": 39.21, "grad_norm": 1.4783177375793457, "learning_rate": 2e-05, "loss": 0.05989231, "step": 19605 }, { "epoch": 39.212, "grad_norm": 1.169450044631958, "learning_rate": 2e-05, "loss": 0.05651071, "step": 19606 }, { "epoch": 39.214, "grad_norm": 1.1457308530807495, "learning_rate": 2e-05, "loss": 0.04954067, "step": 19607 }, { "epoch": 39.216, "grad_norm": 0.9234391450881958, "learning_rate": 2e-05, "loss": 0.03786669, "step": 19608 }, { "epoch": 39.218, "grad_norm": 1.8294939994812012, "learning_rate": 2e-05, "loss": 0.04765145, "step": 19609 }, { "epoch": 39.22, "grad_norm": 0.9753006100654602, "learning_rate": 2e-05, "loss": 0.03613771, "step": 19610 }, { "epoch": 39.222, "grad_norm": 1.076045274734497, "learning_rate": 2e-05, "loss": 0.05496455, "step": 19611 }, { "epoch": 39.224, "grad_norm": 0.9420369863510132, "learning_rate": 2e-05, "loss": 0.02675991, "step": 19612 }, { "epoch": 39.226, "grad_norm": 0.8606964945793152, "learning_rate": 2e-05, "loss": 0.03249316, "step": 19613 }, { "epoch": 39.228, "grad_norm": 0.8910853266716003, "learning_rate": 2e-05, "loss": 0.03209341, "step": 19614 }, { "epoch": 39.23, "grad_norm": 1.3207508325576782, "learning_rate": 2e-05, "loss": 0.05671993, "step": 19615 }, { "epoch": 39.232, "grad_norm": 1.0767929553985596, "learning_rate": 2e-05, "loss": 0.04855131, "step": 19616 }, { "epoch": 39.234, "grad_norm": 1.3225675821304321, "learning_rate": 2e-05, "loss": 0.04554161, "step": 19617 }, { "epoch": 39.236, "grad_norm": 1.0781341791152954, "learning_rate": 2e-05, "loss": 0.05150434, "step": 19618 }, { "epoch": 39.238, "grad_norm": 1.153771996498108, "learning_rate": 2e-05, "loss": 0.05380385, "step": 19619 }, { "epoch": 39.24, "grad_norm": 1.1624524593353271, "learning_rate": 2e-05, "loss": 0.04977474, "step": 19620 }, { "epoch": 39.242, "grad_norm": 2.89764666557312, "learning_rate": 2e-05, "loss": 0.05413162, "step": 19621 }, { "epoch": 39.244, "grad_norm": 1.6506226062774658, "learning_rate": 2e-05, "loss": 0.05025718, "step": 19622 }, { "epoch": 39.246, "grad_norm": 0.972945511341095, "learning_rate": 2e-05, "loss": 0.034791, "step": 19623 }, { "epoch": 39.248, "grad_norm": 1.0838979482650757, "learning_rate": 2e-05, "loss": 0.06034057, "step": 19624 }, { "epoch": 39.25, "grad_norm": 1.421980857849121, "learning_rate": 2e-05, "loss": 0.04393078, "step": 19625 }, { "epoch": 39.252, "grad_norm": 1.9003345966339111, "learning_rate": 2e-05, "loss": 0.02887249, "step": 19626 }, { "epoch": 39.254, "grad_norm": 1.196028709411621, "learning_rate": 2e-05, "loss": 0.05561232, "step": 19627 }, { "epoch": 39.256, "grad_norm": 1.0108635425567627, "learning_rate": 2e-05, "loss": 0.04469563, "step": 19628 }, { "epoch": 39.258, "grad_norm": 1.160940408706665, "learning_rate": 2e-05, "loss": 0.03017049, "step": 19629 }, { "epoch": 39.26, "grad_norm": 1.5453789234161377, "learning_rate": 2e-05, "loss": 0.0340977, "step": 19630 }, { "epoch": 39.262, "grad_norm": 1.465960144996643, "learning_rate": 2e-05, "loss": 0.04938908, "step": 19631 }, { "epoch": 39.264, "grad_norm": 1.1503047943115234, "learning_rate": 2e-05, "loss": 0.05061076, "step": 19632 }, { "epoch": 39.266, "grad_norm": 1.6009891033172607, "learning_rate": 2e-05, "loss": 0.06736432, "step": 19633 }, { "epoch": 39.268, "grad_norm": 1.4249523878097534, "learning_rate": 2e-05, "loss": 0.05180561, "step": 19634 }, { "epoch": 39.27, "grad_norm": 0.9332430362701416, "learning_rate": 2e-05, "loss": 0.03304841, "step": 19635 }, { "epoch": 39.272, "grad_norm": 1.6524488925933838, "learning_rate": 2e-05, "loss": 0.07958519, "step": 19636 }, { "epoch": 39.274, "grad_norm": 1.0456358194351196, "learning_rate": 2e-05, "loss": 0.03491893, "step": 19637 }, { "epoch": 39.276, "grad_norm": 0.9185405969619751, "learning_rate": 2e-05, "loss": 0.03046339, "step": 19638 }, { "epoch": 39.278, "grad_norm": 1.2008498907089233, "learning_rate": 2e-05, "loss": 0.03754835, "step": 19639 }, { "epoch": 39.28, "grad_norm": 2.05214786529541, "learning_rate": 2e-05, "loss": 0.04654014, "step": 19640 }, { "epoch": 39.282, "grad_norm": 1.0952036380767822, "learning_rate": 2e-05, "loss": 0.04373389, "step": 19641 }, { "epoch": 39.284, "grad_norm": 2.267845392227173, "learning_rate": 2e-05, "loss": 0.04513358, "step": 19642 }, { "epoch": 39.286, "grad_norm": 1.5832595825195312, "learning_rate": 2e-05, "loss": 0.0430153, "step": 19643 }, { "epoch": 39.288, "grad_norm": 2.135751962661743, "learning_rate": 2e-05, "loss": 0.04186058, "step": 19644 }, { "epoch": 39.29, "grad_norm": 1.3054178953170776, "learning_rate": 2e-05, "loss": 0.04115719, "step": 19645 }, { "epoch": 39.292, "grad_norm": 1.84879469871521, "learning_rate": 2e-05, "loss": 0.03141472, "step": 19646 }, { "epoch": 39.294, "grad_norm": 1.4434685707092285, "learning_rate": 2e-05, "loss": 0.05696239, "step": 19647 }, { "epoch": 39.296, "grad_norm": 1.730857491493225, "learning_rate": 2e-05, "loss": 0.04795872, "step": 19648 }, { "epoch": 39.298, "grad_norm": 1.7834736108779907, "learning_rate": 2e-05, "loss": 0.04523832, "step": 19649 }, { "epoch": 39.3, "grad_norm": 1.1831159591674805, "learning_rate": 2e-05, "loss": 0.0581971, "step": 19650 }, { "epoch": 39.302, "grad_norm": 1.0279744863510132, "learning_rate": 2e-05, "loss": 0.04346603, "step": 19651 }, { "epoch": 39.304, "grad_norm": 0.9996277093887329, "learning_rate": 2e-05, "loss": 0.04396886, "step": 19652 }, { "epoch": 39.306, "grad_norm": 0.9984725713729858, "learning_rate": 2e-05, "loss": 0.03297246, "step": 19653 }, { "epoch": 39.308, "grad_norm": 2.5902860164642334, "learning_rate": 2e-05, "loss": 0.03650786, "step": 19654 }, { "epoch": 39.31, "grad_norm": 0.8725726008415222, "learning_rate": 2e-05, "loss": 0.03605822, "step": 19655 }, { "epoch": 39.312, "grad_norm": 0.9934148192405701, "learning_rate": 2e-05, "loss": 0.04889242, "step": 19656 }, { "epoch": 39.314, "grad_norm": 0.9594886302947998, "learning_rate": 2e-05, "loss": 0.03192176, "step": 19657 }, { "epoch": 39.316, "grad_norm": 1.0329533815383911, "learning_rate": 2e-05, "loss": 0.04281428, "step": 19658 }, { "epoch": 39.318, "grad_norm": 1.023107886314392, "learning_rate": 2e-05, "loss": 0.04575653, "step": 19659 }, { "epoch": 39.32, "grad_norm": 1.0482814311981201, "learning_rate": 2e-05, "loss": 0.03597127, "step": 19660 }, { "epoch": 39.322, "grad_norm": 0.9163859486579895, "learning_rate": 2e-05, "loss": 0.03721383, "step": 19661 }, { "epoch": 39.324, "grad_norm": 1.3281673192977905, "learning_rate": 2e-05, "loss": 0.05865282, "step": 19662 }, { "epoch": 39.326, "grad_norm": 2.1809422969818115, "learning_rate": 2e-05, "loss": 0.05717122, "step": 19663 }, { "epoch": 39.328, "grad_norm": 1.197657823562622, "learning_rate": 2e-05, "loss": 0.04194178, "step": 19664 }, { "epoch": 39.33, "grad_norm": 1.0848605632781982, "learning_rate": 2e-05, "loss": 0.04082118, "step": 19665 }, { "epoch": 39.332, "grad_norm": 1.1988998651504517, "learning_rate": 2e-05, "loss": 0.06086764, "step": 19666 }, { "epoch": 39.334, "grad_norm": 1.7585020065307617, "learning_rate": 2e-05, "loss": 0.0494886, "step": 19667 }, { "epoch": 39.336, "grad_norm": 1.2867776155471802, "learning_rate": 2e-05, "loss": 0.0645042, "step": 19668 }, { "epoch": 39.338, "grad_norm": 0.9545384049415588, "learning_rate": 2e-05, "loss": 0.04120807, "step": 19669 }, { "epoch": 39.34, "grad_norm": 2.0641932487487793, "learning_rate": 2e-05, "loss": 0.05337419, "step": 19670 }, { "epoch": 39.342, "grad_norm": 1.1523480415344238, "learning_rate": 2e-05, "loss": 0.04138858, "step": 19671 }, { "epoch": 39.344, "grad_norm": 1.1756221055984497, "learning_rate": 2e-05, "loss": 0.05644086, "step": 19672 }, { "epoch": 39.346, "grad_norm": 1.2095904350280762, "learning_rate": 2e-05, "loss": 0.05488298, "step": 19673 }, { "epoch": 39.348, "grad_norm": 1.2694520950317383, "learning_rate": 2e-05, "loss": 0.04609445, "step": 19674 }, { "epoch": 39.35, "grad_norm": 1.3962328433990479, "learning_rate": 2e-05, "loss": 0.04385295, "step": 19675 }, { "epoch": 39.352, "grad_norm": 1.8774652481079102, "learning_rate": 2e-05, "loss": 0.04714279, "step": 19676 }, { "epoch": 39.354, "grad_norm": 1.1188228130340576, "learning_rate": 2e-05, "loss": 0.03821624, "step": 19677 }, { "epoch": 39.356, "grad_norm": 2.426961898803711, "learning_rate": 2e-05, "loss": 0.06795914, "step": 19678 }, { "epoch": 39.358, "grad_norm": 0.9878832101821899, "learning_rate": 2e-05, "loss": 0.04934359, "step": 19679 }, { "epoch": 39.36, "grad_norm": 1.2369712591171265, "learning_rate": 2e-05, "loss": 0.05098942, "step": 19680 }, { "epoch": 39.362, "grad_norm": 1.010457158088684, "learning_rate": 2e-05, "loss": 0.03751682, "step": 19681 }, { "epoch": 39.364, "grad_norm": 1.3120168447494507, "learning_rate": 2e-05, "loss": 0.05325425, "step": 19682 }, { "epoch": 39.366, "grad_norm": 1.2930068969726562, "learning_rate": 2e-05, "loss": 0.04439162, "step": 19683 }, { "epoch": 39.368, "grad_norm": 3.0419323444366455, "learning_rate": 2e-05, "loss": 0.04329475, "step": 19684 }, { "epoch": 39.37, "grad_norm": 1.2954773902893066, "learning_rate": 2e-05, "loss": 0.04884758, "step": 19685 }, { "epoch": 39.372, "grad_norm": 1.1993368864059448, "learning_rate": 2e-05, "loss": 0.03655628, "step": 19686 }, { "epoch": 39.374, "grad_norm": 1.3307867050170898, "learning_rate": 2e-05, "loss": 0.06023072, "step": 19687 }, { "epoch": 39.376, "grad_norm": 1.0052247047424316, "learning_rate": 2e-05, "loss": 0.04301659, "step": 19688 }, { "epoch": 39.378, "grad_norm": 1.235184907913208, "learning_rate": 2e-05, "loss": 0.05301848, "step": 19689 }, { "epoch": 39.38, "grad_norm": 1.4809142351150513, "learning_rate": 2e-05, "loss": 0.03989592, "step": 19690 }, { "epoch": 39.382, "grad_norm": 1.3421069383621216, "learning_rate": 2e-05, "loss": 0.06171556, "step": 19691 }, { "epoch": 39.384, "grad_norm": 1.6389429569244385, "learning_rate": 2e-05, "loss": 0.05432124, "step": 19692 }, { "epoch": 39.386, "grad_norm": 1.2374279499053955, "learning_rate": 2e-05, "loss": 0.05883455, "step": 19693 }, { "epoch": 39.388, "grad_norm": 1.0546265840530396, "learning_rate": 2e-05, "loss": 0.04780284, "step": 19694 }, { "epoch": 39.39, "grad_norm": 1.8565644025802612, "learning_rate": 2e-05, "loss": 0.04596139, "step": 19695 }, { "epoch": 39.392, "grad_norm": 0.9969097375869751, "learning_rate": 2e-05, "loss": 0.03689311, "step": 19696 }, { "epoch": 39.394, "grad_norm": 1.4636775255203247, "learning_rate": 2e-05, "loss": 0.05778562, "step": 19697 }, { "epoch": 39.396, "grad_norm": 1.2014180421829224, "learning_rate": 2e-05, "loss": 0.04290902, "step": 19698 }, { "epoch": 39.398, "grad_norm": 1.2907536029815674, "learning_rate": 2e-05, "loss": 0.06206372, "step": 19699 }, { "epoch": 39.4, "grad_norm": 1.1012358665466309, "learning_rate": 2e-05, "loss": 0.04259882, "step": 19700 }, { "epoch": 39.402, "grad_norm": 1.715025782585144, "learning_rate": 2e-05, "loss": 0.05486638, "step": 19701 }, { "epoch": 39.404, "grad_norm": 1.184453010559082, "learning_rate": 2e-05, "loss": 0.04721743, "step": 19702 }, { "epoch": 39.406, "grad_norm": 1.0328636169433594, "learning_rate": 2e-05, "loss": 0.04881582, "step": 19703 }, { "epoch": 39.408, "grad_norm": 1.305463433265686, "learning_rate": 2e-05, "loss": 0.05194881, "step": 19704 }, { "epoch": 39.41, "grad_norm": 1.1029982566833496, "learning_rate": 2e-05, "loss": 0.03533511, "step": 19705 }, { "epoch": 39.412, "grad_norm": 1.8075166940689087, "learning_rate": 2e-05, "loss": 0.05651566, "step": 19706 }, { "epoch": 39.414, "grad_norm": 1.0378196239471436, "learning_rate": 2e-05, "loss": 0.03688617, "step": 19707 }, { "epoch": 39.416, "grad_norm": 1.6773337125778198, "learning_rate": 2e-05, "loss": 0.05092984, "step": 19708 }, { "epoch": 39.418, "grad_norm": 0.8774677515029907, "learning_rate": 2e-05, "loss": 0.03383045, "step": 19709 }, { "epoch": 39.42, "grad_norm": 2.166248083114624, "learning_rate": 2e-05, "loss": 0.05698601, "step": 19710 }, { "epoch": 39.422, "grad_norm": 0.8093209266662598, "learning_rate": 2e-05, "loss": 0.02652819, "step": 19711 }, { "epoch": 39.424, "grad_norm": 2.0697109699249268, "learning_rate": 2e-05, "loss": 0.05185808, "step": 19712 }, { "epoch": 39.426, "grad_norm": 0.9343791604042053, "learning_rate": 2e-05, "loss": 0.04789653, "step": 19713 }, { "epoch": 39.428, "grad_norm": 0.9608621597290039, "learning_rate": 2e-05, "loss": 0.03463775, "step": 19714 }, { "epoch": 39.43, "grad_norm": 2.2530882358551025, "learning_rate": 2e-05, "loss": 0.04028586, "step": 19715 }, { "epoch": 39.432, "grad_norm": 0.9992815256118774, "learning_rate": 2e-05, "loss": 0.04037733, "step": 19716 }, { "epoch": 39.434, "grad_norm": 1.2169606685638428, "learning_rate": 2e-05, "loss": 0.03842113, "step": 19717 }, { "epoch": 39.436, "grad_norm": 1.2398910522460938, "learning_rate": 2e-05, "loss": 0.05360946, "step": 19718 }, { "epoch": 39.438, "grad_norm": 0.9501088857650757, "learning_rate": 2e-05, "loss": 0.03772445, "step": 19719 }, { "epoch": 39.44, "grad_norm": 1.1326839923858643, "learning_rate": 2e-05, "loss": 0.04760835, "step": 19720 }, { "epoch": 39.442, "grad_norm": 2.3582828044891357, "learning_rate": 2e-05, "loss": 0.05440258, "step": 19721 }, { "epoch": 39.444, "grad_norm": 1.0737148523330688, "learning_rate": 2e-05, "loss": 0.04560971, "step": 19722 }, { "epoch": 39.446, "grad_norm": 0.9080232977867126, "learning_rate": 2e-05, "loss": 0.02954752, "step": 19723 }, { "epoch": 39.448, "grad_norm": 1.383144736289978, "learning_rate": 2e-05, "loss": 0.05771355, "step": 19724 }, { "epoch": 39.45, "grad_norm": 1.6267149448394775, "learning_rate": 2e-05, "loss": 0.04903772, "step": 19725 }, { "epoch": 39.452, "grad_norm": 1.578048586845398, "learning_rate": 2e-05, "loss": 0.04099901, "step": 19726 }, { "epoch": 39.454, "grad_norm": 0.7801216840744019, "learning_rate": 2e-05, "loss": 0.03305111, "step": 19727 }, { "epoch": 39.456, "grad_norm": 0.9847111105918884, "learning_rate": 2e-05, "loss": 0.03775512, "step": 19728 }, { "epoch": 39.458, "grad_norm": 1.283754587173462, "learning_rate": 2e-05, "loss": 0.05883973, "step": 19729 }, { "epoch": 39.46, "grad_norm": 0.8882175087928772, "learning_rate": 2e-05, "loss": 0.02892267, "step": 19730 }, { "epoch": 39.462, "grad_norm": 0.9201284646987915, "learning_rate": 2e-05, "loss": 0.03425537, "step": 19731 }, { "epoch": 39.464, "grad_norm": 0.9793798923492432, "learning_rate": 2e-05, "loss": 0.03862665, "step": 19732 }, { "epoch": 39.466, "grad_norm": 1.6932294368743896, "learning_rate": 2e-05, "loss": 0.05198906, "step": 19733 }, { "epoch": 39.468, "grad_norm": 1.1708825826644897, "learning_rate": 2e-05, "loss": 0.04307234, "step": 19734 }, { "epoch": 39.47, "grad_norm": 0.8935628533363342, "learning_rate": 2e-05, "loss": 0.03484309, "step": 19735 }, { "epoch": 39.472, "grad_norm": 1.6596794128417969, "learning_rate": 2e-05, "loss": 0.06109672, "step": 19736 }, { "epoch": 39.474, "grad_norm": 1.313515305519104, "learning_rate": 2e-05, "loss": 0.06451312, "step": 19737 }, { "epoch": 39.476, "grad_norm": 1.2272006273269653, "learning_rate": 2e-05, "loss": 0.04218286, "step": 19738 }, { "epoch": 39.478, "grad_norm": 1.0606508255004883, "learning_rate": 2e-05, "loss": 0.03806088, "step": 19739 }, { "epoch": 39.48, "grad_norm": 1.0461317300796509, "learning_rate": 2e-05, "loss": 0.03215779, "step": 19740 }, { "epoch": 39.482, "grad_norm": 2.622462272644043, "learning_rate": 2e-05, "loss": 0.06584635, "step": 19741 }, { "epoch": 39.484, "grad_norm": 0.9646690487861633, "learning_rate": 2e-05, "loss": 0.03738637, "step": 19742 }, { "epoch": 39.486, "grad_norm": 1.0413398742675781, "learning_rate": 2e-05, "loss": 0.03843875, "step": 19743 }, { "epoch": 39.488, "grad_norm": 2.0859758853912354, "learning_rate": 2e-05, "loss": 0.0608715, "step": 19744 }, { "epoch": 39.49, "grad_norm": 1.5020564794540405, "learning_rate": 2e-05, "loss": 0.05634893, "step": 19745 }, { "epoch": 39.492, "grad_norm": 2.362874984741211, "learning_rate": 2e-05, "loss": 0.04640964, "step": 19746 }, { "epoch": 39.494, "grad_norm": 1.7708141803741455, "learning_rate": 2e-05, "loss": 0.05005526, "step": 19747 }, { "epoch": 39.496, "grad_norm": 0.999367356300354, "learning_rate": 2e-05, "loss": 0.04214387, "step": 19748 }, { "epoch": 39.498, "grad_norm": 1.485107183456421, "learning_rate": 2e-05, "loss": 0.05445093, "step": 19749 }, { "epoch": 39.5, "grad_norm": 1.3996080160140991, "learning_rate": 2e-05, "loss": 0.04682521, "step": 19750 }, { "epoch": 39.502, "grad_norm": 0.9800930619239807, "learning_rate": 2e-05, "loss": 0.03586971, "step": 19751 }, { "epoch": 39.504, "grad_norm": 2.324505567550659, "learning_rate": 2e-05, "loss": 0.07270438, "step": 19752 }, { "epoch": 39.506, "grad_norm": 1.1953097581863403, "learning_rate": 2e-05, "loss": 0.04381161, "step": 19753 }, { "epoch": 39.508, "grad_norm": 1.462424397468567, "learning_rate": 2e-05, "loss": 0.05508835, "step": 19754 }, { "epoch": 39.51, "grad_norm": 1.0941414833068848, "learning_rate": 2e-05, "loss": 0.03967017, "step": 19755 }, { "epoch": 39.512, "grad_norm": 1.123353123664856, "learning_rate": 2e-05, "loss": 0.04578426, "step": 19756 }, { "epoch": 39.514, "grad_norm": 1.2062989473342896, "learning_rate": 2e-05, "loss": 0.0551085, "step": 19757 }, { "epoch": 39.516, "grad_norm": 1.455043077468872, "learning_rate": 2e-05, "loss": 0.0505494, "step": 19758 }, { "epoch": 39.518, "grad_norm": 1.2047113180160522, "learning_rate": 2e-05, "loss": 0.04069864, "step": 19759 }, { "epoch": 39.52, "grad_norm": 1.217572808265686, "learning_rate": 2e-05, "loss": 0.0511783, "step": 19760 }, { "epoch": 39.522, "grad_norm": 1.2422960996627808, "learning_rate": 2e-05, "loss": 0.05930006, "step": 19761 }, { "epoch": 39.524, "grad_norm": 0.8707858324050903, "learning_rate": 2e-05, "loss": 0.03119957, "step": 19762 }, { "epoch": 39.526, "grad_norm": 1.217329978942871, "learning_rate": 2e-05, "loss": 0.04707095, "step": 19763 }, { "epoch": 39.528, "grad_norm": 1.0223344564437866, "learning_rate": 2e-05, "loss": 0.0463704, "step": 19764 }, { "epoch": 39.53, "grad_norm": 1.1190708875656128, "learning_rate": 2e-05, "loss": 0.04334144, "step": 19765 }, { "epoch": 39.532, "grad_norm": 1.0793894529342651, "learning_rate": 2e-05, "loss": 0.03269261, "step": 19766 }, { "epoch": 39.534, "grad_norm": 1.0165287256240845, "learning_rate": 2e-05, "loss": 0.03077252, "step": 19767 }, { "epoch": 39.536, "grad_norm": 1.2656840085983276, "learning_rate": 2e-05, "loss": 0.05329069, "step": 19768 }, { "epoch": 39.538, "grad_norm": 1.1519137620925903, "learning_rate": 2e-05, "loss": 0.04197564, "step": 19769 }, { "epoch": 39.54, "grad_norm": 1.2953381538391113, "learning_rate": 2e-05, "loss": 0.04630489, "step": 19770 }, { "epoch": 39.542, "grad_norm": 1.0278592109680176, "learning_rate": 2e-05, "loss": 0.03626328, "step": 19771 }, { "epoch": 39.544, "grad_norm": 1.7225301265716553, "learning_rate": 2e-05, "loss": 0.03131746, "step": 19772 }, { "epoch": 39.546, "grad_norm": 1.4201761484146118, "learning_rate": 2e-05, "loss": 0.04268201, "step": 19773 }, { "epoch": 39.548, "grad_norm": 0.9142163395881653, "learning_rate": 2e-05, "loss": 0.03836469, "step": 19774 }, { "epoch": 39.55, "grad_norm": 2.1987462043762207, "learning_rate": 2e-05, "loss": 0.04568999, "step": 19775 }, { "epoch": 39.552, "grad_norm": 1.217750906944275, "learning_rate": 2e-05, "loss": 0.05439132, "step": 19776 }, { "epoch": 39.554, "grad_norm": 1.0809926986694336, "learning_rate": 2e-05, "loss": 0.03456289, "step": 19777 }, { "epoch": 39.556, "grad_norm": 1.1169228553771973, "learning_rate": 2e-05, "loss": 0.05055427, "step": 19778 }, { "epoch": 39.558, "grad_norm": 1.3289159536361694, "learning_rate": 2e-05, "loss": 0.0430658, "step": 19779 }, { "epoch": 39.56, "grad_norm": 3.019728660583496, "learning_rate": 2e-05, "loss": 0.06542116, "step": 19780 }, { "epoch": 39.562, "grad_norm": 1.0415998697280884, "learning_rate": 2e-05, "loss": 0.03222946, "step": 19781 }, { "epoch": 39.564, "grad_norm": 1.207776427268982, "learning_rate": 2e-05, "loss": 0.04781389, "step": 19782 }, { "epoch": 39.566, "grad_norm": 0.9968594908714294, "learning_rate": 2e-05, "loss": 0.03552169, "step": 19783 }, { "epoch": 39.568, "grad_norm": 1.033646821975708, "learning_rate": 2e-05, "loss": 0.03394993, "step": 19784 }, { "epoch": 39.57, "grad_norm": 1.1830904483795166, "learning_rate": 2e-05, "loss": 0.04758063, "step": 19785 }, { "epoch": 39.572, "grad_norm": 1.1416414976119995, "learning_rate": 2e-05, "loss": 0.0358948, "step": 19786 }, { "epoch": 39.574, "grad_norm": 1.2189161777496338, "learning_rate": 2e-05, "loss": 0.04232068, "step": 19787 }, { "epoch": 39.576, "grad_norm": 1.1525717973709106, "learning_rate": 2e-05, "loss": 0.03134789, "step": 19788 }, { "epoch": 39.578, "grad_norm": 1.1093817949295044, "learning_rate": 2e-05, "loss": 0.04284113, "step": 19789 }, { "epoch": 39.58, "grad_norm": 1.2334413528442383, "learning_rate": 2e-05, "loss": 0.04276598, "step": 19790 }, { "epoch": 39.582, "grad_norm": 6.114707946777344, "learning_rate": 2e-05, "loss": 0.05117049, "step": 19791 }, { "epoch": 39.584, "grad_norm": 1.2791383266448975, "learning_rate": 2e-05, "loss": 0.05639, "step": 19792 }, { "epoch": 39.586, "grad_norm": 1.3566539287567139, "learning_rate": 2e-05, "loss": 0.06486918, "step": 19793 }, { "epoch": 39.588, "grad_norm": 1.3956037759780884, "learning_rate": 2e-05, "loss": 0.0436568, "step": 19794 }, { "epoch": 39.59, "grad_norm": 1.0201810598373413, "learning_rate": 2e-05, "loss": 0.0297365, "step": 19795 }, { "epoch": 39.592, "grad_norm": 1.3810011148452759, "learning_rate": 2e-05, "loss": 0.05078568, "step": 19796 }, { "epoch": 39.594, "grad_norm": 0.962624192237854, "learning_rate": 2e-05, "loss": 0.03738275, "step": 19797 }, { "epoch": 39.596, "grad_norm": 1.0029946565628052, "learning_rate": 2e-05, "loss": 0.03617393, "step": 19798 }, { "epoch": 39.598, "grad_norm": 1.1275755167007446, "learning_rate": 2e-05, "loss": 0.04925808, "step": 19799 }, { "epoch": 39.6, "grad_norm": 1.1802276372909546, "learning_rate": 2e-05, "loss": 0.04820387, "step": 19800 }, { "epoch": 39.602, "grad_norm": 1.0468865633010864, "learning_rate": 2e-05, "loss": 0.03381919, "step": 19801 }, { "epoch": 39.604, "grad_norm": 1.0697009563446045, "learning_rate": 2e-05, "loss": 0.04107271, "step": 19802 }, { "epoch": 39.606, "grad_norm": 1.0449153184890747, "learning_rate": 2e-05, "loss": 0.03359612, "step": 19803 }, { "epoch": 39.608, "grad_norm": 2.2875797748565674, "learning_rate": 2e-05, "loss": 0.03718928, "step": 19804 }, { "epoch": 39.61, "grad_norm": 3.7707579135894775, "learning_rate": 2e-05, "loss": 0.04203282, "step": 19805 }, { "epoch": 39.612, "grad_norm": 1.0831050872802734, "learning_rate": 2e-05, "loss": 0.04487085, "step": 19806 }, { "epoch": 39.614, "grad_norm": 1.546043038368225, "learning_rate": 2e-05, "loss": 0.04467985, "step": 19807 }, { "epoch": 39.616, "grad_norm": 2.5732364654541016, "learning_rate": 2e-05, "loss": 0.05453048, "step": 19808 }, { "epoch": 39.618, "grad_norm": 1.2084025144577026, "learning_rate": 2e-05, "loss": 0.05112646, "step": 19809 }, { "epoch": 39.62, "grad_norm": 1.7309622764587402, "learning_rate": 2e-05, "loss": 0.05346811, "step": 19810 }, { "epoch": 39.622, "grad_norm": 2.9345250129699707, "learning_rate": 2e-05, "loss": 0.03821341, "step": 19811 }, { "epoch": 39.624, "grad_norm": 0.8954547047615051, "learning_rate": 2e-05, "loss": 0.02864383, "step": 19812 }, { "epoch": 39.626, "grad_norm": 1.422338604927063, "learning_rate": 2e-05, "loss": 0.0414606, "step": 19813 }, { "epoch": 39.628, "grad_norm": 1.0488382577896118, "learning_rate": 2e-05, "loss": 0.04773368, "step": 19814 }, { "epoch": 39.63, "grad_norm": 1.029525637626648, "learning_rate": 2e-05, "loss": 0.05554564, "step": 19815 }, { "epoch": 39.632, "grad_norm": 1.33245050907135, "learning_rate": 2e-05, "loss": 0.0377542, "step": 19816 }, { "epoch": 39.634, "grad_norm": 1.0919883251190186, "learning_rate": 2e-05, "loss": 0.04626956, "step": 19817 }, { "epoch": 39.636, "grad_norm": 0.9759886264801025, "learning_rate": 2e-05, "loss": 0.03899803, "step": 19818 }, { "epoch": 39.638, "grad_norm": 1.091922402381897, "learning_rate": 2e-05, "loss": 0.04786177, "step": 19819 }, { "epoch": 39.64, "grad_norm": 1.7021945714950562, "learning_rate": 2e-05, "loss": 0.04430627, "step": 19820 }, { "epoch": 39.642, "grad_norm": 0.8844581246376038, "learning_rate": 2e-05, "loss": 0.03581714, "step": 19821 }, { "epoch": 39.644, "grad_norm": 1.0511246919631958, "learning_rate": 2e-05, "loss": 0.03747585, "step": 19822 }, { "epoch": 39.646, "grad_norm": 1.602131962776184, "learning_rate": 2e-05, "loss": 0.05775043, "step": 19823 }, { "epoch": 39.648, "grad_norm": 1.1373029947280884, "learning_rate": 2e-05, "loss": 0.04366532, "step": 19824 }, { "epoch": 39.65, "grad_norm": 1.1913431882858276, "learning_rate": 2e-05, "loss": 0.03942533, "step": 19825 }, { "epoch": 39.652, "grad_norm": 1.2877463102340698, "learning_rate": 2e-05, "loss": 0.05508751, "step": 19826 }, { "epoch": 39.654, "grad_norm": 1.5902690887451172, "learning_rate": 2e-05, "loss": 0.04940099, "step": 19827 }, { "epoch": 39.656, "grad_norm": 1.5489622354507446, "learning_rate": 2e-05, "loss": 0.04990978, "step": 19828 }, { "epoch": 39.658, "grad_norm": 0.7974315285682678, "learning_rate": 2e-05, "loss": 0.02475277, "step": 19829 }, { "epoch": 39.66, "grad_norm": 1.4391497373580933, "learning_rate": 2e-05, "loss": 0.05540859, "step": 19830 }, { "epoch": 39.662, "grad_norm": 2.0422706604003906, "learning_rate": 2e-05, "loss": 0.05429158, "step": 19831 }, { "epoch": 39.664, "grad_norm": 0.9763295650482178, "learning_rate": 2e-05, "loss": 0.03541855, "step": 19832 }, { "epoch": 39.666, "grad_norm": 1.0644862651824951, "learning_rate": 2e-05, "loss": 0.0442564, "step": 19833 }, { "epoch": 39.668, "grad_norm": 1.1711417436599731, "learning_rate": 2e-05, "loss": 0.05811571, "step": 19834 }, { "epoch": 39.67, "grad_norm": 1.1323226690292358, "learning_rate": 2e-05, "loss": 0.04926597, "step": 19835 }, { "epoch": 39.672, "grad_norm": 1.428341269493103, "learning_rate": 2e-05, "loss": 0.06229048, "step": 19836 }, { "epoch": 39.674, "grad_norm": 1.1269856691360474, "learning_rate": 2e-05, "loss": 0.05206765, "step": 19837 }, { "epoch": 39.676, "grad_norm": 1.044885277748108, "learning_rate": 2e-05, "loss": 0.03840274, "step": 19838 }, { "epoch": 39.678, "grad_norm": 1.166600227355957, "learning_rate": 2e-05, "loss": 0.04657107, "step": 19839 }, { "epoch": 39.68, "grad_norm": 0.9863473176956177, "learning_rate": 2e-05, "loss": 0.03338449, "step": 19840 }, { "epoch": 39.682, "grad_norm": 1.4325553178787231, "learning_rate": 2e-05, "loss": 0.05557626, "step": 19841 }, { "epoch": 39.684, "grad_norm": 1.468109130859375, "learning_rate": 2e-05, "loss": 0.07358497, "step": 19842 }, { "epoch": 39.686, "grad_norm": 2.2002451419830322, "learning_rate": 2e-05, "loss": 0.03650321, "step": 19843 }, { "epoch": 39.688, "grad_norm": 0.9192159175872803, "learning_rate": 2e-05, "loss": 0.03903075, "step": 19844 }, { "epoch": 39.69, "grad_norm": 0.9829854965209961, "learning_rate": 2e-05, "loss": 0.03883012, "step": 19845 }, { "epoch": 39.692, "grad_norm": 0.9893613457679749, "learning_rate": 2e-05, "loss": 0.04050479, "step": 19846 }, { "epoch": 39.694, "grad_norm": 0.9253523349761963, "learning_rate": 2e-05, "loss": 0.04659057, "step": 19847 }, { "epoch": 39.696, "grad_norm": 1.1441282033920288, "learning_rate": 2e-05, "loss": 0.04921231, "step": 19848 }, { "epoch": 39.698, "grad_norm": 1.2405463457107544, "learning_rate": 2e-05, "loss": 0.04759157, "step": 19849 }, { "epoch": 39.7, "grad_norm": 0.859405517578125, "learning_rate": 2e-05, "loss": 0.03923379, "step": 19850 }, { "epoch": 39.702, "grad_norm": 1.4156253337860107, "learning_rate": 2e-05, "loss": 0.05922413, "step": 19851 }, { "epoch": 39.704, "grad_norm": 2.0626978874206543, "learning_rate": 2e-05, "loss": 0.02834044, "step": 19852 }, { "epoch": 39.706, "grad_norm": 1.071313500404358, "learning_rate": 2e-05, "loss": 0.04010179, "step": 19853 }, { "epoch": 39.708, "grad_norm": 1.0088459253311157, "learning_rate": 2e-05, "loss": 0.0334592, "step": 19854 }, { "epoch": 39.71, "grad_norm": 1.0834213495254517, "learning_rate": 2e-05, "loss": 0.03653051, "step": 19855 }, { "epoch": 39.712, "grad_norm": 1.0218042135238647, "learning_rate": 2e-05, "loss": 0.04736985, "step": 19856 }, { "epoch": 39.714, "grad_norm": 1.1127095222473145, "learning_rate": 2e-05, "loss": 0.04735993, "step": 19857 }, { "epoch": 39.716, "grad_norm": 1.10434091091156, "learning_rate": 2e-05, "loss": 0.04687006, "step": 19858 }, { "epoch": 39.718, "grad_norm": 2.2171404361724854, "learning_rate": 2e-05, "loss": 0.0693571, "step": 19859 }, { "epoch": 39.72, "grad_norm": 1.129449725151062, "learning_rate": 2e-05, "loss": 0.05298089, "step": 19860 }, { "epoch": 39.722, "grad_norm": 1.0971157550811768, "learning_rate": 2e-05, "loss": 0.05703637, "step": 19861 }, { "epoch": 39.724, "grad_norm": 1.3797575235366821, "learning_rate": 2e-05, "loss": 0.06408994, "step": 19862 }, { "epoch": 39.726, "grad_norm": 1.000963807106018, "learning_rate": 2e-05, "loss": 0.03131079, "step": 19863 }, { "epoch": 39.728, "grad_norm": 1.2767345905303955, "learning_rate": 2e-05, "loss": 0.04227749, "step": 19864 }, { "epoch": 39.73, "grad_norm": 1.058678150177002, "learning_rate": 2e-05, "loss": 0.03231667, "step": 19865 }, { "epoch": 39.732, "grad_norm": 1.0262880325317383, "learning_rate": 2e-05, "loss": 0.0330482, "step": 19866 }, { "epoch": 39.734, "grad_norm": 1.0627272129058838, "learning_rate": 2e-05, "loss": 0.05308692, "step": 19867 }, { "epoch": 39.736, "grad_norm": 1.1702196598052979, "learning_rate": 2e-05, "loss": 0.03337921, "step": 19868 }, { "epoch": 39.738, "grad_norm": 1.3394055366516113, "learning_rate": 2e-05, "loss": 0.06042112, "step": 19869 }, { "epoch": 39.74, "grad_norm": 0.9854897260665894, "learning_rate": 2e-05, "loss": 0.03741299, "step": 19870 }, { "epoch": 39.742, "grad_norm": 1.540318489074707, "learning_rate": 2e-05, "loss": 0.03294612, "step": 19871 }, { "epoch": 39.744, "grad_norm": 1.0081778764724731, "learning_rate": 2e-05, "loss": 0.04358434, "step": 19872 }, { "epoch": 39.746, "grad_norm": 1.0711407661437988, "learning_rate": 2e-05, "loss": 0.04167827, "step": 19873 }, { "epoch": 39.748, "grad_norm": 1.0182355642318726, "learning_rate": 2e-05, "loss": 0.04739642, "step": 19874 }, { "epoch": 39.75, "grad_norm": 0.9622233510017395, "learning_rate": 2e-05, "loss": 0.03988034, "step": 19875 }, { "epoch": 39.752, "grad_norm": 0.9959332942962646, "learning_rate": 2e-05, "loss": 0.03294655, "step": 19876 }, { "epoch": 39.754, "grad_norm": 1.2218499183654785, "learning_rate": 2e-05, "loss": 0.04638077, "step": 19877 }, { "epoch": 39.756, "grad_norm": 2.0775938034057617, "learning_rate": 2e-05, "loss": 0.05717273, "step": 19878 }, { "epoch": 39.758, "grad_norm": 1.0010204315185547, "learning_rate": 2e-05, "loss": 0.04771541, "step": 19879 }, { "epoch": 39.76, "grad_norm": 1.3019424676895142, "learning_rate": 2e-05, "loss": 0.03234541, "step": 19880 }, { "epoch": 39.762, "grad_norm": 1.1951885223388672, "learning_rate": 2e-05, "loss": 0.04422656, "step": 19881 }, { "epoch": 39.764, "grad_norm": 0.8559284210205078, "learning_rate": 2e-05, "loss": 0.03210206, "step": 19882 }, { "epoch": 39.766, "grad_norm": 1.1166956424713135, "learning_rate": 2e-05, "loss": 0.03560802, "step": 19883 }, { "epoch": 39.768, "grad_norm": 1.3731318712234497, "learning_rate": 2e-05, "loss": 0.04366145, "step": 19884 }, { "epoch": 39.77, "grad_norm": 1.0742740631103516, "learning_rate": 2e-05, "loss": 0.05316631, "step": 19885 }, { "epoch": 39.772, "grad_norm": 1.1962692737579346, "learning_rate": 2e-05, "loss": 0.03208228, "step": 19886 }, { "epoch": 39.774, "grad_norm": 1.0320333242416382, "learning_rate": 2e-05, "loss": 0.04224065, "step": 19887 }, { "epoch": 39.776, "grad_norm": 1.0977202653884888, "learning_rate": 2e-05, "loss": 0.04830211, "step": 19888 }, { "epoch": 39.778, "grad_norm": 1.0206058025360107, "learning_rate": 2e-05, "loss": 0.03756482, "step": 19889 }, { "epoch": 39.78, "grad_norm": 1.269673228263855, "learning_rate": 2e-05, "loss": 0.05750541, "step": 19890 }, { "epoch": 39.782, "grad_norm": 1.1724382638931274, "learning_rate": 2e-05, "loss": 0.05656551, "step": 19891 }, { "epoch": 39.784, "grad_norm": 1.096116304397583, "learning_rate": 2e-05, "loss": 0.03955997, "step": 19892 }, { "epoch": 39.786, "grad_norm": 1.536861777305603, "learning_rate": 2e-05, "loss": 0.04093381, "step": 19893 }, { "epoch": 39.788, "grad_norm": 1.2449777126312256, "learning_rate": 2e-05, "loss": 0.04258414, "step": 19894 }, { "epoch": 39.79, "grad_norm": 1.100707769393921, "learning_rate": 2e-05, "loss": 0.04184557, "step": 19895 }, { "epoch": 39.792, "grad_norm": 0.7601773738861084, "learning_rate": 2e-05, "loss": 0.02240694, "step": 19896 }, { "epoch": 39.794, "grad_norm": 1.099068284034729, "learning_rate": 2e-05, "loss": 0.04857173, "step": 19897 }, { "epoch": 39.796, "grad_norm": 1.0962532758712769, "learning_rate": 2e-05, "loss": 0.05060228, "step": 19898 }, { "epoch": 39.798, "grad_norm": 1.1620053052902222, "learning_rate": 2e-05, "loss": 0.04748674, "step": 19899 }, { "epoch": 39.8, "grad_norm": 1.5494929552078247, "learning_rate": 2e-05, "loss": 0.04990292, "step": 19900 }, { "epoch": 39.802, "grad_norm": 2.5488200187683105, "learning_rate": 2e-05, "loss": 0.07006623, "step": 19901 }, { "epoch": 39.804, "grad_norm": 1.0101559162139893, "learning_rate": 2e-05, "loss": 0.04366206, "step": 19902 }, { "epoch": 39.806, "grad_norm": 0.8662826418876648, "learning_rate": 2e-05, "loss": 0.02595711, "step": 19903 }, { "epoch": 39.808, "grad_norm": 1.2867019176483154, "learning_rate": 2e-05, "loss": 0.05447638, "step": 19904 }, { "epoch": 39.81, "grad_norm": 1.1928508281707764, "learning_rate": 2e-05, "loss": 0.03939505, "step": 19905 }, { "epoch": 39.812, "grad_norm": 0.8319052457809448, "learning_rate": 2e-05, "loss": 0.02472933, "step": 19906 }, { "epoch": 39.814, "grad_norm": 1.4692742824554443, "learning_rate": 2e-05, "loss": 0.05500904, "step": 19907 }, { "epoch": 39.816, "grad_norm": 1.1972748041152954, "learning_rate": 2e-05, "loss": 0.04034562, "step": 19908 }, { "epoch": 39.818, "grad_norm": 0.958222508430481, "learning_rate": 2e-05, "loss": 0.03489382, "step": 19909 }, { "epoch": 39.82, "grad_norm": 1.2841933965682983, "learning_rate": 2e-05, "loss": 0.03793344, "step": 19910 }, { "epoch": 39.822, "grad_norm": 0.8531423211097717, "learning_rate": 2e-05, "loss": 0.0387709, "step": 19911 }, { "epoch": 39.824, "grad_norm": 1.0664366483688354, "learning_rate": 2e-05, "loss": 0.04708088, "step": 19912 }, { "epoch": 39.826, "grad_norm": 0.9853864908218384, "learning_rate": 2e-05, "loss": 0.03214358, "step": 19913 }, { "epoch": 39.828, "grad_norm": 1.0170292854309082, "learning_rate": 2e-05, "loss": 0.04311734, "step": 19914 }, { "epoch": 39.83, "grad_norm": 1.334947109222412, "learning_rate": 2e-05, "loss": 0.05240899, "step": 19915 }, { "epoch": 39.832, "grad_norm": 1.2091861963272095, "learning_rate": 2e-05, "loss": 0.02730595, "step": 19916 }, { "epoch": 39.834, "grad_norm": 1.0959655046463013, "learning_rate": 2e-05, "loss": 0.04869179, "step": 19917 }, { "epoch": 39.836, "grad_norm": 1.2375555038452148, "learning_rate": 2e-05, "loss": 0.05659986, "step": 19918 }, { "epoch": 39.838, "grad_norm": 1.1543670892715454, "learning_rate": 2e-05, "loss": 0.0418166, "step": 19919 }, { "epoch": 39.84, "grad_norm": 1.0154297351837158, "learning_rate": 2e-05, "loss": 0.03767615, "step": 19920 }, { "epoch": 39.842, "grad_norm": 1.1322821378707886, "learning_rate": 2e-05, "loss": 0.04442841, "step": 19921 }, { "epoch": 39.844, "grad_norm": 1.080886960029602, "learning_rate": 2e-05, "loss": 0.043917, "step": 19922 }, { "epoch": 39.846, "grad_norm": 1.0678915977478027, "learning_rate": 2e-05, "loss": 0.03454448, "step": 19923 }, { "epoch": 39.848, "grad_norm": 1.1571611166000366, "learning_rate": 2e-05, "loss": 0.05229175, "step": 19924 }, { "epoch": 39.85, "grad_norm": 0.9871798157691956, "learning_rate": 2e-05, "loss": 0.03294233, "step": 19925 }, { "epoch": 39.852, "grad_norm": 0.9386829733848572, "learning_rate": 2e-05, "loss": 0.03467434, "step": 19926 }, { "epoch": 39.854, "grad_norm": 1.2393286228179932, "learning_rate": 2e-05, "loss": 0.04885034, "step": 19927 }, { "epoch": 39.856, "grad_norm": 1.5080839395523071, "learning_rate": 2e-05, "loss": 0.04443352, "step": 19928 }, { "epoch": 39.858, "grad_norm": 1.0465118885040283, "learning_rate": 2e-05, "loss": 0.03984166, "step": 19929 }, { "epoch": 39.86, "grad_norm": 1.28429114818573, "learning_rate": 2e-05, "loss": 0.04410757, "step": 19930 }, { "epoch": 39.862, "grad_norm": 1.1174023151397705, "learning_rate": 2e-05, "loss": 0.05240105, "step": 19931 }, { "epoch": 39.864, "grad_norm": 1.055867075920105, "learning_rate": 2e-05, "loss": 0.04787189, "step": 19932 }, { "epoch": 39.866, "grad_norm": 1.026498556137085, "learning_rate": 2e-05, "loss": 0.04451855, "step": 19933 }, { "epoch": 39.868, "grad_norm": 1.058374285697937, "learning_rate": 2e-05, "loss": 0.04313889, "step": 19934 }, { "epoch": 39.87, "grad_norm": 1.3353815078735352, "learning_rate": 2e-05, "loss": 0.03507809, "step": 19935 }, { "epoch": 39.872, "grad_norm": 1.2824405431747437, "learning_rate": 2e-05, "loss": 0.04376249, "step": 19936 }, { "epoch": 39.874, "grad_norm": 1.0729347467422485, "learning_rate": 2e-05, "loss": 0.04265709, "step": 19937 }, { "epoch": 39.876, "grad_norm": 1.160787582397461, "learning_rate": 2e-05, "loss": 0.04455629, "step": 19938 }, { "epoch": 39.878, "grad_norm": 1.0313104391098022, "learning_rate": 2e-05, "loss": 0.03348815, "step": 19939 }, { "epoch": 39.88, "grad_norm": 1.274091362953186, "learning_rate": 2e-05, "loss": 0.05094112, "step": 19940 }, { "epoch": 39.882, "grad_norm": 3.509244441986084, "learning_rate": 2e-05, "loss": 0.05603273, "step": 19941 }, { "epoch": 39.884, "grad_norm": 1.2183585166931152, "learning_rate": 2e-05, "loss": 0.04602589, "step": 19942 }, { "epoch": 39.886, "grad_norm": 0.9860644936561584, "learning_rate": 2e-05, "loss": 0.04182311, "step": 19943 }, { "epoch": 39.888, "grad_norm": 2.4805495738983154, "learning_rate": 2e-05, "loss": 0.06502492, "step": 19944 }, { "epoch": 39.89, "grad_norm": 1.114504337310791, "learning_rate": 2e-05, "loss": 0.0421165, "step": 19945 }, { "epoch": 39.892, "grad_norm": 1.634761929512024, "learning_rate": 2e-05, "loss": 0.048902, "step": 19946 }, { "epoch": 39.894, "grad_norm": 1.0386641025543213, "learning_rate": 2e-05, "loss": 0.04220989, "step": 19947 }, { "epoch": 39.896, "grad_norm": 0.8268201947212219, "learning_rate": 2e-05, "loss": 0.02308436, "step": 19948 }, { "epoch": 39.898, "grad_norm": 0.8323472738265991, "learning_rate": 2e-05, "loss": 0.02323181, "step": 19949 }, { "epoch": 39.9, "grad_norm": 0.9594294428825378, "learning_rate": 2e-05, "loss": 0.03834385, "step": 19950 }, { "epoch": 39.902, "grad_norm": 1.028228759765625, "learning_rate": 2e-05, "loss": 0.05046041, "step": 19951 }, { "epoch": 39.904, "grad_norm": 1.1153631210327148, "learning_rate": 2e-05, "loss": 0.03777427, "step": 19952 }, { "epoch": 39.906, "grad_norm": 1.0633679628372192, "learning_rate": 2e-05, "loss": 0.04464621, "step": 19953 }, { "epoch": 39.908, "grad_norm": 0.8765658140182495, "learning_rate": 2e-05, "loss": 0.02763903, "step": 19954 }, { "epoch": 39.91, "grad_norm": 0.9144088625907898, "learning_rate": 2e-05, "loss": 0.03576268, "step": 19955 }, { "epoch": 39.912, "grad_norm": 1.8870497941970825, "learning_rate": 2e-05, "loss": 0.05473586, "step": 19956 }, { "epoch": 39.914, "grad_norm": 0.9866358041763306, "learning_rate": 2e-05, "loss": 0.03893233, "step": 19957 }, { "epoch": 39.916, "grad_norm": 1.2957180738449097, "learning_rate": 2e-05, "loss": 0.05159867, "step": 19958 }, { "epoch": 39.918, "grad_norm": 1.1522823572158813, "learning_rate": 2e-05, "loss": 0.04481231, "step": 19959 }, { "epoch": 39.92, "grad_norm": 1.455897331237793, "learning_rate": 2e-05, "loss": 0.03888647, "step": 19960 }, { "epoch": 39.922, "grad_norm": 1.1373776197433472, "learning_rate": 2e-05, "loss": 0.05754067, "step": 19961 }, { "epoch": 39.924, "grad_norm": 1.0495020151138306, "learning_rate": 2e-05, "loss": 0.03694631, "step": 19962 }, { "epoch": 39.926, "grad_norm": 3.984205484390259, "learning_rate": 2e-05, "loss": 0.06486093, "step": 19963 }, { "epoch": 39.928, "grad_norm": 1.1145609617233276, "learning_rate": 2e-05, "loss": 0.04970375, "step": 19964 }, { "epoch": 39.93, "grad_norm": 1.1545813083648682, "learning_rate": 2e-05, "loss": 0.04139733, "step": 19965 }, { "epoch": 39.932, "grad_norm": 0.9979121685028076, "learning_rate": 2e-05, "loss": 0.04871643, "step": 19966 }, { "epoch": 39.934, "grad_norm": 1.3384544849395752, "learning_rate": 2e-05, "loss": 0.04304737, "step": 19967 }, { "epoch": 39.936, "grad_norm": 3.8958451747894287, "learning_rate": 2e-05, "loss": 0.04801209, "step": 19968 }, { "epoch": 39.938, "grad_norm": 1.019285798072815, "learning_rate": 2e-05, "loss": 0.03199055, "step": 19969 }, { "epoch": 39.94, "grad_norm": 1.1194003820419312, "learning_rate": 2e-05, "loss": 0.04431958, "step": 19970 }, { "epoch": 39.942, "grad_norm": 2.315790891647339, "learning_rate": 2e-05, "loss": 0.04831096, "step": 19971 }, { "epoch": 39.944, "grad_norm": 0.991619348526001, "learning_rate": 2e-05, "loss": 0.03754363, "step": 19972 }, { "epoch": 39.946, "grad_norm": 0.848356306552887, "learning_rate": 2e-05, "loss": 0.03359814, "step": 19973 }, { "epoch": 39.948, "grad_norm": 1.2409000396728516, "learning_rate": 2e-05, "loss": 0.03768143, "step": 19974 }, { "epoch": 39.95, "grad_norm": 1.084582805633545, "learning_rate": 2e-05, "loss": 0.03662058, "step": 19975 }, { "epoch": 39.952, "grad_norm": 1.2749706506729126, "learning_rate": 2e-05, "loss": 0.03990535, "step": 19976 }, { "epoch": 39.954, "grad_norm": 2.607482433319092, "learning_rate": 2e-05, "loss": 0.06303118, "step": 19977 }, { "epoch": 39.956, "grad_norm": 1.2147165536880493, "learning_rate": 2e-05, "loss": 0.04660799, "step": 19978 }, { "epoch": 39.958, "grad_norm": 1.0502467155456543, "learning_rate": 2e-05, "loss": 0.05218858, "step": 19979 }, { "epoch": 39.96, "grad_norm": 1.1756283044815063, "learning_rate": 2e-05, "loss": 0.05041253, "step": 19980 }, { "epoch": 39.962, "grad_norm": 1.2641695737838745, "learning_rate": 2e-05, "loss": 0.03502963, "step": 19981 }, { "epoch": 39.964, "grad_norm": 1.1930348873138428, "learning_rate": 2e-05, "loss": 0.0482903, "step": 19982 }, { "epoch": 39.966, "grad_norm": 0.8650665879249573, "learning_rate": 2e-05, "loss": 0.03334688, "step": 19983 }, { "epoch": 39.968, "grad_norm": 1.1346603631973267, "learning_rate": 2e-05, "loss": 0.0446093, "step": 19984 }, { "epoch": 39.97, "grad_norm": 2.3015024662017822, "learning_rate": 2e-05, "loss": 0.05563396, "step": 19985 }, { "epoch": 39.972, "grad_norm": 0.9218971133232117, "learning_rate": 2e-05, "loss": 0.04290501, "step": 19986 }, { "epoch": 39.974, "grad_norm": 1.0835826396942139, "learning_rate": 2e-05, "loss": 0.04011057, "step": 19987 }, { "epoch": 39.976, "grad_norm": 1.1462428569793701, "learning_rate": 2e-05, "loss": 0.04896361, "step": 19988 }, { "epoch": 39.978, "grad_norm": 1.8022863864898682, "learning_rate": 2e-05, "loss": 0.04648117, "step": 19989 }, { "epoch": 39.98, "grad_norm": 1.362650752067566, "learning_rate": 2e-05, "loss": 0.05301976, "step": 19990 }, { "epoch": 39.982, "grad_norm": 1.1382851600646973, "learning_rate": 2e-05, "loss": 0.0476516, "step": 19991 }, { "epoch": 39.984, "grad_norm": 1.4238429069519043, "learning_rate": 2e-05, "loss": 0.05370012, "step": 19992 }, { "epoch": 39.986, "grad_norm": 0.8693823218345642, "learning_rate": 2e-05, "loss": 0.03766875, "step": 19993 }, { "epoch": 39.988, "grad_norm": 1.6572775840759277, "learning_rate": 2e-05, "loss": 0.0413289, "step": 19994 }, { "epoch": 39.99, "grad_norm": 1.1973378658294678, "learning_rate": 2e-05, "loss": 0.0487441, "step": 19995 }, { "epoch": 39.992, "grad_norm": 1.213873028755188, "learning_rate": 2e-05, "loss": 0.0331713, "step": 19996 }, { "epoch": 39.994, "grad_norm": 1.024364948272705, "learning_rate": 2e-05, "loss": 0.04477939, "step": 19997 }, { "epoch": 39.996, "grad_norm": 1.897985577583313, "learning_rate": 2e-05, "loss": 0.04816069, "step": 19998 }, { "epoch": 39.998, "grad_norm": 0.9153301119804382, "learning_rate": 2e-05, "loss": 0.02916267, "step": 19999 }, { "epoch": 40.0, "grad_norm": 1.2273842096328735, "learning_rate": 2e-05, "loss": 0.04473015, "step": 20000 }, { "epoch": 40.0, "eval_performance": { "AngleClassification_1": 1.0, "AngleClassification_2": 0.998, "AngleClassification_3": 0.9800399201596807, "Equal_1": 0.996, "Equal_2": 0.9760479041916168, "Equal_3": 0.9840319361277445, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 1.0, "Parallel_1": 0.9899799599198397, "Parallel_2": 0.9939879759519038, "Parallel_3": 0.994, "Perpendicular_1": 1.0, "Perpendicular_2": 0.99, "Perpendicular_3": 0.8977955911823647, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.996, "PointLiesOnCircle_3": 0.99, "PointLiesOnLine_1": 0.9959919839679359, "PointLiesOnLine_2": 0.9939879759519038, "PointLiesOnLine_3": 0.9800399201596807 }, "eval_runtime": 226.0082, "eval_samples_per_second": 46.458, "eval_steps_per_second": 0.929, "step": 20000 }, { "epoch": 40.002, "grad_norm": 1.071014404296875, "learning_rate": 2e-05, "loss": 0.04129304, "step": 20001 }, { "epoch": 40.004, "grad_norm": 1.2165446281433105, "learning_rate": 2e-05, "loss": 0.04476281, "step": 20002 }, { "epoch": 40.006, "grad_norm": 2.5240094661712646, "learning_rate": 2e-05, "loss": 0.05591761, "step": 20003 }, { "epoch": 40.008, "grad_norm": 1.1321076154708862, "learning_rate": 2e-05, "loss": 0.03877566, "step": 20004 }, { "epoch": 40.01, "grad_norm": 1.14009428024292, "learning_rate": 2e-05, "loss": 0.03674002, "step": 20005 }, { "epoch": 40.012, "grad_norm": 1.0428156852722168, "learning_rate": 2e-05, "loss": 0.03883694, "step": 20006 }, { "epoch": 40.014, "grad_norm": 1.7450840473175049, "learning_rate": 2e-05, "loss": 0.0481742, "step": 20007 }, { "epoch": 40.016, "grad_norm": 1.037800908088684, "learning_rate": 2e-05, "loss": 0.05823126, "step": 20008 }, { "epoch": 40.018, "grad_norm": 0.8892733454704285, "learning_rate": 2e-05, "loss": 0.03413287, "step": 20009 }, { "epoch": 40.02, "grad_norm": 0.9641358852386475, "learning_rate": 2e-05, "loss": 0.03495368, "step": 20010 }, { "epoch": 40.022, "grad_norm": 1.2989205121994019, "learning_rate": 2e-05, "loss": 0.05214029, "step": 20011 }, { "epoch": 40.024, "grad_norm": 1.1618056297302246, "learning_rate": 2e-05, "loss": 0.04802158, "step": 20012 }, { "epoch": 40.026, "grad_norm": 1.2091317176818848, "learning_rate": 2e-05, "loss": 0.04787968, "step": 20013 }, { "epoch": 40.028, "grad_norm": 0.9857677221298218, "learning_rate": 2e-05, "loss": 0.04007314, "step": 20014 }, { "epoch": 40.03, "grad_norm": 1.0429917573928833, "learning_rate": 2e-05, "loss": 0.03963727, "step": 20015 }, { "epoch": 40.032, "grad_norm": 1.0672388076782227, "learning_rate": 2e-05, "loss": 0.03768752, "step": 20016 }, { "epoch": 40.034, "grad_norm": 0.9452791810035706, "learning_rate": 2e-05, "loss": 0.03715968, "step": 20017 }, { "epoch": 40.036, "grad_norm": 0.8694630265235901, "learning_rate": 2e-05, "loss": 0.03308756, "step": 20018 }, { "epoch": 40.038, "grad_norm": 0.9217482805252075, "learning_rate": 2e-05, "loss": 0.03674925, "step": 20019 }, { "epoch": 40.04, "grad_norm": 0.9997850656509399, "learning_rate": 2e-05, "loss": 0.0359928, "step": 20020 }, { "epoch": 40.042, "grad_norm": 1.8567707538604736, "learning_rate": 2e-05, "loss": 0.03756915, "step": 20021 }, { "epoch": 40.044, "grad_norm": 1.9562739133834839, "learning_rate": 2e-05, "loss": 0.07046899, "step": 20022 }, { "epoch": 40.046, "grad_norm": 1.0020452737808228, "learning_rate": 2e-05, "loss": 0.04096894, "step": 20023 }, { "epoch": 40.048, "grad_norm": 1.1804124116897583, "learning_rate": 2e-05, "loss": 0.05127569, "step": 20024 }, { "epoch": 40.05, "grad_norm": 2.2624127864837646, "learning_rate": 2e-05, "loss": 0.05016618, "step": 20025 }, { "epoch": 40.052, "grad_norm": 0.9638793468475342, "learning_rate": 2e-05, "loss": 0.03528046, "step": 20026 }, { "epoch": 40.054, "grad_norm": 1.4188004732131958, "learning_rate": 2e-05, "loss": 0.04649553, "step": 20027 }, { "epoch": 40.056, "grad_norm": 1.067525029182434, "learning_rate": 2e-05, "loss": 0.04353593, "step": 20028 }, { "epoch": 40.058, "grad_norm": 0.8470382690429688, "learning_rate": 2e-05, "loss": 0.02927515, "step": 20029 }, { "epoch": 40.06, "grad_norm": 1.0620425939559937, "learning_rate": 2e-05, "loss": 0.03921532, "step": 20030 }, { "epoch": 40.062, "grad_norm": 1.0946286916732788, "learning_rate": 2e-05, "loss": 0.03290474, "step": 20031 }, { "epoch": 40.064, "grad_norm": 1.1386669874191284, "learning_rate": 2e-05, "loss": 0.03573207, "step": 20032 }, { "epoch": 40.066, "grad_norm": 1.3364137411117554, "learning_rate": 2e-05, "loss": 0.0393317, "step": 20033 }, { "epoch": 40.068, "grad_norm": 1.0459754467010498, "learning_rate": 2e-05, "loss": 0.03220486, "step": 20034 }, { "epoch": 40.07, "grad_norm": 0.9784430861473083, "learning_rate": 2e-05, "loss": 0.04371426, "step": 20035 }, { "epoch": 40.072, "grad_norm": 1.097764015197754, "learning_rate": 2e-05, "loss": 0.03310665, "step": 20036 }, { "epoch": 40.074, "grad_norm": 1.450361967086792, "learning_rate": 2e-05, "loss": 0.08026856, "step": 20037 }, { "epoch": 40.076, "grad_norm": 0.9722415208816528, "learning_rate": 2e-05, "loss": 0.02877928, "step": 20038 }, { "epoch": 40.078, "grad_norm": 1.089726209640503, "learning_rate": 2e-05, "loss": 0.03498789, "step": 20039 }, { "epoch": 40.08, "grad_norm": 1.0014487504959106, "learning_rate": 2e-05, "loss": 0.03770518, "step": 20040 }, { "epoch": 40.082, "grad_norm": 1.0518802404403687, "learning_rate": 2e-05, "loss": 0.04029042, "step": 20041 }, { "epoch": 40.084, "grad_norm": 1.1188164949417114, "learning_rate": 2e-05, "loss": 0.03957991, "step": 20042 }, { "epoch": 40.086, "grad_norm": 1.298071026802063, "learning_rate": 2e-05, "loss": 0.05333746, "step": 20043 }, { "epoch": 40.088, "grad_norm": 1.0623351335525513, "learning_rate": 2e-05, "loss": 0.04033545, "step": 20044 }, { "epoch": 40.09, "grad_norm": 1.6574100255966187, "learning_rate": 2e-05, "loss": 0.04998593, "step": 20045 }, { "epoch": 40.092, "grad_norm": 1.6891632080078125, "learning_rate": 2e-05, "loss": 0.04545986, "step": 20046 }, { "epoch": 40.094, "grad_norm": 1.3112374544143677, "learning_rate": 2e-05, "loss": 0.04508246, "step": 20047 }, { "epoch": 40.096, "grad_norm": 1.7705312967300415, "learning_rate": 2e-05, "loss": 0.03236531, "step": 20048 }, { "epoch": 40.098, "grad_norm": 1.2569351196289062, "learning_rate": 2e-05, "loss": 0.03774524, "step": 20049 }, { "epoch": 40.1, "grad_norm": 2.009286403656006, "learning_rate": 2e-05, "loss": 0.05481736, "step": 20050 }, { "epoch": 40.102, "grad_norm": 1.2503094673156738, "learning_rate": 2e-05, "loss": 0.05809206, "step": 20051 }, { "epoch": 40.104, "grad_norm": 1.1484287977218628, "learning_rate": 2e-05, "loss": 0.05998381, "step": 20052 }, { "epoch": 40.106, "grad_norm": 1.3085055351257324, "learning_rate": 2e-05, "loss": 0.0701361, "step": 20053 }, { "epoch": 40.108, "grad_norm": 1.1804625988006592, "learning_rate": 2e-05, "loss": 0.04368751, "step": 20054 }, { "epoch": 40.11, "grad_norm": 1.086667776107788, "learning_rate": 2e-05, "loss": 0.05277239, "step": 20055 }, { "epoch": 40.112, "grad_norm": 1.1138943433761597, "learning_rate": 2e-05, "loss": 0.03357423, "step": 20056 }, { "epoch": 40.114, "grad_norm": 1.106635570526123, "learning_rate": 2e-05, "loss": 0.04059854, "step": 20057 }, { "epoch": 40.116, "grad_norm": 1.8338348865509033, "learning_rate": 2e-05, "loss": 0.04736425, "step": 20058 }, { "epoch": 40.118, "grad_norm": 1.3241244554519653, "learning_rate": 2e-05, "loss": 0.05309791, "step": 20059 }, { "epoch": 40.12, "grad_norm": 1.0946670770645142, "learning_rate": 2e-05, "loss": 0.04184254, "step": 20060 }, { "epoch": 40.122, "grad_norm": 1.2872495651245117, "learning_rate": 2e-05, "loss": 0.03892963, "step": 20061 }, { "epoch": 40.124, "grad_norm": 2.0154688358306885, "learning_rate": 2e-05, "loss": 0.0612685, "step": 20062 }, { "epoch": 40.126, "grad_norm": 1.084977149963379, "learning_rate": 2e-05, "loss": 0.03851228, "step": 20063 }, { "epoch": 40.128, "grad_norm": 1.127941370010376, "learning_rate": 2e-05, "loss": 0.05298203, "step": 20064 }, { "epoch": 40.13, "grad_norm": 1.244422435760498, "learning_rate": 2e-05, "loss": 0.04970325, "step": 20065 }, { "epoch": 40.132, "grad_norm": 3.169532299041748, "learning_rate": 2e-05, "loss": 0.05995256, "step": 20066 }, { "epoch": 40.134, "grad_norm": 1.1561757326126099, "learning_rate": 2e-05, "loss": 0.05017846, "step": 20067 }, { "epoch": 40.136, "grad_norm": 0.9019343256950378, "learning_rate": 2e-05, "loss": 0.03231926, "step": 20068 }, { "epoch": 40.138, "grad_norm": 1.0413838624954224, "learning_rate": 2e-05, "loss": 0.04674189, "step": 20069 }, { "epoch": 40.14, "grad_norm": 2.4518508911132812, "learning_rate": 2e-05, "loss": 0.04723153, "step": 20070 }, { "epoch": 40.142, "grad_norm": 1.0835013389587402, "learning_rate": 2e-05, "loss": 0.03925369, "step": 20071 }, { "epoch": 40.144, "grad_norm": 1.1196209192276, "learning_rate": 2e-05, "loss": 0.04555103, "step": 20072 }, { "epoch": 40.146, "grad_norm": 1.5784748792648315, "learning_rate": 2e-05, "loss": 0.0524052, "step": 20073 }, { "epoch": 40.148, "grad_norm": 1.210171103477478, "learning_rate": 2e-05, "loss": 0.02720325, "step": 20074 }, { "epoch": 40.15, "grad_norm": 1.4669727087020874, "learning_rate": 2e-05, "loss": 0.05448768, "step": 20075 }, { "epoch": 40.152, "grad_norm": 0.9620411992073059, "learning_rate": 2e-05, "loss": 0.04644699, "step": 20076 }, { "epoch": 40.154, "grad_norm": 1.2830628156661987, "learning_rate": 2e-05, "loss": 0.04726451, "step": 20077 }, { "epoch": 40.156, "grad_norm": 1.0022696256637573, "learning_rate": 2e-05, "loss": 0.03043354, "step": 20078 }, { "epoch": 40.158, "grad_norm": 1.1224088668823242, "learning_rate": 2e-05, "loss": 0.04900142, "step": 20079 }, { "epoch": 40.16, "grad_norm": 1.150498390197754, "learning_rate": 2e-05, "loss": 0.04304528, "step": 20080 }, { "epoch": 40.162, "grad_norm": 1.090377926826477, "learning_rate": 2e-05, "loss": 0.04447605, "step": 20081 }, { "epoch": 40.164, "grad_norm": 1.212199330329895, "learning_rate": 2e-05, "loss": 0.04734999, "step": 20082 }, { "epoch": 40.166, "grad_norm": 1.1343926191329956, "learning_rate": 2e-05, "loss": 0.04121265, "step": 20083 }, { "epoch": 40.168, "grad_norm": 0.945184051990509, "learning_rate": 2e-05, "loss": 0.04123488, "step": 20084 }, { "epoch": 40.17, "grad_norm": 1.1315958499908447, "learning_rate": 2e-05, "loss": 0.04689094, "step": 20085 }, { "epoch": 40.172, "grad_norm": 2.013328790664673, "learning_rate": 2e-05, "loss": 0.03365181, "step": 20086 }, { "epoch": 40.174, "grad_norm": 1.0423808097839355, "learning_rate": 2e-05, "loss": 0.04498311, "step": 20087 }, { "epoch": 40.176, "grad_norm": 1.0776348114013672, "learning_rate": 2e-05, "loss": 0.05483859, "step": 20088 }, { "epoch": 40.178, "grad_norm": 1.1126986742019653, "learning_rate": 2e-05, "loss": 0.05042816, "step": 20089 }, { "epoch": 40.18, "grad_norm": 1.0208160877227783, "learning_rate": 2e-05, "loss": 0.0329367, "step": 20090 }, { "epoch": 40.182, "grad_norm": 1.151786208152771, "learning_rate": 2e-05, "loss": 0.04685981, "step": 20091 }, { "epoch": 40.184, "grad_norm": 1.049393653869629, "learning_rate": 2e-05, "loss": 0.03034136, "step": 20092 }, { "epoch": 40.186, "grad_norm": 1.0787166357040405, "learning_rate": 2e-05, "loss": 0.0400468, "step": 20093 }, { "epoch": 40.188, "grad_norm": 1.3546357154846191, "learning_rate": 2e-05, "loss": 0.03797982, "step": 20094 }, { "epoch": 40.19, "grad_norm": 1.578726887702942, "learning_rate": 2e-05, "loss": 0.05866269, "step": 20095 }, { "epoch": 40.192, "grad_norm": 1.6083524227142334, "learning_rate": 2e-05, "loss": 0.04566522, "step": 20096 }, { "epoch": 40.194, "grad_norm": 2.5575644969940186, "learning_rate": 2e-05, "loss": 0.04490903, "step": 20097 }, { "epoch": 40.196, "grad_norm": 2.4700403213500977, "learning_rate": 2e-05, "loss": 0.063129, "step": 20098 }, { "epoch": 40.198, "grad_norm": 0.872857928276062, "learning_rate": 2e-05, "loss": 0.02387144, "step": 20099 }, { "epoch": 40.2, "grad_norm": 0.9970892071723938, "learning_rate": 2e-05, "loss": 0.04536032, "step": 20100 }, { "epoch": 40.202, "grad_norm": 1.3674415349960327, "learning_rate": 2e-05, "loss": 0.05120435, "step": 20101 }, { "epoch": 40.204, "grad_norm": 0.9996839165687561, "learning_rate": 2e-05, "loss": 0.0301784, "step": 20102 }, { "epoch": 40.206, "grad_norm": 1.458074927330017, "learning_rate": 2e-05, "loss": 0.07427418, "step": 20103 }, { "epoch": 40.208, "grad_norm": 1.0482983589172363, "learning_rate": 2e-05, "loss": 0.03004123, "step": 20104 }, { "epoch": 40.21, "grad_norm": 1.5062034130096436, "learning_rate": 2e-05, "loss": 0.05862463, "step": 20105 }, { "epoch": 40.212, "grad_norm": 1.6953305006027222, "learning_rate": 2e-05, "loss": 0.04210478, "step": 20106 }, { "epoch": 40.214, "grad_norm": 1.2317745685577393, "learning_rate": 2e-05, "loss": 0.05779662, "step": 20107 }, { "epoch": 40.216, "grad_norm": 1.2175332307815552, "learning_rate": 2e-05, "loss": 0.05812114, "step": 20108 }, { "epoch": 40.218, "grad_norm": 1.4096474647521973, "learning_rate": 2e-05, "loss": 0.0688202, "step": 20109 }, { "epoch": 40.22, "grad_norm": 1.234533429145813, "learning_rate": 2e-05, "loss": 0.04126355, "step": 20110 }, { "epoch": 40.222, "grad_norm": 1.381717324256897, "learning_rate": 2e-05, "loss": 0.06189049, "step": 20111 }, { "epoch": 40.224, "grad_norm": 0.7836905121803284, "learning_rate": 2e-05, "loss": 0.02318118, "step": 20112 }, { "epoch": 40.226, "grad_norm": 1.1245120763778687, "learning_rate": 2e-05, "loss": 0.04832661, "step": 20113 }, { "epoch": 40.228, "grad_norm": 1.0359984636306763, "learning_rate": 2e-05, "loss": 0.04039298, "step": 20114 }, { "epoch": 40.23, "grad_norm": 1.2905807495117188, "learning_rate": 2e-05, "loss": 0.06329786, "step": 20115 }, { "epoch": 40.232, "grad_norm": 5.25101375579834, "learning_rate": 2e-05, "loss": 0.04391546, "step": 20116 }, { "epoch": 40.234, "grad_norm": 1.0588263273239136, "learning_rate": 2e-05, "loss": 0.04291525, "step": 20117 }, { "epoch": 40.236, "grad_norm": 1.041487693786621, "learning_rate": 2e-05, "loss": 0.03578708, "step": 20118 }, { "epoch": 40.238, "grad_norm": 1.145468831062317, "learning_rate": 2e-05, "loss": 0.04874301, "step": 20119 }, { "epoch": 40.24, "grad_norm": 1.0374046564102173, "learning_rate": 2e-05, "loss": 0.0321741, "step": 20120 }, { "epoch": 40.242, "grad_norm": 1.0574382543563843, "learning_rate": 2e-05, "loss": 0.04703354, "step": 20121 }, { "epoch": 40.244, "grad_norm": 0.9897115230560303, "learning_rate": 2e-05, "loss": 0.03915127, "step": 20122 }, { "epoch": 40.246, "grad_norm": 0.9520100355148315, "learning_rate": 2e-05, "loss": 0.04126954, "step": 20123 }, { "epoch": 40.248, "grad_norm": 1.0222336053848267, "learning_rate": 2e-05, "loss": 0.04256253, "step": 20124 }, { "epoch": 40.25, "grad_norm": 1.209964632987976, "learning_rate": 2e-05, "loss": 0.06622823, "step": 20125 }, { "epoch": 40.252, "grad_norm": 0.8616648316383362, "learning_rate": 2e-05, "loss": 0.02794971, "step": 20126 }, { "epoch": 40.254, "grad_norm": 1.0609954595565796, "learning_rate": 2e-05, "loss": 0.03738583, "step": 20127 }, { "epoch": 40.256, "grad_norm": 0.9684075117111206, "learning_rate": 2e-05, "loss": 0.03452997, "step": 20128 }, { "epoch": 40.258, "grad_norm": 1.2077908515930176, "learning_rate": 2e-05, "loss": 0.0478143, "step": 20129 }, { "epoch": 40.26, "grad_norm": 1.2240910530090332, "learning_rate": 2e-05, "loss": 0.04795501, "step": 20130 }, { "epoch": 40.262, "grad_norm": 0.9324694275856018, "learning_rate": 2e-05, "loss": 0.03767016, "step": 20131 }, { "epoch": 40.264, "grad_norm": 1.7170013189315796, "learning_rate": 2e-05, "loss": 0.05961518, "step": 20132 }, { "epoch": 40.266, "grad_norm": 1.055739402770996, "learning_rate": 2e-05, "loss": 0.04104446, "step": 20133 }, { "epoch": 40.268, "grad_norm": 1.1865100860595703, "learning_rate": 2e-05, "loss": 0.04684532, "step": 20134 }, { "epoch": 40.27, "grad_norm": 1.5056294202804565, "learning_rate": 2e-05, "loss": 0.04708206, "step": 20135 }, { "epoch": 40.272, "grad_norm": 1.6504653692245483, "learning_rate": 2e-05, "loss": 0.02502301, "step": 20136 }, { "epoch": 40.274, "grad_norm": 1.07023024559021, "learning_rate": 2e-05, "loss": 0.03616626, "step": 20137 }, { "epoch": 40.276, "grad_norm": 1.0220813751220703, "learning_rate": 2e-05, "loss": 0.03862039, "step": 20138 }, { "epoch": 40.278, "grad_norm": 1.225319504737854, "learning_rate": 2e-05, "loss": 0.05396026, "step": 20139 }, { "epoch": 40.28, "grad_norm": 1.075613021850586, "learning_rate": 2e-05, "loss": 0.03588823, "step": 20140 }, { "epoch": 40.282, "grad_norm": 0.8682084083557129, "learning_rate": 2e-05, "loss": 0.03014431, "step": 20141 }, { "epoch": 40.284, "grad_norm": 1.105204463005066, "learning_rate": 2e-05, "loss": 0.03917256, "step": 20142 }, { "epoch": 40.286, "grad_norm": 1.215539574623108, "learning_rate": 2e-05, "loss": 0.04887994, "step": 20143 }, { "epoch": 40.288, "grad_norm": 1.3170772790908813, "learning_rate": 2e-05, "loss": 0.04747734, "step": 20144 }, { "epoch": 40.29, "grad_norm": 0.9253361225128174, "learning_rate": 2e-05, "loss": 0.03626566, "step": 20145 }, { "epoch": 40.292, "grad_norm": 1.279989242553711, "learning_rate": 2e-05, "loss": 0.06118982, "step": 20146 }, { "epoch": 40.294, "grad_norm": 1.0756986141204834, "learning_rate": 2e-05, "loss": 0.04997719, "step": 20147 }, { "epoch": 40.296, "grad_norm": 1.7090152502059937, "learning_rate": 2e-05, "loss": 0.0496064, "step": 20148 }, { "epoch": 40.298, "grad_norm": 1.155979037284851, "learning_rate": 2e-05, "loss": 0.0395203, "step": 20149 }, { "epoch": 40.3, "grad_norm": 1.6735153198242188, "learning_rate": 2e-05, "loss": 0.04876689, "step": 20150 }, { "epoch": 40.302, "grad_norm": 1.120461344718933, "learning_rate": 2e-05, "loss": 0.03875158, "step": 20151 }, { "epoch": 40.304, "grad_norm": 1.804721713066101, "learning_rate": 2e-05, "loss": 0.04355071, "step": 20152 }, { "epoch": 40.306, "grad_norm": 1.2840361595153809, "learning_rate": 2e-05, "loss": 0.04243275, "step": 20153 }, { "epoch": 40.308, "grad_norm": 1.3396196365356445, "learning_rate": 2e-05, "loss": 0.0477254, "step": 20154 }, { "epoch": 40.31, "grad_norm": 1.2071930170059204, "learning_rate": 2e-05, "loss": 0.04932068, "step": 20155 }, { "epoch": 40.312, "grad_norm": 1.200128197669983, "learning_rate": 2e-05, "loss": 0.03878069, "step": 20156 }, { "epoch": 40.314, "grad_norm": 1.1535923480987549, "learning_rate": 2e-05, "loss": 0.03459312, "step": 20157 }, { "epoch": 40.316, "grad_norm": 2.175842761993408, "learning_rate": 2e-05, "loss": 0.05796396, "step": 20158 }, { "epoch": 40.318, "grad_norm": 0.9883548021316528, "learning_rate": 2e-05, "loss": 0.04057959, "step": 20159 }, { "epoch": 40.32, "grad_norm": 0.9627332091331482, "learning_rate": 2e-05, "loss": 0.03382679, "step": 20160 }, { "epoch": 40.322, "grad_norm": 1.1802172660827637, "learning_rate": 2e-05, "loss": 0.04416179, "step": 20161 }, { "epoch": 40.324, "grad_norm": 1.405636191368103, "learning_rate": 2e-05, "loss": 0.04495843, "step": 20162 }, { "epoch": 40.326, "grad_norm": 0.909905731678009, "learning_rate": 2e-05, "loss": 0.03422809, "step": 20163 }, { "epoch": 40.328, "grad_norm": 1.2897473573684692, "learning_rate": 2e-05, "loss": 0.05411062, "step": 20164 }, { "epoch": 40.33, "grad_norm": 1.2469984292984009, "learning_rate": 2e-05, "loss": 0.03869724, "step": 20165 }, { "epoch": 40.332, "grad_norm": 2.335200548171997, "learning_rate": 2e-05, "loss": 0.0441972, "step": 20166 }, { "epoch": 40.334, "grad_norm": 1.467454433441162, "learning_rate": 2e-05, "loss": 0.03722456, "step": 20167 }, { "epoch": 40.336, "grad_norm": 1.0239427089691162, "learning_rate": 2e-05, "loss": 0.03515876, "step": 20168 }, { "epoch": 40.338, "grad_norm": 1.4381844997406006, "learning_rate": 2e-05, "loss": 0.03920049, "step": 20169 }, { "epoch": 40.34, "grad_norm": 1.2720179557800293, "learning_rate": 2e-05, "loss": 0.04872498, "step": 20170 }, { "epoch": 40.342, "grad_norm": 1.4870506525039673, "learning_rate": 2e-05, "loss": 0.04553374, "step": 20171 }, { "epoch": 40.344, "grad_norm": 2.992097854614258, "learning_rate": 2e-05, "loss": 0.03753857, "step": 20172 }, { "epoch": 40.346, "grad_norm": 0.9910198450088501, "learning_rate": 2e-05, "loss": 0.03125165, "step": 20173 }, { "epoch": 40.348, "grad_norm": 1.3177121877670288, "learning_rate": 2e-05, "loss": 0.05781029, "step": 20174 }, { "epoch": 40.35, "grad_norm": 1.2031747102737427, "learning_rate": 2e-05, "loss": 0.0438956, "step": 20175 }, { "epoch": 40.352, "grad_norm": 1.1581616401672363, "learning_rate": 2e-05, "loss": 0.04768595, "step": 20176 }, { "epoch": 40.354, "grad_norm": 1.0423896312713623, "learning_rate": 2e-05, "loss": 0.03720285, "step": 20177 }, { "epoch": 40.356, "grad_norm": 1.0717381238937378, "learning_rate": 2e-05, "loss": 0.04229707, "step": 20178 }, { "epoch": 40.358, "grad_norm": 1.2199180126190186, "learning_rate": 2e-05, "loss": 0.04044171, "step": 20179 }, { "epoch": 40.36, "grad_norm": 0.8346745371818542, "learning_rate": 2e-05, "loss": 0.02193231, "step": 20180 }, { "epoch": 40.362, "grad_norm": 0.9765499830245972, "learning_rate": 2e-05, "loss": 0.03285219, "step": 20181 }, { "epoch": 40.364, "grad_norm": 0.8899419903755188, "learning_rate": 2e-05, "loss": 0.03198574, "step": 20182 }, { "epoch": 40.366, "grad_norm": 1.0089216232299805, "learning_rate": 2e-05, "loss": 0.0348563, "step": 20183 }, { "epoch": 40.368, "grad_norm": 0.8968978524208069, "learning_rate": 2e-05, "loss": 0.02762327, "step": 20184 }, { "epoch": 40.37, "grad_norm": 1.1147799491882324, "learning_rate": 2e-05, "loss": 0.04701362, "step": 20185 }, { "epoch": 40.372, "grad_norm": 1.0164750814437866, "learning_rate": 2e-05, "loss": 0.04109369, "step": 20186 }, { "epoch": 40.374, "grad_norm": 1.1856544017791748, "learning_rate": 2e-05, "loss": 0.0528947, "step": 20187 }, { "epoch": 40.376, "grad_norm": 2.374528646469116, "learning_rate": 2e-05, "loss": 0.04910507, "step": 20188 }, { "epoch": 40.378, "grad_norm": 1.7527254819869995, "learning_rate": 2e-05, "loss": 0.04944891, "step": 20189 }, { "epoch": 40.38, "grad_norm": 1.0547006130218506, "learning_rate": 2e-05, "loss": 0.04531793, "step": 20190 }, { "epoch": 40.382, "grad_norm": 1.1457308530807495, "learning_rate": 2e-05, "loss": 0.04302062, "step": 20191 }, { "epoch": 40.384, "grad_norm": 1.5927281379699707, "learning_rate": 2e-05, "loss": 0.06276862, "step": 20192 }, { "epoch": 40.386, "grad_norm": 1.0521491765975952, "learning_rate": 2e-05, "loss": 0.04277148, "step": 20193 }, { "epoch": 40.388, "grad_norm": 2.513277769088745, "learning_rate": 2e-05, "loss": 0.05132864, "step": 20194 }, { "epoch": 40.39, "grad_norm": 1.6542408466339111, "learning_rate": 2e-05, "loss": 0.04169479, "step": 20195 }, { "epoch": 40.392, "grad_norm": 0.8452298641204834, "learning_rate": 2e-05, "loss": 0.02803415, "step": 20196 }, { "epoch": 40.394, "grad_norm": 1.0574208498001099, "learning_rate": 2e-05, "loss": 0.03791597, "step": 20197 }, { "epoch": 40.396, "grad_norm": 1.1237210035324097, "learning_rate": 2e-05, "loss": 0.04784253, "step": 20198 }, { "epoch": 40.398, "grad_norm": 1.5573137998580933, "learning_rate": 2e-05, "loss": 0.05567899, "step": 20199 }, { "epoch": 40.4, "grad_norm": 1.0879147052764893, "learning_rate": 2e-05, "loss": 0.0420449, "step": 20200 }, { "epoch": 40.402, "grad_norm": 1.1107966899871826, "learning_rate": 2e-05, "loss": 0.04462323, "step": 20201 }, { "epoch": 40.404, "grad_norm": 0.9409716129302979, "learning_rate": 2e-05, "loss": 0.03638512, "step": 20202 }, { "epoch": 40.406, "grad_norm": 1.0237654447555542, "learning_rate": 2e-05, "loss": 0.04747014, "step": 20203 }, { "epoch": 40.408, "grad_norm": 1.025316596031189, "learning_rate": 2e-05, "loss": 0.03784552, "step": 20204 }, { "epoch": 40.41, "grad_norm": 1.0894849300384521, "learning_rate": 2e-05, "loss": 0.04116376, "step": 20205 }, { "epoch": 40.412, "grad_norm": 0.931501567363739, "learning_rate": 2e-05, "loss": 0.03581708, "step": 20206 }, { "epoch": 40.414, "grad_norm": 1.4848265647888184, "learning_rate": 2e-05, "loss": 0.05216502, "step": 20207 }, { "epoch": 40.416, "grad_norm": 1.344570517539978, "learning_rate": 2e-05, "loss": 0.05081491, "step": 20208 }, { "epoch": 40.418, "grad_norm": 1.1433031558990479, "learning_rate": 2e-05, "loss": 0.04995571, "step": 20209 }, { "epoch": 40.42, "grad_norm": 1.2923433780670166, "learning_rate": 2e-05, "loss": 0.03958894, "step": 20210 }, { "epoch": 40.422, "grad_norm": 1.1613800525665283, "learning_rate": 2e-05, "loss": 0.05347838, "step": 20211 }, { "epoch": 40.424, "grad_norm": 1.1029516458511353, "learning_rate": 2e-05, "loss": 0.04881627, "step": 20212 }, { "epoch": 40.426, "grad_norm": 1.0604616403579712, "learning_rate": 2e-05, "loss": 0.04084657, "step": 20213 }, { "epoch": 40.428, "grad_norm": 1.3371881246566772, "learning_rate": 2e-05, "loss": 0.04373746, "step": 20214 }, { "epoch": 40.43, "grad_norm": 1.2692286968231201, "learning_rate": 2e-05, "loss": 0.03431744, "step": 20215 }, { "epoch": 40.432, "grad_norm": 1.424799919128418, "learning_rate": 2e-05, "loss": 0.05148813, "step": 20216 }, { "epoch": 40.434, "grad_norm": 1.3533345460891724, "learning_rate": 2e-05, "loss": 0.04609526, "step": 20217 }, { "epoch": 40.436, "grad_norm": 1.0171220302581787, "learning_rate": 2e-05, "loss": 0.04212869, "step": 20218 }, { "epoch": 40.438, "grad_norm": 1.0982251167297363, "learning_rate": 2e-05, "loss": 0.04340376, "step": 20219 }, { "epoch": 40.44, "grad_norm": 1.0333088636398315, "learning_rate": 2e-05, "loss": 0.03488145, "step": 20220 }, { "epoch": 40.442, "grad_norm": 1.0224990844726562, "learning_rate": 2e-05, "loss": 0.04395539, "step": 20221 }, { "epoch": 40.444, "grad_norm": 1.0278171300888062, "learning_rate": 2e-05, "loss": 0.03672622, "step": 20222 }, { "epoch": 40.446, "grad_norm": 1.0057718753814697, "learning_rate": 2e-05, "loss": 0.03577653, "step": 20223 }, { "epoch": 40.448, "grad_norm": 0.965559184551239, "learning_rate": 2e-05, "loss": 0.04927844, "step": 20224 }, { "epoch": 40.45, "grad_norm": 1.8529305458068848, "learning_rate": 2e-05, "loss": 0.06187294, "step": 20225 }, { "epoch": 40.452, "grad_norm": 0.8120150566101074, "learning_rate": 2e-05, "loss": 0.02520864, "step": 20226 }, { "epoch": 40.454, "grad_norm": 4.008787155151367, "learning_rate": 2e-05, "loss": 0.04784146, "step": 20227 }, { "epoch": 40.456, "grad_norm": 1.0603026151657104, "learning_rate": 2e-05, "loss": 0.04573495, "step": 20228 }, { "epoch": 40.458, "grad_norm": 1.0922075510025024, "learning_rate": 2e-05, "loss": 0.04751965, "step": 20229 }, { "epoch": 40.46, "grad_norm": 1.0364751815795898, "learning_rate": 2e-05, "loss": 0.04096706, "step": 20230 }, { "epoch": 40.462, "grad_norm": 0.9609272480010986, "learning_rate": 2e-05, "loss": 0.0345999, "step": 20231 }, { "epoch": 40.464, "grad_norm": 1.1895791292190552, "learning_rate": 2e-05, "loss": 0.05389999, "step": 20232 }, { "epoch": 40.466, "grad_norm": 1.072493553161621, "learning_rate": 2e-05, "loss": 0.04262653, "step": 20233 }, { "epoch": 40.468, "grad_norm": 0.8788025379180908, "learning_rate": 2e-05, "loss": 0.03747414, "step": 20234 }, { "epoch": 40.47, "grad_norm": 1.0566481351852417, "learning_rate": 2e-05, "loss": 0.05086055, "step": 20235 }, { "epoch": 40.472, "grad_norm": 0.9598913192749023, "learning_rate": 2e-05, "loss": 0.02933006, "step": 20236 }, { "epoch": 40.474, "grad_norm": 1.0616053342819214, "learning_rate": 2e-05, "loss": 0.04874229, "step": 20237 }, { "epoch": 40.476, "grad_norm": 0.9987836480140686, "learning_rate": 2e-05, "loss": 0.04263033, "step": 20238 }, { "epoch": 40.478, "grad_norm": 1.1502844095230103, "learning_rate": 2e-05, "loss": 0.05938703, "step": 20239 }, { "epoch": 40.48, "grad_norm": 0.8494642972946167, "learning_rate": 2e-05, "loss": 0.03391844, "step": 20240 }, { "epoch": 40.482, "grad_norm": 2.1978564262390137, "learning_rate": 2e-05, "loss": 0.04045141, "step": 20241 }, { "epoch": 40.484, "grad_norm": 1.0367884635925293, "learning_rate": 2e-05, "loss": 0.04026084, "step": 20242 }, { "epoch": 40.486, "grad_norm": 1.1896312236785889, "learning_rate": 2e-05, "loss": 0.03787327, "step": 20243 }, { "epoch": 40.488, "grad_norm": 3.0052671432495117, "learning_rate": 2e-05, "loss": 0.04362085, "step": 20244 }, { "epoch": 40.49, "grad_norm": 1.595651388168335, "learning_rate": 2e-05, "loss": 0.04177163, "step": 20245 }, { "epoch": 40.492, "grad_norm": 1.0803983211517334, "learning_rate": 2e-05, "loss": 0.03839463, "step": 20246 }, { "epoch": 40.494, "grad_norm": 0.9788938760757446, "learning_rate": 2e-05, "loss": 0.04250223, "step": 20247 }, { "epoch": 40.496, "grad_norm": 0.9361246824264526, "learning_rate": 2e-05, "loss": 0.03469218, "step": 20248 }, { "epoch": 40.498, "grad_norm": 2.0592877864837646, "learning_rate": 2e-05, "loss": 0.05483123, "step": 20249 }, { "epoch": 40.5, "grad_norm": 1.3336786031723022, "learning_rate": 2e-05, "loss": 0.06266036, "step": 20250 }, { "epoch": 40.502, "grad_norm": 0.9841834902763367, "learning_rate": 2e-05, "loss": 0.04395983, "step": 20251 }, { "epoch": 40.504, "grad_norm": 1.1584612131118774, "learning_rate": 2e-05, "loss": 0.04770331, "step": 20252 }, { "epoch": 40.506, "grad_norm": 1.1918705701828003, "learning_rate": 2e-05, "loss": 0.03540057, "step": 20253 }, { "epoch": 40.508, "grad_norm": 0.9238448739051819, "learning_rate": 2e-05, "loss": 0.03953602, "step": 20254 }, { "epoch": 40.51, "grad_norm": 1.2111542224884033, "learning_rate": 2e-05, "loss": 0.0400702, "step": 20255 }, { "epoch": 40.512, "grad_norm": 3.0884039402008057, "learning_rate": 2e-05, "loss": 0.06924517, "step": 20256 }, { "epoch": 40.514, "grad_norm": 1.8901320695877075, "learning_rate": 2e-05, "loss": 0.04622996, "step": 20257 }, { "epoch": 40.516, "grad_norm": 1.123457431793213, "learning_rate": 2e-05, "loss": 0.04242891, "step": 20258 }, { "epoch": 40.518, "grad_norm": 1.097454309463501, "learning_rate": 2e-05, "loss": 0.02857732, "step": 20259 }, { "epoch": 40.52, "grad_norm": 1.2910747528076172, "learning_rate": 2e-05, "loss": 0.04702651, "step": 20260 }, { "epoch": 40.522, "grad_norm": 1.3933396339416504, "learning_rate": 2e-05, "loss": 0.04370378, "step": 20261 }, { "epoch": 40.524, "grad_norm": 1.1261711120605469, "learning_rate": 2e-05, "loss": 0.03628248, "step": 20262 }, { "epoch": 40.526, "grad_norm": 1.2315053939819336, "learning_rate": 2e-05, "loss": 0.05335899, "step": 20263 }, { "epoch": 40.528, "grad_norm": 1.0281670093536377, "learning_rate": 2e-05, "loss": 0.04791902, "step": 20264 }, { "epoch": 40.53, "grad_norm": 1.1635000705718994, "learning_rate": 2e-05, "loss": 0.03693828, "step": 20265 }, { "epoch": 40.532, "grad_norm": 1.118584156036377, "learning_rate": 2e-05, "loss": 0.03977263, "step": 20266 }, { "epoch": 40.534, "grad_norm": 1.351843237876892, "learning_rate": 2e-05, "loss": 0.04477321, "step": 20267 }, { "epoch": 40.536, "grad_norm": 0.9837387800216675, "learning_rate": 2e-05, "loss": 0.0403834, "step": 20268 }, { "epoch": 40.538, "grad_norm": 1.3609554767608643, "learning_rate": 2e-05, "loss": 0.04967888, "step": 20269 }, { "epoch": 40.54, "grad_norm": 1.2057619094848633, "learning_rate": 2e-05, "loss": 0.04181759, "step": 20270 }, { "epoch": 40.542, "grad_norm": 0.997383713722229, "learning_rate": 2e-05, "loss": 0.03458607, "step": 20271 }, { "epoch": 40.544, "grad_norm": 1.1576420068740845, "learning_rate": 2e-05, "loss": 0.04939636, "step": 20272 }, { "epoch": 40.546, "grad_norm": 1.2516052722930908, "learning_rate": 2e-05, "loss": 0.0572699, "step": 20273 }, { "epoch": 40.548, "grad_norm": 0.886772871017456, "learning_rate": 2e-05, "loss": 0.03422844, "step": 20274 }, { "epoch": 40.55, "grad_norm": 0.886111855506897, "learning_rate": 2e-05, "loss": 0.03704466, "step": 20275 }, { "epoch": 40.552, "grad_norm": 1.1848208904266357, "learning_rate": 2e-05, "loss": 0.0401883, "step": 20276 }, { "epoch": 40.554, "grad_norm": 1.0184283256530762, "learning_rate": 2e-05, "loss": 0.03515718, "step": 20277 }, { "epoch": 40.556, "grad_norm": 1.0910574197769165, "learning_rate": 2e-05, "loss": 0.03522867, "step": 20278 }, { "epoch": 40.558, "grad_norm": 0.9338254928588867, "learning_rate": 2e-05, "loss": 0.03539358, "step": 20279 }, { "epoch": 40.56, "grad_norm": 0.8921304941177368, "learning_rate": 2e-05, "loss": 0.02580054, "step": 20280 }, { "epoch": 40.562, "grad_norm": 1.095262885093689, "learning_rate": 2e-05, "loss": 0.04738864, "step": 20281 }, { "epoch": 40.564, "grad_norm": 1.177894949913025, "learning_rate": 2e-05, "loss": 0.03564753, "step": 20282 }, { "epoch": 40.566, "grad_norm": 1.0457638502120972, "learning_rate": 2e-05, "loss": 0.04893558, "step": 20283 }, { "epoch": 40.568, "grad_norm": 1.7753831148147583, "learning_rate": 2e-05, "loss": 0.04905668, "step": 20284 }, { "epoch": 40.57, "grad_norm": 1.5942280292510986, "learning_rate": 2e-05, "loss": 0.0502409, "step": 20285 }, { "epoch": 40.572, "grad_norm": 1.0441535711288452, "learning_rate": 2e-05, "loss": 0.0409278, "step": 20286 }, { "epoch": 40.574, "grad_norm": 0.8997386693954468, "learning_rate": 2e-05, "loss": 0.02775328, "step": 20287 }, { "epoch": 40.576, "grad_norm": 1.3574177026748657, "learning_rate": 2e-05, "loss": 0.05415213, "step": 20288 }, { "epoch": 40.578, "grad_norm": 0.9810806512832642, "learning_rate": 2e-05, "loss": 0.04187949, "step": 20289 }, { "epoch": 40.58, "grad_norm": 0.8568405508995056, "learning_rate": 2e-05, "loss": 0.03226211, "step": 20290 }, { "epoch": 40.582, "grad_norm": 1.2484982013702393, "learning_rate": 2e-05, "loss": 0.03542585, "step": 20291 }, { "epoch": 40.584, "grad_norm": 1.2269036769866943, "learning_rate": 2e-05, "loss": 0.05073411, "step": 20292 }, { "epoch": 40.586, "grad_norm": 0.975385844707489, "learning_rate": 2e-05, "loss": 0.03001665, "step": 20293 }, { "epoch": 40.588, "grad_norm": 1.7891687154769897, "learning_rate": 2e-05, "loss": 0.02943169, "step": 20294 }, { "epoch": 40.59, "grad_norm": 0.9739797115325928, "learning_rate": 2e-05, "loss": 0.0351387, "step": 20295 }, { "epoch": 40.592, "grad_norm": 1.5896590948104858, "learning_rate": 2e-05, "loss": 0.05730762, "step": 20296 }, { "epoch": 40.594, "grad_norm": 1.2909826040267944, "learning_rate": 2e-05, "loss": 0.05764409, "step": 20297 }, { "epoch": 40.596, "grad_norm": 1.0026129484176636, "learning_rate": 2e-05, "loss": 0.04050486, "step": 20298 }, { "epoch": 40.598, "grad_norm": 1.2445855140686035, "learning_rate": 2e-05, "loss": 0.05117834, "step": 20299 }, { "epoch": 40.6, "grad_norm": 1.2250120639801025, "learning_rate": 2e-05, "loss": 0.04400096, "step": 20300 }, { "epoch": 40.602, "grad_norm": 0.8650736808776855, "learning_rate": 2e-05, "loss": 0.03073621, "step": 20301 }, { "epoch": 40.604, "grad_norm": 1.4635863304138184, "learning_rate": 2e-05, "loss": 0.05701938, "step": 20302 }, { "epoch": 40.606, "grad_norm": 0.9434235692024231, "learning_rate": 2e-05, "loss": 0.03264166, "step": 20303 }, { "epoch": 40.608, "grad_norm": 1.2432655096054077, "learning_rate": 2e-05, "loss": 0.05706852, "step": 20304 }, { "epoch": 40.61, "grad_norm": 0.9455082416534424, "learning_rate": 2e-05, "loss": 0.03301438, "step": 20305 }, { "epoch": 40.612, "grad_norm": 1.4059230089187622, "learning_rate": 2e-05, "loss": 0.03893852, "step": 20306 }, { "epoch": 40.614, "grad_norm": 1.2401387691497803, "learning_rate": 2e-05, "loss": 0.03187525, "step": 20307 }, { "epoch": 40.616, "grad_norm": 1.151043176651001, "learning_rate": 2e-05, "loss": 0.05010274, "step": 20308 }, { "epoch": 40.618, "grad_norm": 1.132710576057434, "learning_rate": 2e-05, "loss": 0.04922212, "step": 20309 }, { "epoch": 40.62, "grad_norm": 1.053712010383606, "learning_rate": 2e-05, "loss": 0.03406072, "step": 20310 }, { "epoch": 40.622, "grad_norm": 1.4546141624450684, "learning_rate": 2e-05, "loss": 0.03321842, "step": 20311 }, { "epoch": 40.624, "grad_norm": 1.8502954244613647, "learning_rate": 2e-05, "loss": 0.05678663, "step": 20312 }, { "epoch": 40.626, "grad_norm": 1.27000093460083, "learning_rate": 2e-05, "loss": 0.05507036, "step": 20313 }, { "epoch": 40.628, "grad_norm": 1.1061609983444214, "learning_rate": 2e-05, "loss": 0.04016719, "step": 20314 }, { "epoch": 40.63, "grad_norm": 1.1528037786483765, "learning_rate": 2e-05, "loss": 0.04462687, "step": 20315 }, { "epoch": 40.632, "grad_norm": 1.1579402685165405, "learning_rate": 2e-05, "loss": 0.04771682, "step": 20316 }, { "epoch": 40.634, "grad_norm": 0.8906006217002869, "learning_rate": 2e-05, "loss": 0.03497612, "step": 20317 }, { "epoch": 40.636, "grad_norm": 1.2519670724868774, "learning_rate": 2e-05, "loss": 0.05451996, "step": 20318 }, { "epoch": 40.638, "grad_norm": 1.175042986869812, "learning_rate": 2e-05, "loss": 0.0437104, "step": 20319 }, { "epoch": 40.64, "grad_norm": 1.1057565212249756, "learning_rate": 2e-05, "loss": 0.03950239, "step": 20320 }, { "epoch": 40.642, "grad_norm": 1.4335005283355713, "learning_rate": 2e-05, "loss": 0.05036731, "step": 20321 }, { "epoch": 40.644, "grad_norm": 0.9865288734436035, "learning_rate": 2e-05, "loss": 0.03005748, "step": 20322 }, { "epoch": 40.646, "grad_norm": 1.6056369543075562, "learning_rate": 2e-05, "loss": 0.03315169, "step": 20323 }, { "epoch": 40.648, "grad_norm": 1.389215350151062, "learning_rate": 2e-05, "loss": 0.0401629, "step": 20324 }, { "epoch": 40.65, "grad_norm": 1.053136944770813, "learning_rate": 2e-05, "loss": 0.03864453, "step": 20325 }, { "epoch": 40.652, "grad_norm": 1.017822265625, "learning_rate": 2e-05, "loss": 0.04037557, "step": 20326 }, { "epoch": 40.654, "grad_norm": 1.096161961555481, "learning_rate": 2e-05, "loss": 0.03881522, "step": 20327 }, { "epoch": 40.656, "grad_norm": 0.8721410632133484, "learning_rate": 2e-05, "loss": 0.03521979, "step": 20328 }, { "epoch": 40.658, "grad_norm": 0.9844672083854675, "learning_rate": 2e-05, "loss": 0.02812034, "step": 20329 }, { "epoch": 40.66, "grad_norm": 1.0463004112243652, "learning_rate": 2e-05, "loss": 0.03988159, "step": 20330 }, { "epoch": 40.662, "grad_norm": 2.0531327724456787, "learning_rate": 2e-05, "loss": 0.04565425, "step": 20331 }, { "epoch": 40.664, "grad_norm": 1.187804102897644, "learning_rate": 2e-05, "loss": 0.03722551, "step": 20332 }, { "epoch": 40.666, "grad_norm": 0.9988803267478943, "learning_rate": 2e-05, "loss": 0.03328416, "step": 20333 }, { "epoch": 40.668, "grad_norm": 3.2058463096618652, "learning_rate": 2e-05, "loss": 0.04477774, "step": 20334 }, { "epoch": 40.67, "grad_norm": 0.9278386235237122, "learning_rate": 2e-05, "loss": 0.03188913, "step": 20335 }, { "epoch": 40.672, "grad_norm": 0.8796897530555725, "learning_rate": 2e-05, "loss": 0.02682002, "step": 20336 }, { "epoch": 40.674, "grad_norm": 1.3731905221939087, "learning_rate": 2e-05, "loss": 0.04300943, "step": 20337 }, { "epoch": 40.676, "grad_norm": 1.0222986936569214, "learning_rate": 2e-05, "loss": 0.045429, "step": 20338 }, { "epoch": 40.678, "grad_norm": 0.9639245867729187, "learning_rate": 2e-05, "loss": 0.03402236, "step": 20339 }, { "epoch": 40.68, "grad_norm": 1.011451244354248, "learning_rate": 2e-05, "loss": 0.03988052, "step": 20340 }, { "epoch": 40.682, "grad_norm": 0.9638349413871765, "learning_rate": 2e-05, "loss": 0.03173074, "step": 20341 }, { "epoch": 40.684, "grad_norm": 1.2763216495513916, "learning_rate": 2e-05, "loss": 0.0467791, "step": 20342 }, { "epoch": 40.686, "grad_norm": 0.9802601337432861, "learning_rate": 2e-05, "loss": 0.03727366, "step": 20343 }, { "epoch": 40.688, "grad_norm": 1.4489079713821411, "learning_rate": 2e-05, "loss": 0.05987734, "step": 20344 }, { "epoch": 40.69, "grad_norm": 1.3176933526992798, "learning_rate": 2e-05, "loss": 0.04923481, "step": 20345 }, { "epoch": 40.692, "grad_norm": 1.661275863647461, "learning_rate": 2e-05, "loss": 0.05209614, "step": 20346 }, { "epoch": 40.694, "grad_norm": 1.256117582321167, "learning_rate": 2e-05, "loss": 0.05711231, "step": 20347 }, { "epoch": 40.696, "grad_norm": 0.8871097564697266, "learning_rate": 2e-05, "loss": 0.03012238, "step": 20348 }, { "epoch": 40.698, "grad_norm": 1.1306544542312622, "learning_rate": 2e-05, "loss": 0.03791804, "step": 20349 }, { "epoch": 40.7, "grad_norm": 1.034216046333313, "learning_rate": 2e-05, "loss": 0.04173868, "step": 20350 }, { "epoch": 40.702, "grad_norm": 1.2503635883331299, "learning_rate": 2e-05, "loss": 0.04013094, "step": 20351 }, { "epoch": 40.704, "grad_norm": 1.7045819759368896, "learning_rate": 2e-05, "loss": 0.05411009, "step": 20352 }, { "epoch": 40.706, "grad_norm": 1.124104619026184, "learning_rate": 2e-05, "loss": 0.04904299, "step": 20353 }, { "epoch": 40.708, "grad_norm": 1.3626705408096313, "learning_rate": 2e-05, "loss": 0.04543092, "step": 20354 }, { "epoch": 40.71, "grad_norm": 1.1447986364364624, "learning_rate": 2e-05, "loss": 0.04599103, "step": 20355 }, { "epoch": 40.712, "grad_norm": 1.231918215751648, "learning_rate": 2e-05, "loss": 0.04845871, "step": 20356 }, { "epoch": 40.714, "grad_norm": 2.2015082836151123, "learning_rate": 2e-05, "loss": 0.04750804, "step": 20357 }, { "epoch": 40.716, "grad_norm": 1.3974066972732544, "learning_rate": 2e-05, "loss": 0.04666973, "step": 20358 }, { "epoch": 40.718, "grad_norm": 1.179800033569336, "learning_rate": 2e-05, "loss": 0.04111189, "step": 20359 }, { "epoch": 40.72, "grad_norm": 1.0869503021240234, "learning_rate": 2e-05, "loss": 0.0396762, "step": 20360 }, { "epoch": 40.722, "grad_norm": 1.3897340297698975, "learning_rate": 2e-05, "loss": 0.04975088, "step": 20361 }, { "epoch": 40.724, "grad_norm": 1.260200023651123, "learning_rate": 2e-05, "loss": 0.05744781, "step": 20362 }, { "epoch": 40.726, "grad_norm": 1.095646619796753, "learning_rate": 2e-05, "loss": 0.02707324, "step": 20363 }, { "epoch": 40.728, "grad_norm": 2.098297357559204, "learning_rate": 2e-05, "loss": 0.05802426, "step": 20364 }, { "epoch": 40.73, "grad_norm": 1.2377839088439941, "learning_rate": 2e-05, "loss": 0.04048099, "step": 20365 }, { "epoch": 40.732, "grad_norm": 1.082080364227295, "learning_rate": 2e-05, "loss": 0.03142042, "step": 20366 }, { "epoch": 40.734, "grad_norm": 1.0381139516830444, "learning_rate": 2e-05, "loss": 0.0383639, "step": 20367 }, { "epoch": 40.736, "grad_norm": 1.1092157363891602, "learning_rate": 2e-05, "loss": 0.04246139, "step": 20368 }, { "epoch": 40.738, "grad_norm": 1.228360652923584, "learning_rate": 2e-05, "loss": 0.05272875, "step": 20369 }, { "epoch": 40.74, "grad_norm": 0.9492017030715942, "learning_rate": 2e-05, "loss": 0.03540252, "step": 20370 }, { "epoch": 40.742, "grad_norm": 1.8718535900115967, "learning_rate": 2e-05, "loss": 0.05970877, "step": 20371 }, { "epoch": 40.744, "grad_norm": 2.2667911052703857, "learning_rate": 2e-05, "loss": 0.04998243, "step": 20372 }, { "epoch": 40.746, "grad_norm": 0.8534602522850037, "learning_rate": 2e-05, "loss": 0.02303358, "step": 20373 }, { "epoch": 40.748, "grad_norm": 1.2956347465515137, "learning_rate": 2e-05, "loss": 0.03930848, "step": 20374 }, { "epoch": 40.75, "grad_norm": 1.1072241067886353, "learning_rate": 2e-05, "loss": 0.04723698, "step": 20375 }, { "epoch": 40.752, "grad_norm": 1.1236096620559692, "learning_rate": 2e-05, "loss": 0.03615674, "step": 20376 }, { "epoch": 40.754, "grad_norm": 1.199188470840454, "learning_rate": 2e-05, "loss": 0.03079618, "step": 20377 }, { "epoch": 40.756, "grad_norm": 1.0769990682601929, "learning_rate": 2e-05, "loss": 0.0315218, "step": 20378 }, { "epoch": 40.758, "grad_norm": 1.4402025938034058, "learning_rate": 2e-05, "loss": 0.04766921, "step": 20379 }, { "epoch": 40.76, "grad_norm": 1.8209091424942017, "learning_rate": 2e-05, "loss": 0.05263519, "step": 20380 }, { "epoch": 40.762, "grad_norm": 0.9779090881347656, "learning_rate": 2e-05, "loss": 0.04188849, "step": 20381 }, { "epoch": 40.764, "grad_norm": 1.2546898126602173, "learning_rate": 2e-05, "loss": 0.04343712, "step": 20382 }, { "epoch": 40.766, "grad_norm": 1.0758841037750244, "learning_rate": 2e-05, "loss": 0.03132466, "step": 20383 }, { "epoch": 40.768, "grad_norm": 0.9773158431053162, "learning_rate": 2e-05, "loss": 0.03976764, "step": 20384 }, { "epoch": 40.77, "grad_norm": 1.0184385776519775, "learning_rate": 2e-05, "loss": 0.02913499, "step": 20385 }, { "epoch": 40.772, "grad_norm": 1.086414098739624, "learning_rate": 2e-05, "loss": 0.04423934, "step": 20386 }, { "epoch": 40.774, "grad_norm": 1.0270086526870728, "learning_rate": 2e-05, "loss": 0.03927654, "step": 20387 }, { "epoch": 40.776, "grad_norm": 5.107845306396484, "learning_rate": 2e-05, "loss": 0.04423544, "step": 20388 }, { "epoch": 40.778, "grad_norm": 1.0064409971237183, "learning_rate": 2e-05, "loss": 0.03523261, "step": 20389 }, { "epoch": 40.78, "grad_norm": 1.9079705476760864, "learning_rate": 2e-05, "loss": 0.04026254, "step": 20390 }, { "epoch": 40.782, "grad_norm": 1.1550371646881104, "learning_rate": 2e-05, "loss": 0.04364779, "step": 20391 }, { "epoch": 40.784, "grad_norm": 1.136885166168213, "learning_rate": 2e-05, "loss": 0.03812756, "step": 20392 }, { "epoch": 40.786, "grad_norm": 1.9610389471054077, "learning_rate": 2e-05, "loss": 0.04168766, "step": 20393 }, { "epoch": 40.788, "grad_norm": 1.1401041746139526, "learning_rate": 2e-05, "loss": 0.05820818, "step": 20394 }, { "epoch": 40.79, "grad_norm": 1.0432277917861938, "learning_rate": 2e-05, "loss": 0.04399939, "step": 20395 }, { "epoch": 40.792, "grad_norm": 1.0911237001419067, "learning_rate": 2e-05, "loss": 0.03605608, "step": 20396 }, { "epoch": 40.794, "grad_norm": 1.2671798467636108, "learning_rate": 2e-05, "loss": 0.03537956, "step": 20397 }, { "epoch": 40.796, "grad_norm": 2.9806571006774902, "learning_rate": 2e-05, "loss": 0.04979237, "step": 20398 }, { "epoch": 40.798, "grad_norm": 1.0320297479629517, "learning_rate": 2e-05, "loss": 0.0366925, "step": 20399 }, { "epoch": 40.8, "grad_norm": 1.0680067539215088, "learning_rate": 2e-05, "loss": 0.04264978, "step": 20400 }, { "epoch": 40.802, "grad_norm": 1.2657119035720825, "learning_rate": 2e-05, "loss": 0.04683827, "step": 20401 }, { "epoch": 40.804, "grad_norm": 1.5166106224060059, "learning_rate": 2e-05, "loss": 0.04532358, "step": 20402 }, { "epoch": 40.806, "grad_norm": 1.6744019985198975, "learning_rate": 2e-05, "loss": 0.04953721, "step": 20403 }, { "epoch": 40.808, "grad_norm": 1.4129294157028198, "learning_rate": 2e-05, "loss": 0.06239362, "step": 20404 }, { "epoch": 40.81, "grad_norm": 1.0602716207504272, "learning_rate": 2e-05, "loss": 0.03704911, "step": 20405 }, { "epoch": 40.812, "grad_norm": 1.2755035161972046, "learning_rate": 2e-05, "loss": 0.06213048, "step": 20406 }, { "epoch": 40.814, "grad_norm": 1.1491618156433105, "learning_rate": 2e-05, "loss": 0.0415276, "step": 20407 }, { "epoch": 40.816, "grad_norm": 1.0170224905014038, "learning_rate": 2e-05, "loss": 0.03638375, "step": 20408 }, { "epoch": 40.818, "grad_norm": 1.0383493900299072, "learning_rate": 2e-05, "loss": 0.04267569, "step": 20409 }, { "epoch": 40.82, "grad_norm": 1.0420001745224, "learning_rate": 2e-05, "loss": 0.03564225, "step": 20410 }, { "epoch": 40.822, "grad_norm": 1.0572384595870972, "learning_rate": 2e-05, "loss": 0.03767185, "step": 20411 }, { "epoch": 40.824, "grad_norm": 2.1216719150543213, "learning_rate": 2e-05, "loss": 0.04272071, "step": 20412 }, { "epoch": 40.826, "grad_norm": 1.0535728931427002, "learning_rate": 2e-05, "loss": 0.04361134, "step": 20413 }, { "epoch": 40.828, "grad_norm": 0.9623695015907288, "learning_rate": 2e-05, "loss": 0.03322486, "step": 20414 }, { "epoch": 40.83, "grad_norm": 1.2571234703063965, "learning_rate": 2e-05, "loss": 0.05839237, "step": 20415 }, { "epoch": 40.832, "grad_norm": 1.230086088180542, "learning_rate": 2e-05, "loss": 0.03829009, "step": 20416 }, { "epoch": 40.834, "grad_norm": 0.9868780970573425, "learning_rate": 2e-05, "loss": 0.04419571, "step": 20417 }, { "epoch": 40.836, "grad_norm": 1.2525875568389893, "learning_rate": 2e-05, "loss": 0.04573279, "step": 20418 }, { "epoch": 40.838, "grad_norm": 1.3724699020385742, "learning_rate": 2e-05, "loss": 0.04285372, "step": 20419 }, { "epoch": 40.84, "grad_norm": 1.6279878616333008, "learning_rate": 2e-05, "loss": 0.05431481, "step": 20420 }, { "epoch": 40.842, "grad_norm": 1.180940866470337, "learning_rate": 2e-05, "loss": 0.03457396, "step": 20421 }, { "epoch": 40.844, "grad_norm": 0.8618980050086975, "learning_rate": 2e-05, "loss": 0.02405167, "step": 20422 }, { "epoch": 40.846, "grad_norm": 0.9395048022270203, "learning_rate": 2e-05, "loss": 0.04073718, "step": 20423 }, { "epoch": 40.848, "grad_norm": 1.3827964067459106, "learning_rate": 2e-05, "loss": 0.05020389, "step": 20424 }, { "epoch": 40.85, "grad_norm": 1.789374828338623, "learning_rate": 2e-05, "loss": 0.0430795, "step": 20425 }, { "epoch": 40.852, "grad_norm": 0.9966129064559937, "learning_rate": 2e-05, "loss": 0.04153362, "step": 20426 }, { "epoch": 40.854, "grad_norm": 1.3448255062103271, "learning_rate": 2e-05, "loss": 0.06270552, "step": 20427 }, { "epoch": 40.856, "grad_norm": 0.9094908833503723, "learning_rate": 2e-05, "loss": 0.0360358, "step": 20428 }, { "epoch": 40.858, "grad_norm": 1.1281365156173706, "learning_rate": 2e-05, "loss": 0.03770623, "step": 20429 }, { "epoch": 40.86, "grad_norm": 1.858094573020935, "learning_rate": 2e-05, "loss": 0.04254647, "step": 20430 }, { "epoch": 40.862, "grad_norm": 1.1025338172912598, "learning_rate": 2e-05, "loss": 0.03372105, "step": 20431 }, { "epoch": 40.864, "grad_norm": 1.0470693111419678, "learning_rate": 2e-05, "loss": 0.03594878, "step": 20432 }, { "epoch": 40.866, "grad_norm": 1.183687686920166, "learning_rate": 2e-05, "loss": 0.04691194, "step": 20433 }, { "epoch": 40.868, "grad_norm": 1.816702961921692, "learning_rate": 2e-05, "loss": 0.07005389, "step": 20434 }, { "epoch": 40.87, "grad_norm": 1.0484319925308228, "learning_rate": 2e-05, "loss": 0.03401724, "step": 20435 }, { "epoch": 40.872, "grad_norm": 1.0516014099121094, "learning_rate": 2e-05, "loss": 0.04603431, "step": 20436 }, { "epoch": 40.874, "grad_norm": 0.9627552032470703, "learning_rate": 2e-05, "loss": 0.04285864, "step": 20437 }, { "epoch": 40.876, "grad_norm": 1.7328182458877563, "learning_rate": 2e-05, "loss": 0.04591936, "step": 20438 }, { "epoch": 40.878, "grad_norm": 3.189844846725464, "learning_rate": 2e-05, "loss": 0.05473142, "step": 20439 }, { "epoch": 40.88, "grad_norm": 1.2837296724319458, "learning_rate": 2e-05, "loss": 0.04808864, "step": 20440 }, { "epoch": 40.882, "grad_norm": 1.0054432153701782, "learning_rate": 2e-05, "loss": 0.04453343, "step": 20441 }, { "epoch": 40.884, "grad_norm": 1.1040476560592651, "learning_rate": 2e-05, "loss": 0.04867121, "step": 20442 }, { "epoch": 40.886, "grad_norm": 1.3341351747512817, "learning_rate": 2e-05, "loss": 0.03911396, "step": 20443 }, { "epoch": 40.888, "grad_norm": 1.0010677576065063, "learning_rate": 2e-05, "loss": 0.03776209, "step": 20444 }, { "epoch": 40.89, "grad_norm": 1.0065970420837402, "learning_rate": 2e-05, "loss": 0.037475, "step": 20445 }, { "epoch": 40.892, "grad_norm": 1.9140510559082031, "learning_rate": 2e-05, "loss": 0.05182716, "step": 20446 }, { "epoch": 40.894, "grad_norm": 1.0341143608093262, "learning_rate": 2e-05, "loss": 0.0312047, "step": 20447 }, { "epoch": 40.896, "grad_norm": 1.0456278324127197, "learning_rate": 2e-05, "loss": 0.03903262, "step": 20448 }, { "epoch": 40.898, "grad_norm": 1.2141480445861816, "learning_rate": 2e-05, "loss": 0.05411732, "step": 20449 }, { "epoch": 40.9, "grad_norm": 1.1321849822998047, "learning_rate": 2e-05, "loss": 0.04777317, "step": 20450 }, { "epoch": 40.902, "grad_norm": 0.8977165222167969, "learning_rate": 2e-05, "loss": 0.03562894, "step": 20451 }, { "epoch": 40.904, "grad_norm": 1.4474440813064575, "learning_rate": 2e-05, "loss": 0.04305565, "step": 20452 }, { "epoch": 40.906, "grad_norm": 1.0124871730804443, "learning_rate": 2e-05, "loss": 0.04430645, "step": 20453 }, { "epoch": 40.908, "grad_norm": 1.0951857566833496, "learning_rate": 2e-05, "loss": 0.05267274, "step": 20454 }, { "epoch": 40.91, "grad_norm": 1.056492567062378, "learning_rate": 2e-05, "loss": 0.03960077, "step": 20455 }, { "epoch": 40.912, "grad_norm": 1.447960376739502, "learning_rate": 2e-05, "loss": 0.05186929, "step": 20456 }, { "epoch": 40.914, "grad_norm": 0.8418309092521667, "learning_rate": 2e-05, "loss": 0.03061183, "step": 20457 }, { "epoch": 40.916, "grad_norm": 0.9745206236839294, "learning_rate": 2e-05, "loss": 0.0293571, "step": 20458 }, { "epoch": 40.918, "grad_norm": 1.304772973060608, "learning_rate": 2e-05, "loss": 0.0519185, "step": 20459 }, { "epoch": 40.92, "grad_norm": 1.78636634349823, "learning_rate": 2e-05, "loss": 0.0445238, "step": 20460 }, { "epoch": 40.922, "grad_norm": 0.9979597926139832, "learning_rate": 2e-05, "loss": 0.04146229, "step": 20461 }, { "epoch": 40.924, "grad_norm": 1.3574515581130981, "learning_rate": 2e-05, "loss": 0.03933012, "step": 20462 }, { "epoch": 40.926, "grad_norm": 1.6258811950683594, "learning_rate": 2e-05, "loss": 0.04539134, "step": 20463 }, { "epoch": 40.928, "grad_norm": 1.1740225553512573, "learning_rate": 2e-05, "loss": 0.06119473, "step": 20464 }, { "epoch": 40.93, "grad_norm": 1.2412153482437134, "learning_rate": 2e-05, "loss": 0.03917379, "step": 20465 }, { "epoch": 40.932, "grad_norm": 1.28252375125885, "learning_rate": 2e-05, "loss": 0.04034043, "step": 20466 }, { "epoch": 40.934, "grad_norm": 1.1120308637619019, "learning_rate": 2e-05, "loss": 0.04544591, "step": 20467 }, { "epoch": 40.936, "grad_norm": 0.901611864566803, "learning_rate": 2e-05, "loss": 0.03679845, "step": 20468 }, { "epoch": 40.938, "grad_norm": 0.7476286292076111, "learning_rate": 2e-05, "loss": 0.02964001, "step": 20469 }, { "epoch": 40.94, "grad_norm": 1.7955683469772339, "learning_rate": 2e-05, "loss": 0.04134537, "step": 20470 }, { "epoch": 40.942, "grad_norm": 1.1641381978988647, "learning_rate": 2e-05, "loss": 0.0516721, "step": 20471 }, { "epoch": 40.944, "grad_norm": 1.11546790599823, "learning_rate": 2e-05, "loss": 0.03695212, "step": 20472 }, { "epoch": 40.946, "grad_norm": 1.0544157028198242, "learning_rate": 2e-05, "loss": 0.04138238, "step": 20473 }, { "epoch": 40.948, "grad_norm": 1.0051969289779663, "learning_rate": 2e-05, "loss": 0.04057425, "step": 20474 }, { "epoch": 40.95, "grad_norm": 1.157500982284546, "learning_rate": 2e-05, "loss": 0.05225903, "step": 20475 }, { "epoch": 40.952, "grad_norm": 0.9261643886566162, "learning_rate": 2e-05, "loss": 0.03012765, "step": 20476 }, { "epoch": 40.954, "grad_norm": 1.2186371088027954, "learning_rate": 2e-05, "loss": 0.05180175, "step": 20477 }, { "epoch": 40.956, "grad_norm": 0.8971396088600159, "learning_rate": 2e-05, "loss": 0.03211908, "step": 20478 }, { "epoch": 40.958, "grad_norm": 0.95932537317276, "learning_rate": 2e-05, "loss": 0.02468094, "step": 20479 }, { "epoch": 40.96, "grad_norm": 1.1031442880630493, "learning_rate": 2e-05, "loss": 0.04307536, "step": 20480 }, { "epoch": 40.962, "grad_norm": 1.4132716655731201, "learning_rate": 2e-05, "loss": 0.06279135, "step": 20481 }, { "epoch": 40.964, "grad_norm": 1.695801854133606, "learning_rate": 2e-05, "loss": 0.0440927, "step": 20482 }, { "epoch": 40.966, "grad_norm": 1.3766049146652222, "learning_rate": 2e-05, "loss": 0.07142211, "step": 20483 }, { "epoch": 40.968, "grad_norm": 1.0872586965560913, "learning_rate": 2e-05, "loss": 0.04075128, "step": 20484 }, { "epoch": 40.97, "grad_norm": 1.5925564765930176, "learning_rate": 2e-05, "loss": 0.03312077, "step": 20485 }, { "epoch": 40.972, "grad_norm": 1.041412353515625, "learning_rate": 2e-05, "loss": 0.03111326, "step": 20486 }, { "epoch": 40.974, "grad_norm": 1.2051217555999756, "learning_rate": 2e-05, "loss": 0.05554478, "step": 20487 }, { "epoch": 40.976, "grad_norm": 0.9818574786186218, "learning_rate": 2e-05, "loss": 0.0446533, "step": 20488 }, { "epoch": 40.978, "grad_norm": 1.0997881889343262, "learning_rate": 2e-05, "loss": 0.05166684, "step": 20489 }, { "epoch": 40.98, "grad_norm": 1.2988731861114502, "learning_rate": 2e-05, "loss": 0.05395452, "step": 20490 }, { "epoch": 40.982, "grad_norm": 1.221101999282837, "learning_rate": 2e-05, "loss": 0.05179112, "step": 20491 }, { "epoch": 40.984, "grad_norm": 1.256781816482544, "learning_rate": 2e-05, "loss": 0.04911101, "step": 20492 }, { "epoch": 40.986, "grad_norm": 1.232703685760498, "learning_rate": 2e-05, "loss": 0.02325133, "step": 20493 }, { "epoch": 40.988, "grad_norm": 1.1104594469070435, "learning_rate": 2e-05, "loss": 0.04168357, "step": 20494 }, { "epoch": 40.99, "grad_norm": 0.9506186842918396, "learning_rate": 2e-05, "loss": 0.04006596, "step": 20495 }, { "epoch": 40.992, "grad_norm": 1.974008560180664, "learning_rate": 2e-05, "loss": 0.04718253, "step": 20496 }, { "epoch": 40.994, "grad_norm": 1.4889172315597534, "learning_rate": 2e-05, "loss": 0.04151162, "step": 20497 }, { "epoch": 40.996, "grad_norm": 0.8789054751396179, "learning_rate": 2e-05, "loss": 0.02894873, "step": 20498 }, { "epoch": 40.998, "grad_norm": 1.4483062028884888, "learning_rate": 2e-05, "loss": 0.05184088, "step": 20499 }, { "epoch": 41.0, "grad_norm": 1.5366476774215698, "learning_rate": 2e-05, "loss": 0.05509026, "step": 20500 }, { "epoch": 41.0, "eval_performance": { "AngleClassification_1": 1.0, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9880239520958084, "Equal_1": 0.996, "Equal_2": 0.9780439121756487, "Equal_3": 0.9860279441117764, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.9920159680638723, "Parallel_1": 0.9879759519038076, "Parallel_2": 0.9979959919839679, "Parallel_3": 0.994, "Perpendicular_1": 0.996, "Perpendicular_2": 0.992, "Perpendicular_3": 0.8897795591182365, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.998, "PointLiesOnCircle_3": 0.992, "PointLiesOnLine_1": 0.9939879759519038, "PointLiesOnLine_2": 0.9979959919839679, "PointLiesOnLine_3": 0.9820359281437125 }, "eval_runtime": 226.4051, "eval_samples_per_second": 46.377, "eval_steps_per_second": 0.928, "step": 20500 }, { "epoch": 41.002, "grad_norm": 1.1752159595489502, "learning_rate": 2e-05, "loss": 0.03899279, "step": 20501 }, { "epoch": 41.004, "grad_norm": 0.9792081713676453, "learning_rate": 2e-05, "loss": 0.04593923, "step": 20502 }, { "epoch": 41.006, "grad_norm": 0.9200311899185181, "learning_rate": 2e-05, "loss": 0.02702893, "step": 20503 }, { "epoch": 41.008, "grad_norm": 1.0498852729797363, "learning_rate": 2e-05, "loss": 0.04614846, "step": 20504 }, { "epoch": 41.01, "grad_norm": 1.2764372825622559, "learning_rate": 2e-05, "loss": 0.05538561, "step": 20505 }, { "epoch": 41.012, "grad_norm": 1.2704253196716309, "learning_rate": 2e-05, "loss": 0.03923805, "step": 20506 }, { "epoch": 41.014, "grad_norm": 1.2932484149932861, "learning_rate": 2e-05, "loss": 0.05122381, "step": 20507 }, { "epoch": 41.016, "grad_norm": 1.0859178304672241, "learning_rate": 2e-05, "loss": 0.04439399, "step": 20508 }, { "epoch": 41.018, "grad_norm": 1.0978668928146362, "learning_rate": 2e-05, "loss": 0.04960441, "step": 20509 }, { "epoch": 41.02, "grad_norm": 1.5889049768447876, "learning_rate": 2e-05, "loss": 0.03431728, "step": 20510 }, { "epoch": 41.022, "grad_norm": 0.9453125, "learning_rate": 2e-05, "loss": 0.0500179, "step": 20511 }, { "epoch": 41.024, "grad_norm": 1.9219670295715332, "learning_rate": 2e-05, "loss": 0.03496731, "step": 20512 }, { "epoch": 41.026, "grad_norm": 0.9541072845458984, "learning_rate": 2e-05, "loss": 0.02909797, "step": 20513 }, { "epoch": 41.028, "grad_norm": 1.0788863897323608, "learning_rate": 2e-05, "loss": 0.05720472, "step": 20514 }, { "epoch": 41.03, "grad_norm": 2.2284295558929443, "learning_rate": 2e-05, "loss": 0.05632266, "step": 20515 }, { "epoch": 41.032, "grad_norm": 1.2180745601654053, "learning_rate": 2e-05, "loss": 0.05140374, "step": 20516 }, { "epoch": 41.034, "grad_norm": 1.1025279760360718, "learning_rate": 2e-05, "loss": 0.04529802, "step": 20517 }, { "epoch": 41.036, "grad_norm": 1.345263957977295, "learning_rate": 2e-05, "loss": 0.04551404, "step": 20518 }, { "epoch": 41.038, "grad_norm": 1.0791441202163696, "learning_rate": 2e-05, "loss": 0.02729324, "step": 20519 }, { "epoch": 41.04, "grad_norm": 1.6264829635620117, "learning_rate": 2e-05, "loss": 0.04616492, "step": 20520 }, { "epoch": 41.042, "grad_norm": 1.2201566696166992, "learning_rate": 2e-05, "loss": 0.05348533, "step": 20521 }, { "epoch": 41.044, "grad_norm": 1.4497480392456055, "learning_rate": 2e-05, "loss": 0.04330777, "step": 20522 }, { "epoch": 41.046, "grad_norm": 1.0905252695083618, "learning_rate": 2e-05, "loss": 0.04642132, "step": 20523 }, { "epoch": 41.048, "grad_norm": 1.101463794708252, "learning_rate": 2e-05, "loss": 0.02384229, "step": 20524 }, { "epoch": 41.05, "grad_norm": 0.9577345848083496, "learning_rate": 2e-05, "loss": 0.04101152, "step": 20525 }, { "epoch": 41.052, "grad_norm": 0.8797972798347473, "learning_rate": 2e-05, "loss": 0.03056166, "step": 20526 }, { "epoch": 41.054, "grad_norm": 1.2348829507827759, "learning_rate": 2e-05, "loss": 0.03153551, "step": 20527 }, { "epoch": 41.056, "grad_norm": 1.0962626934051514, "learning_rate": 2e-05, "loss": 0.0503561, "step": 20528 }, { "epoch": 41.058, "grad_norm": 0.9329814314842224, "learning_rate": 2e-05, "loss": 0.04406991, "step": 20529 }, { "epoch": 41.06, "grad_norm": 1.0800416469573975, "learning_rate": 2e-05, "loss": 0.03444954, "step": 20530 }, { "epoch": 41.062, "grad_norm": 1.0321297645568848, "learning_rate": 2e-05, "loss": 0.04285778, "step": 20531 }, { "epoch": 41.064, "grad_norm": 1.6953840255737305, "learning_rate": 2e-05, "loss": 0.06219647, "step": 20532 }, { "epoch": 41.066, "grad_norm": 1.0971157550811768, "learning_rate": 2e-05, "loss": 0.03216353, "step": 20533 }, { "epoch": 41.068, "grad_norm": 1.1552497148513794, "learning_rate": 2e-05, "loss": 0.05734775, "step": 20534 }, { "epoch": 41.07, "grad_norm": 2.304569959640503, "learning_rate": 2e-05, "loss": 0.06357768, "step": 20535 }, { "epoch": 41.072, "grad_norm": 1.5086556673049927, "learning_rate": 2e-05, "loss": 0.0458739, "step": 20536 }, { "epoch": 41.074, "grad_norm": 1.1686569452285767, "learning_rate": 2e-05, "loss": 0.04203654, "step": 20537 }, { "epoch": 41.076, "grad_norm": 3.11472749710083, "learning_rate": 2e-05, "loss": 0.0295115, "step": 20538 }, { "epoch": 41.078, "grad_norm": 0.8483066558837891, "learning_rate": 2e-05, "loss": 0.0356756, "step": 20539 }, { "epoch": 41.08, "grad_norm": 1.066288709640503, "learning_rate": 2e-05, "loss": 0.04389857, "step": 20540 }, { "epoch": 41.082, "grad_norm": 1.6194871664047241, "learning_rate": 2e-05, "loss": 0.05119413, "step": 20541 }, { "epoch": 41.084, "grad_norm": 1.2449779510498047, "learning_rate": 2e-05, "loss": 0.04619698, "step": 20542 }, { "epoch": 41.086, "grad_norm": 0.9331873655319214, "learning_rate": 2e-05, "loss": 0.04620748, "step": 20543 }, { "epoch": 41.088, "grad_norm": 1.256559133529663, "learning_rate": 2e-05, "loss": 0.04425352, "step": 20544 }, { "epoch": 41.09, "grad_norm": 1.0414016246795654, "learning_rate": 2e-05, "loss": 0.03775582, "step": 20545 }, { "epoch": 41.092, "grad_norm": 1.0822330713272095, "learning_rate": 2e-05, "loss": 0.05688352, "step": 20546 }, { "epoch": 41.094, "grad_norm": 1.0823206901550293, "learning_rate": 2e-05, "loss": 0.04782483, "step": 20547 }, { "epoch": 41.096, "grad_norm": 0.8782379031181335, "learning_rate": 2e-05, "loss": 0.03695793, "step": 20548 }, { "epoch": 41.098, "grad_norm": 1.122492790222168, "learning_rate": 2e-05, "loss": 0.04257857, "step": 20549 }, { "epoch": 41.1, "grad_norm": 0.998257040977478, "learning_rate": 2e-05, "loss": 0.03678233, "step": 20550 }, { "epoch": 41.102, "grad_norm": 2.6697378158569336, "learning_rate": 2e-05, "loss": 0.05823192, "step": 20551 }, { "epoch": 41.104, "grad_norm": 1.0141547918319702, "learning_rate": 2e-05, "loss": 0.03868999, "step": 20552 }, { "epoch": 41.106, "grad_norm": 1.1599050760269165, "learning_rate": 2e-05, "loss": 0.05163694, "step": 20553 }, { "epoch": 41.108, "grad_norm": 1.05556321144104, "learning_rate": 2e-05, "loss": 0.04580704, "step": 20554 }, { "epoch": 41.11, "grad_norm": 1.028124213218689, "learning_rate": 2e-05, "loss": 0.04711068, "step": 20555 }, { "epoch": 41.112, "grad_norm": 1.0529882907867432, "learning_rate": 2e-05, "loss": 0.05120657, "step": 20556 }, { "epoch": 41.114, "grad_norm": 1.5760058164596558, "learning_rate": 2e-05, "loss": 0.05620264, "step": 20557 }, { "epoch": 41.116, "grad_norm": 1.2403494119644165, "learning_rate": 2e-05, "loss": 0.04962834, "step": 20558 }, { "epoch": 41.118, "grad_norm": 0.9954540133476257, "learning_rate": 2e-05, "loss": 0.03596257, "step": 20559 }, { "epoch": 41.12, "grad_norm": 1.080809473991394, "learning_rate": 2e-05, "loss": 0.03738142, "step": 20560 }, { "epoch": 41.122, "grad_norm": 1.1035685539245605, "learning_rate": 2e-05, "loss": 0.05174283, "step": 20561 }, { "epoch": 41.124, "grad_norm": 0.9973383545875549, "learning_rate": 2e-05, "loss": 0.03496199, "step": 20562 }, { "epoch": 41.126, "grad_norm": 1.0790691375732422, "learning_rate": 2e-05, "loss": 0.04189206, "step": 20563 }, { "epoch": 41.128, "grad_norm": 1.1612881422042847, "learning_rate": 2e-05, "loss": 0.05318615, "step": 20564 }, { "epoch": 41.13, "grad_norm": 1.1199979782104492, "learning_rate": 2e-05, "loss": 0.04542235, "step": 20565 }, { "epoch": 41.132, "grad_norm": 1.3580987453460693, "learning_rate": 2e-05, "loss": 0.05618509, "step": 20566 }, { "epoch": 41.134, "grad_norm": 1.3449252843856812, "learning_rate": 2e-05, "loss": 0.04725029, "step": 20567 }, { "epoch": 41.136, "grad_norm": 1.1477843523025513, "learning_rate": 2e-05, "loss": 0.04010038, "step": 20568 }, { "epoch": 41.138, "grad_norm": 0.7621534466743469, "learning_rate": 2e-05, "loss": 0.02623776, "step": 20569 }, { "epoch": 41.14, "grad_norm": 1.1275876760482788, "learning_rate": 2e-05, "loss": 0.04556769, "step": 20570 }, { "epoch": 41.142, "grad_norm": 1.151944875717163, "learning_rate": 2e-05, "loss": 0.04492462, "step": 20571 }, { "epoch": 41.144, "grad_norm": 1.3229774236679077, "learning_rate": 2e-05, "loss": 0.05355398, "step": 20572 }, { "epoch": 41.146, "grad_norm": 1.2772973775863647, "learning_rate": 2e-05, "loss": 0.05419679, "step": 20573 }, { "epoch": 41.148, "grad_norm": 0.9079604148864746, "learning_rate": 2e-05, "loss": 0.03263656, "step": 20574 }, { "epoch": 41.15, "grad_norm": 1.1510071754455566, "learning_rate": 2e-05, "loss": 0.04873609, "step": 20575 }, { "epoch": 41.152, "grad_norm": 1.5132709741592407, "learning_rate": 2e-05, "loss": 0.05605687, "step": 20576 }, { "epoch": 41.154, "grad_norm": 2.4248769283294678, "learning_rate": 2e-05, "loss": 0.05075894, "step": 20577 }, { "epoch": 41.156, "grad_norm": 1.040242075920105, "learning_rate": 2e-05, "loss": 0.04025246, "step": 20578 }, { "epoch": 41.158, "grad_norm": 1.104422688484192, "learning_rate": 2e-05, "loss": 0.05011573, "step": 20579 }, { "epoch": 41.16, "grad_norm": 1.0211313962936401, "learning_rate": 2e-05, "loss": 0.02762742, "step": 20580 }, { "epoch": 41.162, "grad_norm": 0.9110880494117737, "learning_rate": 2e-05, "loss": 0.030357, "step": 20581 }, { "epoch": 41.164, "grad_norm": 0.9832329154014587, "learning_rate": 2e-05, "loss": 0.04165021, "step": 20582 }, { "epoch": 41.166, "grad_norm": 0.9635384678840637, "learning_rate": 2e-05, "loss": 0.04531468, "step": 20583 }, { "epoch": 41.168, "grad_norm": 0.9904593825340271, "learning_rate": 2e-05, "loss": 0.03031628, "step": 20584 }, { "epoch": 41.17, "grad_norm": 0.9031853675842285, "learning_rate": 2e-05, "loss": 0.03609793, "step": 20585 }, { "epoch": 41.172, "grad_norm": 1.0318148136138916, "learning_rate": 2e-05, "loss": 0.03451787, "step": 20586 }, { "epoch": 41.174, "grad_norm": 3.0381386280059814, "learning_rate": 2e-05, "loss": 0.05401284, "step": 20587 }, { "epoch": 41.176, "grad_norm": 1.4268327951431274, "learning_rate": 2e-05, "loss": 0.05504726, "step": 20588 }, { "epoch": 41.178, "grad_norm": 1.0400723218917847, "learning_rate": 2e-05, "loss": 0.03523021, "step": 20589 }, { "epoch": 41.18, "grad_norm": 1.0813912153244019, "learning_rate": 2e-05, "loss": 0.0528181, "step": 20590 }, { "epoch": 41.182, "grad_norm": 0.8123153448104858, "learning_rate": 2e-05, "loss": 0.03293756, "step": 20591 }, { "epoch": 41.184, "grad_norm": 0.9970216751098633, "learning_rate": 2e-05, "loss": 0.04257615, "step": 20592 }, { "epoch": 41.186, "grad_norm": 1.540540099143982, "learning_rate": 2e-05, "loss": 0.03905299, "step": 20593 }, { "epoch": 41.188, "grad_norm": 1.072322964668274, "learning_rate": 2e-05, "loss": 0.04118071, "step": 20594 }, { "epoch": 41.19, "grad_norm": 0.9854333400726318, "learning_rate": 2e-05, "loss": 0.03762934, "step": 20595 }, { "epoch": 41.192, "grad_norm": 1.0515074729919434, "learning_rate": 2e-05, "loss": 0.03707988, "step": 20596 }, { "epoch": 41.194, "grad_norm": 1.1947723627090454, "learning_rate": 2e-05, "loss": 0.05113932, "step": 20597 }, { "epoch": 41.196, "grad_norm": 1.0891680717468262, "learning_rate": 2e-05, "loss": 0.04927924, "step": 20598 }, { "epoch": 41.198, "grad_norm": 0.9481536149978638, "learning_rate": 2e-05, "loss": 0.03629138, "step": 20599 }, { "epoch": 41.2, "grad_norm": 0.9856846928596497, "learning_rate": 2e-05, "loss": 0.03278907, "step": 20600 }, { "epoch": 41.202, "grad_norm": 2.11319899559021, "learning_rate": 2e-05, "loss": 0.05194512, "step": 20601 }, { "epoch": 41.204, "grad_norm": 1.0762548446655273, "learning_rate": 2e-05, "loss": 0.04117026, "step": 20602 }, { "epoch": 41.206, "grad_norm": 1.205059289932251, "learning_rate": 2e-05, "loss": 0.04383096, "step": 20603 }, { "epoch": 41.208, "grad_norm": 1.2322077751159668, "learning_rate": 2e-05, "loss": 0.05283987, "step": 20604 }, { "epoch": 41.21, "grad_norm": 1.7733515501022339, "learning_rate": 2e-05, "loss": 0.0541972, "step": 20605 }, { "epoch": 41.212, "grad_norm": 1.3311035633087158, "learning_rate": 2e-05, "loss": 0.05341347, "step": 20606 }, { "epoch": 41.214, "grad_norm": 1.309351921081543, "learning_rate": 2e-05, "loss": 0.05550125, "step": 20607 }, { "epoch": 41.216, "grad_norm": 1.1941022872924805, "learning_rate": 2e-05, "loss": 0.04291428, "step": 20608 }, { "epoch": 41.218, "grad_norm": 1.220374345779419, "learning_rate": 2e-05, "loss": 0.04967535, "step": 20609 }, { "epoch": 41.22, "grad_norm": 0.949939489364624, "learning_rate": 2e-05, "loss": 0.03427394, "step": 20610 }, { "epoch": 41.222, "grad_norm": 1.136901617050171, "learning_rate": 2e-05, "loss": 0.04591855, "step": 20611 }, { "epoch": 41.224, "grad_norm": 1.409404993057251, "learning_rate": 2e-05, "loss": 0.05190839, "step": 20612 }, { "epoch": 41.226, "grad_norm": 1.1430186033248901, "learning_rate": 2e-05, "loss": 0.04684822, "step": 20613 }, { "epoch": 41.228, "grad_norm": 1.1918002367019653, "learning_rate": 2e-05, "loss": 0.06113784, "step": 20614 }, { "epoch": 41.23, "grad_norm": 1.1107643842697144, "learning_rate": 2e-05, "loss": 0.04524104, "step": 20615 }, { "epoch": 41.232, "grad_norm": 1.0151389837265015, "learning_rate": 2e-05, "loss": 0.04117039, "step": 20616 }, { "epoch": 41.234, "grad_norm": 2.2921926975250244, "learning_rate": 2e-05, "loss": 0.0493741, "step": 20617 }, { "epoch": 41.236, "grad_norm": 1.197996735572815, "learning_rate": 2e-05, "loss": 0.02896024, "step": 20618 }, { "epoch": 41.238, "grad_norm": 1.2182585000991821, "learning_rate": 2e-05, "loss": 0.04462679, "step": 20619 }, { "epoch": 41.24, "grad_norm": 1.1110912561416626, "learning_rate": 2e-05, "loss": 0.04487909, "step": 20620 }, { "epoch": 41.242, "grad_norm": 1.156976580619812, "learning_rate": 2e-05, "loss": 0.03767486, "step": 20621 }, { "epoch": 41.244, "grad_norm": 1.3635921478271484, "learning_rate": 2e-05, "loss": 0.0324076, "step": 20622 }, { "epoch": 41.246, "grad_norm": 1.2377933263778687, "learning_rate": 2e-05, "loss": 0.04386463, "step": 20623 }, { "epoch": 41.248, "grad_norm": 1.6715030670166016, "learning_rate": 2e-05, "loss": 0.05209588, "step": 20624 }, { "epoch": 41.25, "grad_norm": 1.8078083992004395, "learning_rate": 2e-05, "loss": 0.054166, "step": 20625 }, { "epoch": 41.252, "grad_norm": 1.0819824934005737, "learning_rate": 2e-05, "loss": 0.0507945, "step": 20626 }, { "epoch": 41.254, "grad_norm": 1.5130536556243896, "learning_rate": 2e-05, "loss": 0.03794984, "step": 20627 }, { "epoch": 41.256, "grad_norm": 1.3413506746292114, "learning_rate": 2e-05, "loss": 0.06259332, "step": 20628 }, { "epoch": 41.258, "grad_norm": 1.5190750360488892, "learning_rate": 2e-05, "loss": 0.03456144, "step": 20629 }, { "epoch": 41.26, "grad_norm": 1.2614845037460327, "learning_rate": 2e-05, "loss": 0.04486147, "step": 20630 }, { "epoch": 41.262, "grad_norm": 1.1689585447311401, "learning_rate": 2e-05, "loss": 0.05254706, "step": 20631 }, { "epoch": 41.264, "grad_norm": 0.9998854398727417, "learning_rate": 2e-05, "loss": 0.0446326, "step": 20632 }, { "epoch": 41.266, "grad_norm": 1.8488187789916992, "learning_rate": 2e-05, "loss": 0.04145909, "step": 20633 }, { "epoch": 41.268, "grad_norm": 1.0122228860855103, "learning_rate": 2e-05, "loss": 0.03260951, "step": 20634 }, { "epoch": 41.27, "grad_norm": 0.8525946140289307, "learning_rate": 2e-05, "loss": 0.0354874, "step": 20635 }, { "epoch": 41.272, "grad_norm": 1.0827605724334717, "learning_rate": 2e-05, "loss": 0.04743937, "step": 20636 }, { "epoch": 41.274, "grad_norm": 1.1498291492462158, "learning_rate": 2e-05, "loss": 0.04752085, "step": 20637 }, { "epoch": 41.276, "grad_norm": 2.417128324508667, "learning_rate": 2e-05, "loss": 0.0421588, "step": 20638 }, { "epoch": 41.278, "grad_norm": 1.2661513090133667, "learning_rate": 2e-05, "loss": 0.0348086, "step": 20639 }, { "epoch": 41.28, "grad_norm": 1.0205615758895874, "learning_rate": 2e-05, "loss": 0.03307385, "step": 20640 }, { "epoch": 41.282, "grad_norm": 1.0398824214935303, "learning_rate": 2e-05, "loss": 0.03663192, "step": 20641 }, { "epoch": 41.284, "grad_norm": 1.3204070329666138, "learning_rate": 2e-05, "loss": 0.06029632, "step": 20642 }, { "epoch": 41.286, "grad_norm": 1.0178258419036865, "learning_rate": 2e-05, "loss": 0.0341905, "step": 20643 }, { "epoch": 41.288, "grad_norm": 1.5648425817489624, "learning_rate": 2e-05, "loss": 0.05868941, "step": 20644 }, { "epoch": 41.29, "grad_norm": 1.488700032234192, "learning_rate": 2e-05, "loss": 0.03783103, "step": 20645 }, { "epoch": 41.292, "grad_norm": 1.0668686628341675, "learning_rate": 2e-05, "loss": 0.04475045, "step": 20646 }, { "epoch": 41.294, "grad_norm": 0.9764789342880249, "learning_rate": 2e-05, "loss": 0.03576362, "step": 20647 }, { "epoch": 41.296, "grad_norm": 1.047412395477295, "learning_rate": 2e-05, "loss": 0.03986953, "step": 20648 }, { "epoch": 41.298, "grad_norm": 0.9892702102661133, "learning_rate": 2e-05, "loss": 0.04575176, "step": 20649 }, { "epoch": 41.3, "grad_norm": 1.2654653787612915, "learning_rate": 2e-05, "loss": 0.06948332, "step": 20650 }, { "epoch": 41.302, "grad_norm": 0.9757238626480103, "learning_rate": 2e-05, "loss": 0.03083865, "step": 20651 }, { "epoch": 41.304, "grad_norm": 1.033657431602478, "learning_rate": 2e-05, "loss": 0.04321042, "step": 20652 }, { "epoch": 41.306, "grad_norm": 0.8696503043174744, "learning_rate": 2e-05, "loss": 0.02460985, "step": 20653 }, { "epoch": 41.308, "grad_norm": 0.9584022164344788, "learning_rate": 2e-05, "loss": 0.04210975, "step": 20654 }, { "epoch": 41.31, "grad_norm": 2.958937168121338, "learning_rate": 2e-05, "loss": 0.06439521, "step": 20655 }, { "epoch": 41.312, "grad_norm": 1.2409955263137817, "learning_rate": 2e-05, "loss": 0.05130623, "step": 20656 }, { "epoch": 41.314, "grad_norm": 0.9272419810295105, "learning_rate": 2e-05, "loss": 0.02946955, "step": 20657 }, { "epoch": 41.316, "grad_norm": 1.2462546825408936, "learning_rate": 2e-05, "loss": 0.04740245, "step": 20658 }, { "epoch": 41.318, "grad_norm": 0.9752036929130554, "learning_rate": 2e-05, "loss": 0.03310652, "step": 20659 }, { "epoch": 41.32, "grad_norm": 1.153749942779541, "learning_rate": 2e-05, "loss": 0.04058155, "step": 20660 }, { "epoch": 41.322, "grad_norm": 1.3941954374313354, "learning_rate": 2e-05, "loss": 0.03330812, "step": 20661 }, { "epoch": 41.324, "grad_norm": 4.4235358238220215, "learning_rate": 2e-05, "loss": 0.04352191, "step": 20662 }, { "epoch": 41.326, "grad_norm": 1.1032634973526, "learning_rate": 2e-05, "loss": 0.04730144, "step": 20663 }, { "epoch": 41.328, "grad_norm": 1.3173725605010986, "learning_rate": 2e-05, "loss": 0.04392434, "step": 20664 }, { "epoch": 41.33, "grad_norm": 1.8754007816314697, "learning_rate": 2e-05, "loss": 0.06019153, "step": 20665 }, { "epoch": 41.332, "grad_norm": 1.1408106088638306, "learning_rate": 2e-05, "loss": 0.04356267, "step": 20666 }, { "epoch": 41.334, "grad_norm": 1.436516284942627, "learning_rate": 2e-05, "loss": 0.0340773, "step": 20667 }, { "epoch": 41.336, "grad_norm": 1.074461817741394, "learning_rate": 2e-05, "loss": 0.03728072, "step": 20668 }, { "epoch": 41.338, "grad_norm": 0.8441005349159241, "learning_rate": 2e-05, "loss": 0.02749786, "step": 20669 }, { "epoch": 41.34, "grad_norm": 1.0273375511169434, "learning_rate": 2e-05, "loss": 0.04149678, "step": 20670 }, { "epoch": 41.342, "grad_norm": 0.9038931131362915, "learning_rate": 2e-05, "loss": 0.0318678, "step": 20671 }, { "epoch": 41.344, "grad_norm": 1.103920340538025, "learning_rate": 2e-05, "loss": 0.03695157, "step": 20672 }, { "epoch": 41.346, "grad_norm": 1.2127559185028076, "learning_rate": 2e-05, "loss": 0.0501823, "step": 20673 }, { "epoch": 41.348, "grad_norm": 1.1173603534698486, "learning_rate": 2e-05, "loss": 0.05356177, "step": 20674 }, { "epoch": 41.35, "grad_norm": 1.7376880645751953, "learning_rate": 2e-05, "loss": 0.04446875, "step": 20675 }, { "epoch": 41.352, "grad_norm": 2.3294689655303955, "learning_rate": 2e-05, "loss": 0.03136361, "step": 20676 }, { "epoch": 41.354, "grad_norm": 1.0580710172653198, "learning_rate": 2e-05, "loss": 0.03815735, "step": 20677 }, { "epoch": 41.356, "grad_norm": 1.1311051845550537, "learning_rate": 2e-05, "loss": 0.04486468, "step": 20678 }, { "epoch": 41.358, "grad_norm": 0.9962402582168579, "learning_rate": 2e-05, "loss": 0.03182549, "step": 20679 }, { "epoch": 41.36, "grad_norm": 1.0098371505737305, "learning_rate": 2e-05, "loss": 0.03638168, "step": 20680 }, { "epoch": 41.362, "grad_norm": 1.0357531309127808, "learning_rate": 2e-05, "loss": 0.0432513, "step": 20681 }, { "epoch": 41.364, "grad_norm": 1.1440521478652954, "learning_rate": 2e-05, "loss": 0.03904852, "step": 20682 }, { "epoch": 41.366, "grad_norm": 1.3614921569824219, "learning_rate": 2e-05, "loss": 0.06886282, "step": 20683 }, { "epoch": 41.368, "grad_norm": 0.9397119283676147, "learning_rate": 2e-05, "loss": 0.0345154, "step": 20684 }, { "epoch": 41.37, "grad_norm": 1.876137375831604, "learning_rate": 2e-05, "loss": 0.03610831, "step": 20685 }, { "epoch": 41.372, "grad_norm": 1.1376396417617798, "learning_rate": 2e-05, "loss": 0.03773122, "step": 20686 }, { "epoch": 41.374, "grad_norm": 3.1791579723358154, "learning_rate": 2e-05, "loss": 0.03760811, "step": 20687 }, { "epoch": 41.376, "grad_norm": 1.2201827764511108, "learning_rate": 2e-05, "loss": 0.03734774, "step": 20688 }, { "epoch": 41.378, "grad_norm": 1.5238878726959229, "learning_rate": 2e-05, "loss": 0.04331724, "step": 20689 }, { "epoch": 41.38, "grad_norm": 0.9636351466178894, "learning_rate": 2e-05, "loss": 0.02805255, "step": 20690 }, { "epoch": 41.382, "grad_norm": 1.2400437593460083, "learning_rate": 2e-05, "loss": 0.04462072, "step": 20691 }, { "epoch": 41.384, "grad_norm": 1.6112223863601685, "learning_rate": 2e-05, "loss": 0.05172848, "step": 20692 }, { "epoch": 41.386, "grad_norm": 0.8748649954795837, "learning_rate": 2e-05, "loss": 0.03488109, "step": 20693 }, { "epoch": 41.388, "grad_norm": 1.3202226161956787, "learning_rate": 2e-05, "loss": 0.03648947, "step": 20694 }, { "epoch": 41.39, "grad_norm": 0.9771537184715271, "learning_rate": 2e-05, "loss": 0.03210083, "step": 20695 }, { "epoch": 41.392, "grad_norm": 0.9823686480522156, "learning_rate": 2e-05, "loss": 0.03255215, "step": 20696 }, { "epoch": 41.394, "grad_norm": 1.1647320985794067, "learning_rate": 2e-05, "loss": 0.03608672, "step": 20697 }, { "epoch": 41.396, "grad_norm": 0.9532713294029236, "learning_rate": 2e-05, "loss": 0.03947323, "step": 20698 }, { "epoch": 41.398, "grad_norm": 1.1587069034576416, "learning_rate": 2e-05, "loss": 0.0508494, "step": 20699 }, { "epoch": 41.4, "grad_norm": 1.1718406677246094, "learning_rate": 2e-05, "loss": 0.05115548, "step": 20700 }, { "epoch": 41.402, "grad_norm": 1.0641463994979858, "learning_rate": 2e-05, "loss": 0.03510954, "step": 20701 }, { "epoch": 41.404, "grad_norm": 0.9249027371406555, "learning_rate": 2e-05, "loss": 0.02431588, "step": 20702 }, { "epoch": 41.406, "grad_norm": 1.4727370738983154, "learning_rate": 2e-05, "loss": 0.0352139, "step": 20703 }, { "epoch": 41.408, "grad_norm": 1.4310762882232666, "learning_rate": 2e-05, "loss": 0.05583765, "step": 20704 }, { "epoch": 41.41, "grad_norm": 1.6732020378112793, "learning_rate": 2e-05, "loss": 0.06241651, "step": 20705 }, { "epoch": 41.412, "grad_norm": 2.2327206134796143, "learning_rate": 2e-05, "loss": 0.03740877, "step": 20706 }, { "epoch": 41.414, "grad_norm": 0.9565412998199463, "learning_rate": 2e-05, "loss": 0.03825046, "step": 20707 }, { "epoch": 41.416, "grad_norm": 1.3158801794052124, "learning_rate": 2e-05, "loss": 0.05537631, "step": 20708 }, { "epoch": 41.418, "grad_norm": 1.242192029953003, "learning_rate": 2e-05, "loss": 0.04609145, "step": 20709 }, { "epoch": 41.42, "grad_norm": 0.9001498222351074, "learning_rate": 2e-05, "loss": 0.02980818, "step": 20710 }, { "epoch": 41.422, "grad_norm": 1.0207964181900024, "learning_rate": 2e-05, "loss": 0.03326299, "step": 20711 }, { "epoch": 41.424, "grad_norm": 1.2749321460723877, "learning_rate": 2e-05, "loss": 0.06560112, "step": 20712 }, { "epoch": 41.426, "grad_norm": 1.305778980255127, "learning_rate": 2e-05, "loss": 0.04633905, "step": 20713 }, { "epoch": 41.428, "grad_norm": 1.5584181547164917, "learning_rate": 2e-05, "loss": 0.04485441, "step": 20714 }, { "epoch": 41.43, "grad_norm": 1.2931816577911377, "learning_rate": 2e-05, "loss": 0.04488099, "step": 20715 }, { "epoch": 41.432, "grad_norm": 1.17573881149292, "learning_rate": 2e-05, "loss": 0.04513192, "step": 20716 }, { "epoch": 41.434, "grad_norm": 1.1898726224899292, "learning_rate": 2e-05, "loss": 0.05149073, "step": 20717 }, { "epoch": 41.436, "grad_norm": 0.9323304891586304, "learning_rate": 2e-05, "loss": 0.03184193, "step": 20718 }, { "epoch": 41.438, "grad_norm": 1.4126533269882202, "learning_rate": 2e-05, "loss": 0.059554, "step": 20719 }, { "epoch": 41.44, "grad_norm": 0.9055007696151733, "learning_rate": 2e-05, "loss": 0.03545699, "step": 20720 }, { "epoch": 41.442, "grad_norm": 1.1232517957687378, "learning_rate": 2e-05, "loss": 0.04234304, "step": 20721 }, { "epoch": 41.444, "grad_norm": 2.0118048191070557, "learning_rate": 2e-05, "loss": 0.05148489, "step": 20722 }, { "epoch": 41.446, "grad_norm": 1.0636597871780396, "learning_rate": 2e-05, "loss": 0.05337211, "step": 20723 }, { "epoch": 41.448, "grad_norm": 1.2172831296920776, "learning_rate": 2e-05, "loss": 0.04693911, "step": 20724 }, { "epoch": 41.45, "grad_norm": 0.984148383140564, "learning_rate": 2e-05, "loss": 0.04481329, "step": 20725 }, { "epoch": 41.452, "grad_norm": 1.1510989665985107, "learning_rate": 2e-05, "loss": 0.03622846, "step": 20726 }, { "epoch": 41.454, "grad_norm": 1.4569380283355713, "learning_rate": 2e-05, "loss": 0.04784452, "step": 20727 }, { "epoch": 41.456, "grad_norm": 1.2640044689178467, "learning_rate": 2e-05, "loss": 0.03096916, "step": 20728 }, { "epoch": 41.458, "grad_norm": 1.239412784576416, "learning_rate": 2e-05, "loss": 0.04210846, "step": 20729 }, { "epoch": 41.46, "grad_norm": 0.955824077129364, "learning_rate": 2e-05, "loss": 0.04110126, "step": 20730 }, { "epoch": 41.462, "grad_norm": 1.167603850364685, "learning_rate": 2e-05, "loss": 0.04445145, "step": 20731 }, { "epoch": 41.464, "grad_norm": 1.7117555141448975, "learning_rate": 2e-05, "loss": 0.04161194, "step": 20732 }, { "epoch": 41.466, "grad_norm": 2.15871000289917, "learning_rate": 2e-05, "loss": 0.04580892, "step": 20733 }, { "epoch": 41.468, "grad_norm": 0.9395105242729187, "learning_rate": 2e-05, "loss": 0.03346226, "step": 20734 }, { "epoch": 41.47, "grad_norm": 1.4968886375427246, "learning_rate": 2e-05, "loss": 0.04359198, "step": 20735 }, { "epoch": 41.472, "grad_norm": 0.8363569974899292, "learning_rate": 2e-05, "loss": 0.02399542, "step": 20736 }, { "epoch": 41.474, "grad_norm": 1.2012196779251099, "learning_rate": 2e-05, "loss": 0.03669053, "step": 20737 }, { "epoch": 41.476, "grad_norm": 1.4188225269317627, "learning_rate": 2e-05, "loss": 0.03868242, "step": 20738 }, { "epoch": 41.478, "grad_norm": 1.0157170295715332, "learning_rate": 2e-05, "loss": 0.04028923, "step": 20739 }, { "epoch": 41.48, "grad_norm": 2.330000162124634, "learning_rate": 2e-05, "loss": 0.03906672, "step": 20740 }, { "epoch": 41.482, "grad_norm": 0.9743142127990723, "learning_rate": 2e-05, "loss": 0.02972276, "step": 20741 }, { "epoch": 41.484, "grad_norm": 1.8396464586257935, "learning_rate": 2e-05, "loss": 0.06036944, "step": 20742 }, { "epoch": 41.486, "grad_norm": 0.8992801904678345, "learning_rate": 2e-05, "loss": 0.02712942, "step": 20743 }, { "epoch": 41.488, "grad_norm": 0.9363061785697937, "learning_rate": 2e-05, "loss": 0.03278707, "step": 20744 }, { "epoch": 41.49, "grad_norm": 1.4655288457870483, "learning_rate": 2e-05, "loss": 0.04940777, "step": 20745 }, { "epoch": 41.492, "grad_norm": 0.9582664370536804, "learning_rate": 2e-05, "loss": 0.03304306, "step": 20746 }, { "epoch": 41.494, "grad_norm": 1.1195136308670044, "learning_rate": 2e-05, "loss": 0.039149, "step": 20747 }, { "epoch": 41.496, "grad_norm": 0.9670563340187073, "learning_rate": 2e-05, "loss": 0.03032694, "step": 20748 }, { "epoch": 41.498, "grad_norm": 1.4808894395828247, "learning_rate": 2e-05, "loss": 0.04291428, "step": 20749 }, { "epoch": 41.5, "grad_norm": 1.3408920764923096, "learning_rate": 2e-05, "loss": 0.03967678, "step": 20750 }, { "epoch": 41.502, "grad_norm": 1.1638193130493164, "learning_rate": 2e-05, "loss": 0.05072079, "step": 20751 }, { "epoch": 41.504, "grad_norm": 1.1642886400222778, "learning_rate": 2e-05, "loss": 0.03893583, "step": 20752 }, { "epoch": 41.506, "grad_norm": 0.9524499773979187, "learning_rate": 2e-05, "loss": 0.03851137, "step": 20753 }, { "epoch": 41.508, "grad_norm": 1.1314964294433594, "learning_rate": 2e-05, "loss": 0.05143006, "step": 20754 }, { "epoch": 41.51, "grad_norm": 0.8275223970413208, "learning_rate": 2e-05, "loss": 0.02350607, "step": 20755 }, { "epoch": 41.512, "grad_norm": 1.5361905097961426, "learning_rate": 2e-05, "loss": 0.04791206, "step": 20756 }, { "epoch": 41.514, "grad_norm": 1.4745361804962158, "learning_rate": 2e-05, "loss": 0.04464521, "step": 20757 }, { "epoch": 41.516, "grad_norm": 1.1392444372177124, "learning_rate": 2e-05, "loss": 0.04364313, "step": 20758 }, { "epoch": 41.518, "grad_norm": 1.4460992813110352, "learning_rate": 2e-05, "loss": 0.03048528, "step": 20759 }, { "epoch": 41.52, "grad_norm": 1.8597825765609741, "learning_rate": 2e-05, "loss": 0.03991501, "step": 20760 }, { "epoch": 41.522, "grad_norm": 1.0091418027877808, "learning_rate": 2e-05, "loss": 0.03937285, "step": 20761 }, { "epoch": 41.524, "grad_norm": 1.0891869068145752, "learning_rate": 2e-05, "loss": 0.04469266, "step": 20762 }, { "epoch": 41.526, "grad_norm": 0.9165951609611511, "learning_rate": 2e-05, "loss": 0.03308809, "step": 20763 }, { "epoch": 41.528, "grad_norm": 1.7730425596237183, "learning_rate": 2e-05, "loss": 0.06262781, "step": 20764 }, { "epoch": 41.53, "grad_norm": 0.9417017698287964, "learning_rate": 2e-05, "loss": 0.03575491, "step": 20765 }, { "epoch": 41.532, "grad_norm": 1.0658694505691528, "learning_rate": 2e-05, "loss": 0.04831742, "step": 20766 }, { "epoch": 41.534, "grad_norm": 1.0815049409866333, "learning_rate": 2e-05, "loss": 0.04623729, "step": 20767 }, { "epoch": 41.536, "grad_norm": 5.615219593048096, "learning_rate": 2e-05, "loss": 0.06060535, "step": 20768 }, { "epoch": 41.538, "grad_norm": 0.9815958142280579, "learning_rate": 2e-05, "loss": 0.03935281, "step": 20769 }, { "epoch": 41.54, "grad_norm": 1.3838926553726196, "learning_rate": 2e-05, "loss": 0.04692396, "step": 20770 }, { "epoch": 41.542, "grad_norm": 0.9471161961555481, "learning_rate": 2e-05, "loss": 0.03126211, "step": 20771 }, { "epoch": 41.544, "grad_norm": 1.6355572938919067, "learning_rate": 2e-05, "loss": 0.05914957, "step": 20772 }, { "epoch": 41.546, "grad_norm": 0.9628200531005859, "learning_rate": 2e-05, "loss": 0.03841096, "step": 20773 }, { "epoch": 41.548, "grad_norm": 1.3092021942138672, "learning_rate": 2e-05, "loss": 0.0363021, "step": 20774 }, { "epoch": 41.55, "grad_norm": 1.1557202339172363, "learning_rate": 2e-05, "loss": 0.03859261, "step": 20775 }, { "epoch": 41.552, "grad_norm": 1.7048753499984741, "learning_rate": 2e-05, "loss": 0.04564775, "step": 20776 }, { "epoch": 41.554, "grad_norm": 1.1887868642807007, "learning_rate": 2e-05, "loss": 0.0470032, "step": 20777 }, { "epoch": 41.556, "grad_norm": 1.4324625730514526, "learning_rate": 2e-05, "loss": 0.04969774, "step": 20778 }, { "epoch": 41.558, "grad_norm": 1.5543866157531738, "learning_rate": 2e-05, "loss": 0.04761218, "step": 20779 }, { "epoch": 41.56, "grad_norm": 1.446258783340454, "learning_rate": 2e-05, "loss": 0.04409954, "step": 20780 }, { "epoch": 41.562, "grad_norm": 1.793944001197815, "learning_rate": 2e-05, "loss": 0.03818799, "step": 20781 }, { "epoch": 41.564, "grad_norm": 1.148276686668396, "learning_rate": 2e-05, "loss": 0.05467147, "step": 20782 }, { "epoch": 41.566, "grad_norm": 1.350179672241211, "learning_rate": 2e-05, "loss": 0.05654674, "step": 20783 }, { "epoch": 41.568, "grad_norm": 2.61437726020813, "learning_rate": 2e-05, "loss": 0.04888039, "step": 20784 }, { "epoch": 41.57, "grad_norm": 2.044057607650757, "learning_rate": 2e-05, "loss": 0.04739808, "step": 20785 }, { "epoch": 41.572, "grad_norm": 3.246558904647827, "learning_rate": 2e-05, "loss": 0.04292437, "step": 20786 }, { "epoch": 41.574, "grad_norm": 1.3394795656204224, "learning_rate": 2e-05, "loss": 0.05286704, "step": 20787 }, { "epoch": 41.576, "grad_norm": 1.1341111660003662, "learning_rate": 2e-05, "loss": 0.03911719, "step": 20788 }, { "epoch": 41.578, "grad_norm": 1.082596778869629, "learning_rate": 2e-05, "loss": 0.0426783, "step": 20789 }, { "epoch": 41.58, "grad_norm": 1.0886067152023315, "learning_rate": 2e-05, "loss": 0.03294317, "step": 20790 }, { "epoch": 41.582, "grad_norm": 1.0874122381210327, "learning_rate": 2e-05, "loss": 0.03647999, "step": 20791 }, { "epoch": 41.584, "grad_norm": 1.662583589553833, "learning_rate": 2e-05, "loss": 0.0584047, "step": 20792 }, { "epoch": 41.586, "grad_norm": 1.6352686882019043, "learning_rate": 2e-05, "loss": 0.05701789, "step": 20793 }, { "epoch": 41.588, "grad_norm": 1.0350384712219238, "learning_rate": 2e-05, "loss": 0.03102995, "step": 20794 }, { "epoch": 41.59, "grad_norm": 0.9820771217346191, "learning_rate": 2e-05, "loss": 0.03655577, "step": 20795 }, { "epoch": 41.592, "grad_norm": 1.1517174243927002, "learning_rate": 2e-05, "loss": 0.04654104, "step": 20796 }, { "epoch": 41.594, "grad_norm": 1.323981523513794, "learning_rate": 2e-05, "loss": 0.03900374, "step": 20797 }, { "epoch": 41.596, "grad_norm": 0.9745594263076782, "learning_rate": 2e-05, "loss": 0.04128006, "step": 20798 }, { "epoch": 41.598, "grad_norm": 1.2357794046401978, "learning_rate": 2e-05, "loss": 0.04828186, "step": 20799 }, { "epoch": 41.6, "grad_norm": 1.5480984449386597, "learning_rate": 2e-05, "loss": 0.04227243, "step": 20800 }, { "epoch": 41.602, "grad_norm": 2.931015968322754, "learning_rate": 2e-05, "loss": 0.03564053, "step": 20801 }, { "epoch": 41.604, "grad_norm": 0.997758150100708, "learning_rate": 2e-05, "loss": 0.02863617, "step": 20802 }, { "epoch": 41.606, "grad_norm": 1.4064468145370483, "learning_rate": 2e-05, "loss": 0.04132503, "step": 20803 }, { "epoch": 41.608, "grad_norm": 1.7065881490707397, "learning_rate": 2e-05, "loss": 0.05113444, "step": 20804 }, { "epoch": 41.61, "grad_norm": 1.0230133533477783, "learning_rate": 2e-05, "loss": 0.04917635, "step": 20805 }, { "epoch": 41.612, "grad_norm": 1.1202157735824585, "learning_rate": 2e-05, "loss": 0.03878956, "step": 20806 }, { "epoch": 41.614, "grad_norm": 0.9461963772773743, "learning_rate": 2e-05, "loss": 0.03378967, "step": 20807 }, { "epoch": 41.616, "grad_norm": 1.0084055662155151, "learning_rate": 2e-05, "loss": 0.03634596, "step": 20808 }, { "epoch": 41.618, "grad_norm": 1.1045643091201782, "learning_rate": 2e-05, "loss": 0.04202381, "step": 20809 }, { "epoch": 41.62, "grad_norm": 2.6237292289733887, "learning_rate": 2e-05, "loss": 0.04369119, "step": 20810 }, { "epoch": 41.622, "grad_norm": 0.849744439125061, "learning_rate": 2e-05, "loss": 0.03903548, "step": 20811 }, { "epoch": 41.624, "grad_norm": 1.2093498706817627, "learning_rate": 2e-05, "loss": 0.0482057, "step": 20812 }, { "epoch": 41.626, "grad_norm": 1.0627022981643677, "learning_rate": 2e-05, "loss": 0.04712895, "step": 20813 }, { "epoch": 41.628, "grad_norm": 0.9130725264549255, "learning_rate": 2e-05, "loss": 0.02564049, "step": 20814 }, { "epoch": 41.63, "grad_norm": 1.5451396703720093, "learning_rate": 2e-05, "loss": 0.04636061, "step": 20815 }, { "epoch": 41.632, "grad_norm": 1.9981565475463867, "learning_rate": 2e-05, "loss": 0.05007055, "step": 20816 }, { "epoch": 41.634, "grad_norm": 0.9765691161155701, "learning_rate": 2e-05, "loss": 0.02837576, "step": 20817 }, { "epoch": 41.636, "grad_norm": 1.0249717235565186, "learning_rate": 2e-05, "loss": 0.04148117, "step": 20818 }, { "epoch": 41.638, "grad_norm": 1.2271143198013306, "learning_rate": 2e-05, "loss": 0.04340891, "step": 20819 }, { "epoch": 41.64, "grad_norm": 1.133543848991394, "learning_rate": 2e-05, "loss": 0.03988294, "step": 20820 }, { "epoch": 41.642, "grad_norm": 1.3893015384674072, "learning_rate": 2e-05, "loss": 0.05528411, "step": 20821 }, { "epoch": 41.644, "grad_norm": 1.1386867761611938, "learning_rate": 2e-05, "loss": 0.05518597, "step": 20822 }, { "epoch": 41.646, "grad_norm": 1.8968818187713623, "learning_rate": 2e-05, "loss": 0.04474929, "step": 20823 }, { "epoch": 41.648, "grad_norm": 1.8063867092132568, "learning_rate": 2e-05, "loss": 0.05708254, "step": 20824 }, { "epoch": 41.65, "grad_norm": 1.022626280784607, "learning_rate": 2e-05, "loss": 0.04096928, "step": 20825 }, { "epoch": 41.652, "grad_norm": 0.9220181107521057, "learning_rate": 2e-05, "loss": 0.03198778, "step": 20826 }, { "epoch": 41.654, "grad_norm": 1.0199060440063477, "learning_rate": 2e-05, "loss": 0.04393545, "step": 20827 }, { "epoch": 41.656, "grad_norm": 1.1878931522369385, "learning_rate": 2e-05, "loss": 0.05717408, "step": 20828 }, { "epoch": 41.658, "grad_norm": 1.1951138973236084, "learning_rate": 2e-05, "loss": 0.04063776, "step": 20829 }, { "epoch": 41.66, "grad_norm": 2.176218032836914, "learning_rate": 2e-05, "loss": 0.04411135, "step": 20830 }, { "epoch": 41.662, "grad_norm": 1.0734684467315674, "learning_rate": 2e-05, "loss": 0.03676351, "step": 20831 }, { "epoch": 41.664, "grad_norm": 1.0029239654541016, "learning_rate": 2e-05, "loss": 0.03883167, "step": 20832 }, { "epoch": 41.666, "grad_norm": 1.103541612625122, "learning_rate": 2e-05, "loss": 0.04248177, "step": 20833 }, { "epoch": 41.668, "grad_norm": 0.8479019999504089, "learning_rate": 2e-05, "loss": 0.0269597, "step": 20834 }, { "epoch": 41.67, "grad_norm": 0.9064362645149231, "learning_rate": 2e-05, "loss": 0.02488416, "step": 20835 }, { "epoch": 41.672, "grad_norm": 1.1712408065795898, "learning_rate": 2e-05, "loss": 0.0446885, "step": 20836 }, { "epoch": 41.674, "grad_norm": 1.0608561038970947, "learning_rate": 2e-05, "loss": 0.03930969, "step": 20837 }, { "epoch": 41.676, "grad_norm": 1.1592738628387451, "learning_rate": 2e-05, "loss": 0.05664834, "step": 20838 }, { "epoch": 41.678, "grad_norm": 1.1736304759979248, "learning_rate": 2e-05, "loss": 0.04376283, "step": 20839 }, { "epoch": 41.68, "grad_norm": 0.9427878856658936, "learning_rate": 2e-05, "loss": 0.04179813, "step": 20840 }, { "epoch": 41.682, "grad_norm": 1.2310322523117065, "learning_rate": 2e-05, "loss": 0.03930753, "step": 20841 }, { "epoch": 41.684, "grad_norm": 1.094673752784729, "learning_rate": 2e-05, "loss": 0.04307689, "step": 20842 }, { "epoch": 41.686, "grad_norm": 1.4194506406784058, "learning_rate": 2e-05, "loss": 0.05326529, "step": 20843 }, { "epoch": 41.688, "grad_norm": 1.287922739982605, "learning_rate": 2e-05, "loss": 0.04562788, "step": 20844 }, { "epoch": 41.69, "grad_norm": 1.230519413948059, "learning_rate": 2e-05, "loss": 0.04851618, "step": 20845 }, { "epoch": 41.692, "grad_norm": 1.1211730241775513, "learning_rate": 2e-05, "loss": 0.04277118, "step": 20846 }, { "epoch": 41.694, "grad_norm": 0.8667340278625488, "learning_rate": 2e-05, "loss": 0.03346185, "step": 20847 }, { "epoch": 41.696, "grad_norm": 1.2957584857940674, "learning_rate": 2e-05, "loss": 0.05518442, "step": 20848 }, { "epoch": 41.698, "grad_norm": 1.406126856803894, "learning_rate": 2e-05, "loss": 0.03924958, "step": 20849 }, { "epoch": 41.7, "grad_norm": 1.0627779960632324, "learning_rate": 2e-05, "loss": 0.04351301, "step": 20850 }, { "epoch": 41.702, "grad_norm": 1.552672028541565, "learning_rate": 2e-05, "loss": 0.04476134, "step": 20851 }, { "epoch": 41.704, "grad_norm": 1.3620758056640625, "learning_rate": 2e-05, "loss": 0.04386076, "step": 20852 }, { "epoch": 41.706, "grad_norm": 1.2800829410552979, "learning_rate": 2e-05, "loss": 0.05514955, "step": 20853 }, { "epoch": 41.708, "grad_norm": 1.3499983549118042, "learning_rate": 2e-05, "loss": 0.05110916, "step": 20854 }, { "epoch": 41.71, "grad_norm": 1.1043922901153564, "learning_rate": 2e-05, "loss": 0.04525407, "step": 20855 }, { "epoch": 41.712, "grad_norm": 0.9730817675590515, "learning_rate": 2e-05, "loss": 0.03713318, "step": 20856 }, { "epoch": 41.714, "grad_norm": 1.1892083883285522, "learning_rate": 2e-05, "loss": 0.06424329, "step": 20857 }, { "epoch": 41.716, "grad_norm": 1.120700716972351, "learning_rate": 2e-05, "loss": 0.03769391, "step": 20858 }, { "epoch": 41.718, "grad_norm": 1.0524564981460571, "learning_rate": 2e-05, "loss": 0.03986463, "step": 20859 }, { "epoch": 41.72, "grad_norm": 1.3511396646499634, "learning_rate": 2e-05, "loss": 0.04143517, "step": 20860 }, { "epoch": 41.722, "grad_norm": 1.1678919792175293, "learning_rate": 2e-05, "loss": 0.05457103, "step": 20861 }, { "epoch": 41.724, "grad_norm": 1.2981938123703003, "learning_rate": 2e-05, "loss": 0.04231258, "step": 20862 }, { "epoch": 41.726, "grad_norm": 1.0791761875152588, "learning_rate": 2e-05, "loss": 0.04140547, "step": 20863 }, { "epoch": 41.728, "grad_norm": 0.9181410074234009, "learning_rate": 2e-05, "loss": 0.02735843, "step": 20864 }, { "epoch": 41.73, "grad_norm": 1.0648291110992432, "learning_rate": 2e-05, "loss": 0.04785656, "step": 20865 }, { "epoch": 41.732, "grad_norm": 0.9138433337211609, "learning_rate": 2e-05, "loss": 0.03066853, "step": 20866 }, { "epoch": 41.734, "grad_norm": 1.181591272354126, "learning_rate": 2e-05, "loss": 0.04857711, "step": 20867 }, { "epoch": 41.736, "grad_norm": 1.5955355167388916, "learning_rate": 2e-05, "loss": 0.05916894, "step": 20868 }, { "epoch": 41.738, "grad_norm": 1.0371613502502441, "learning_rate": 2e-05, "loss": 0.05294357, "step": 20869 }, { "epoch": 41.74, "grad_norm": 1.0835472345352173, "learning_rate": 2e-05, "loss": 0.0469273, "step": 20870 }, { "epoch": 41.742, "grad_norm": 1.1428213119506836, "learning_rate": 2e-05, "loss": 0.03998315, "step": 20871 }, { "epoch": 41.744, "grad_norm": 0.974422037601471, "learning_rate": 2e-05, "loss": 0.03290579, "step": 20872 }, { "epoch": 41.746, "grad_norm": 1.1686307191848755, "learning_rate": 2e-05, "loss": 0.0401299, "step": 20873 }, { "epoch": 41.748, "grad_norm": 0.939756453037262, "learning_rate": 2e-05, "loss": 0.03547032, "step": 20874 }, { "epoch": 41.75, "grad_norm": 1.3355270624160767, "learning_rate": 2e-05, "loss": 0.05449144, "step": 20875 }, { "epoch": 41.752, "grad_norm": 1.930415391921997, "learning_rate": 2e-05, "loss": 0.0459896, "step": 20876 }, { "epoch": 41.754, "grad_norm": 1.2636489868164062, "learning_rate": 2e-05, "loss": 0.04372173, "step": 20877 }, { "epoch": 41.756, "grad_norm": 0.951036810874939, "learning_rate": 2e-05, "loss": 0.03102885, "step": 20878 }, { "epoch": 41.758, "grad_norm": 1.0059030055999756, "learning_rate": 2e-05, "loss": 0.0447848, "step": 20879 }, { "epoch": 41.76, "grad_norm": 1.3128039836883545, "learning_rate": 2e-05, "loss": 0.05759876, "step": 20880 }, { "epoch": 41.762, "grad_norm": 0.9333274960517883, "learning_rate": 2e-05, "loss": 0.03614888, "step": 20881 }, { "epoch": 41.764, "grad_norm": 1.0617960691452026, "learning_rate": 2e-05, "loss": 0.04992104, "step": 20882 }, { "epoch": 41.766, "grad_norm": 1.1370936632156372, "learning_rate": 2e-05, "loss": 0.04910323, "step": 20883 }, { "epoch": 41.768, "grad_norm": 1.4621853828430176, "learning_rate": 2e-05, "loss": 0.05997195, "step": 20884 }, { "epoch": 41.77, "grad_norm": 2.124908685684204, "learning_rate": 2e-05, "loss": 0.05527842, "step": 20885 }, { "epoch": 41.772, "grad_norm": 1.221514344215393, "learning_rate": 2e-05, "loss": 0.04463506, "step": 20886 }, { "epoch": 41.774, "grad_norm": 1.7790493965148926, "learning_rate": 2e-05, "loss": 0.05189645, "step": 20887 }, { "epoch": 41.776, "grad_norm": 1.173425555229187, "learning_rate": 2e-05, "loss": 0.04163071, "step": 20888 }, { "epoch": 41.778, "grad_norm": 1.3737986087799072, "learning_rate": 2e-05, "loss": 0.05653045, "step": 20889 }, { "epoch": 41.78, "grad_norm": 1.0248064994812012, "learning_rate": 2e-05, "loss": 0.04548271, "step": 20890 }, { "epoch": 41.782, "grad_norm": 1.1362992525100708, "learning_rate": 2e-05, "loss": 0.05279531, "step": 20891 }, { "epoch": 41.784, "grad_norm": 0.9178828597068787, "learning_rate": 2e-05, "loss": 0.03632183, "step": 20892 }, { "epoch": 41.786, "grad_norm": 1.2301510572433472, "learning_rate": 2e-05, "loss": 0.06399968, "step": 20893 }, { "epoch": 41.788, "grad_norm": 1.7078642845153809, "learning_rate": 2e-05, "loss": 0.04200523, "step": 20894 }, { "epoch": 41.79, "grad_norm": 1.1228187084197998, "learning_rate": 2e-05, "loss": 0.04934014, "step": 20895 }, { "epoch": 41.792, "grad_norm": 1.0995523929595947, "learning_rate": 2e-05, "loss": 0.0416133, "step": 20896 }, { "epoch": 41.794, "grad_norm": 1.058327078819275, "learning_rate": 2e-05, "loss": 0.03580619, "step": 20897 }, { "epoch": 41.796, "grad_norm": 1.1876635551452637, "learning_rate": 2e-05, "loss": 0.04260703, "step": 20898 }, { "epoch": 41.798, "grad_norm": 1.1622138023376465, "learning_rate": 2e-05, "loss": 0.05246913, "step": 20899 }, { "epoch": 41.8, "grad_norm": 1.0099878311157227, "learning_rate": 2e-05, "loss": 0.03317257, "step": 20900 }, { "epoch": 41.802, "grad_norm": 0.9585049152374268, "learning_rate": 2e-05, "loss": 0.04319292, "step": 20901 }, { "epoch": 41.804, "grad_norm": 0.9762246608734131, "learning_rate": 2e-05, "loss": 0.03809467, "step": 20902 }, { "epoch": 41.806, "grad_norm": 1.2723373174667358, "learning_rate": 2e-05, "loss": 0.0479826, "step": 20903 }, { "epoch": 41.808, "grad_norm": 1.0886563062667847, "learning_rate": 2e-05, "loss": 0.04964135, "step": 20904 }, { "epoch": 41.81, "grad_norm": 1.202476143836975, "learning_rate": 2e-05, "loss": 0.04373112, "step": 20905 }, { "epoch": 41.812, "grad_norm": 1.1264097690582275, "learning_rate": 2e-05, "loss": 0.05073802, "step": 20906 }, { "epoch": 41.814, "grad_norm": 1.0894871950149536, "learning_rate": 2e-05, "loss": 0.0430168, "step": 20907 }, { "epoch": 41.816, "grad_norm": 1.0725767612457275, "learning_rate": 2e-05, "loss": 0.0500539, "step": 20908 }, { "epoch": 41.818, "grad_norm": 0.8513020277023315, "learning_rate": 2e-05, "loss": 0.027557, "step": 20909 }, { "epoch": 41.82, "grad_norm": 1.081506609916687, "learning_rate": 2e-05, "loss": 0.03523725, "step": 20910 }, { "epoch": 41.822, "grad_norm": 0.957490861415863, "learning_rate": 2e-05, "loss": 0.03168899, "step": 20911 }, { "epoch": 41.824, "grad_norm": 2.2306346893310547, "learning_rate": 2e-05, "loss": 0.05726989, "step": 20912 }, { "epoch": 41.826, "grad_norm": 1.9513038396835327, "learning_rate": 2e-05, "loss": 0.04481925, "step": 20913 }, { "epoch": 41.828, "grad_norm": 1.337936520576477, "learning_rate": 2e-05, "loss": 0.0413946, "step": 20914 }, { "epoch": 41.83, "grad_norm": 1.0816913843154907, "learning_rate": 2e-05, "loss": 0.0409156, "step": 20915 }, { "epoch": 41.832, "grad_norm": 1.6926850080490112, "learning_rate": 2e-05, "loss": 0.04283394, "step": 20916 }, { "epoch": 41.834, "grad_norm": 0.8666943311691284, "learning_rate": 2e-05, "loss": 0.03543434, "step": 20917 }, { "epoch": 41.836, "grad_norm": 1.53314208984375, "learning_rate": 2e-05, "loss": 0.03278365, "step": 20918 }, { "epoch": 41.838, "grad_norm": 1.1249909400939941, "learning_rate": 2e-05, "loss": 0.05102055, "step": 20919 }, { "epoch": 41.84, "grad_norm": 0.9457797408103943, "learning_rate": 2e-05, "loss": 0.02586297, "step": 20920 }, { "epoch": 41.842, "grad_norm": 1.2952580451965332, "learning_rate": 2e-05, "loss": 0.0418021, "step": 20921 }, { "epoch": 41.844, "grad_norm": 1.9576411247253418, "learning_rate": 2e-05, "loss": 0.0378026, "step": 20922 }, { "epoch": 41.846, "grad_norm": 1.1403639316558838, "learning_rate": 2e-05, "loss": 0.04702896, "step": 20923 }, { "epoch": 41.848, "grad_norm": 1.0152126550674438, "learning_rate": 2e-05, "loss": 0.0330511, "step": 20924 }, { "epoch": 41.85, "grad_norm": 1.406144142150879, "learning_rate": 2e-05, "loss": 0.0480487, "step": 20925 }, { "epoch": 41.852, "grad_norm": 1.6961252689361572, "learning_rate": 2e-05, "loss": 0.04056136, "step": 20926 }, { "epoch": 41.854, "grad_norm": 0.9696795344352722, "learning_rate": 2e-05, "loss": 0.03754342, "step": 20927 }, { "epoch": 41.856, "grad_norm": 1.3198596239089966, "learning_rate": 2e-05, "loss": 0.05370624, "step": 20928 }, { "epoch": 41.858, "grad_norm": 1.0098850727081299, "learning_rate": 2e-05, "loss": 0.04108997, "step": 20929 }, { "epoch": 41.86, "grad_norm": 3.122553825378418, "learning_rate": 2e-05, "loss": 0.04812506, "step": 20930 }, { "epoch": 41.862, "grad_norm": 1.554870843887329, "learning_rate": 2e-05, "loss": 0.06438936, "step": 20931 }, { "epoch": 41.864, "grad_norm": 1.5811493396759033, "learning_rate": 2e-05, "loss": 0.07912888, "step": 20932 }, { "epoch": 41.866, "grad_norm": 1.4951846599578857, "learning_rate": 2e-05, "loss": 0.05585081, "step": 20933 }, { "epoch": 41.868, "grad_norm": 1.5642513036727905, "learning_rate": 2e-05, "loss": 0.05575603, "step": 20934 }, { "epoch": 41.87, "grad_norm": 1.4420052766799927, "learning_rate": 2e-05, "loss": 0.06080664, "step": 20935 }, { "epoch": 41.872, "grad_norm": 2.5895018577575684, "learning_rate": 2e-05, "loss": 0.04570051, "step": 20936 }, { "epoch": 41.874, "grad_norm": 1.0213804244995117, "learning_rate": 2e-05, "loss": 0.04081128, "step": 20937 }, { "epoch": 41.876, "grad_norm": 1.988159418106079, "learning_rate": 2e-05, "loss": 0.05070598, "step": 20938 }, { "epoch": 41.878, "grad_norm": 0.8260414004325867, "learning_rate": 2e-05, "loss": 0.02705194, "step": 20939 }, { "epoch": 41.88, "grad_norm": 1.0088437795639038, "learning_rate": 2e-05, "loss": 0.03486571, "step": 20940 }, { "epoch": 41.882, "grad_norm": 1.3227852582931519, "learning_rate": 2e-05, "loss": 0.04520843, "step": 20941 }, { "epoch": 41.884, "grad_norm": 1.4335944652557373, "learning_rate": 2e-05, "loss": 0.05580106, "step": 20942 }, { "epoch": 41.886, "grad_norm": 3.1642167568206787, "learning_rate": 2e-05, "loss": 0.0365737, "step": 20943 }, { "epoch": 41.888, "grad_norm": 0.9014084935188293, "learning_rate": 2e-05, "loss": 0.03885418, "step": 20944 }, { "epoch": 41.89, "grad_norm": 1.367006778717041, "learning_rate": 2e-05, "loss": 0.03637488, "step": 20945 }, { "epoch": 41.892, "grad_norm": 1.2565900087356567, "learning_rate": 2e-05, "loss": 0.05151416, "step": 20946 }, { "epoch": 41.894, "grad_norm": 1.170021414756775, "learning_rate": 2e-05, "loss": 0.04252572, "step": 20947 }, { "epoch": 41.896, "grad_norm": 1.1905535459518433, "learning_rate": 2e-05, "loss": 0.0438934, "step": 20948 }, { "epoch": 41.898, "grad_norm": 1.5759869813919067, "learning_rate": 2e-05, "loss": 0.05521838, "step": 20949 }, { "epoch": 41.9, "grad_norm": 1.1321731805801392, "learning_rate": 2e-05, "loss": 0.04660878, "step": 20950 }, { "epoch": 41.902, "grad_norm": 0.9967470169067383, "learning_rate": 2e-05, "loss": 0.04041508, "step": 20951 }, { "epoch": 41.904, "grad_norm": 0.8493726849555969, "learning_rate": 2e-05, "loss": 0.02656239, "step": 20952 }, { "epoch": 41.906, "grad_norm": 0.9367610216140747, "learning_rate": 2e-05, "loss": 0.02886939, "step": 20953 }, { "epoch": 41.908, "grad_norm": 1.219017744064331, "learning_rate": 2e-05, "loss": 0.04774205, "step": 20954 }, { "epoch": 41.91, "grad_norm": 3.759397506713867, "learning_rate": 2e-05, "loss": 0.0436502, "step": 20955 }, { "epoch": 41.912, "grad_norm": 1.1110494136810303, "learning_rate": 2e-05, "loss": 0.04167279, "step": 20956 }, { "epoch": 41.914, "grad_norm": 1.0108627080917358, "learning_rate": 2e-05, "loss": 0.04047691, "step": 20957 }, { "epoch": 41.916, "grad_norm": 0.9685512185096741, "learning_rate": 2e-05, "loss": 0.03863754, "step": 20958 }, { "epoch": 41.918, "grad_norm": 1.0660525560379028, "learning_rate": 2e-05, "loss": 0.04429211, "step": 20959 }, { "epoch": 41.92, "grad_norm": 0.9616813659667969, "learning_rate": 2e-05, "loss": 0.03093532, "step": 20960 }, { "epoch": 41.922, "grad_norm": 1.5262398719787598, "learning_rate": 2e-05, "loss": 0.04905374, "step": 20961 }, { "epoch": 41.924, "grad_norm": 1.9401978254318237, "learning_rate": 2e-05, "loss": 0.03105143, "step": 20962 }, { "epoch": 41.926, "grad_norm": 1.2638821601867676, "learning_rate": 2e-05, "loss": 0.0589263, "step": 20963 }, { "epoch": 41.928, "grad_norm": 1.8972457647323608, "learning_rate": 2e-05, "loss": 0.05652375, "step": 20964 }, { "epoch": 41.93, "grad_norm": 0.9049991369247437, "learning_rate": 2e-05, "loss": 0.03386841, "step": 20965 }, { "epoch": 41.932, "grad_norm": 3.3856403827667236, "learning_rate": 2e-05, "loss": 0.05998956, "step": 20966 }, { "epoch": 41.934, "grad_norm": 0.913093090057373, "learning_rate": 2e-05, "loss": 0.04485841, "step": 20967 }, { "epoch": 41.936, "grad_norm": 1.2677350044250488, "learning_rate": 2e-05, "loss": 0.0360848, "step": 20968 }, { "epoch": 41.938, "grad_norm": 1.3628301620483398, "learning_rate": 2e-05, "loss": 0.05783501, "step": 20969 }, { "epoch": 41.94, "grad_norm": 1.1209462881088257, "learning_rate": 2e-05, "loss": 0.05803783, "step": 20970 }, { "epoch": 41.942, "grad_norm": 1.0790148973464966, "learning_rate": 2e-05, "loss": 0.03558634, "step": 20971 }, { "epoch": 41.944, "grad_norm": 1.4616005420684814, "learning_rate": 2e-05, "loss": 0.04337046, "step": 20972 }, { "epoch": 41.946, "grad_norm": 1.051166296005249, "learning_rate": 2e-05, "loss": 0.04958903, "step": 20973 }, { "epoch": 41.948, "grad_norm": 0.9287793040275574, "learning_rate": 2e-05, "loss": 0.03815935, "step": 20974 }, { "epoch": 41.95, "grad_norm": 0.8097259402275085, "learning_rate": 2e-05, "loss": 0.03195767, "step": 20975 }, { "epoch": 41.952, "grad_norm": 1.1887547969818115, "learning_rate": 2e-05, "loss": 0.04624657, "step": 20976 }, { "epoch": 41.954, "grad_norm": 1.1385581493377686, "learning_rate": 2e-05, "loss": 0.04004433, "step": 20977 }, { "epoch": 41.956, "grad_norm": 3.9162113666534424, "learning_rate": 2e-05, "loss": 0.04150792, "step": 20978 }, { "epoch": 41.958, "grad_norm": 0.8918132781982422, "learning_rate": 2e-05, "loss": 0.03976586, "step": 20979 }, { "epoch": 41.96, "grad_norm": 0.9254766702651978, "learning_rate": 2e-05, "loss": 0.02376895, "step": 20980 }, { "epoch": 41.962, "grad_norm": 1.211701512336731, "learning_rate": 2e-05, "loss": 0.04438786, "step": 20981 }, { "epoch": 41.964, "grad_norm": 0.9405471086502075, "learning_rate": 2e-05, "loss": 0.03331154, "step": 20982 }, { "epoch": 41.966, "grad_norm": 1.0620412826538086, "learning_rate": 2e-05, "loss": 0.04143971, "step": 20983 }, { "epoch": 41.968, "grad_norm": 0.9507395029067993, "learning_rate": 2e-05, "loss": 0.026652, "step": 20984 }, { "epoch": 41.97, "grad_norm": 2.2306034564971924, "learning_rate": 2e-05, "loss": 0.04445065, "step": 20985 }, { "epoch": 41.972, "grad_norm": 1.3297299146652222, "learning_rate": 2e-05, "loss": 0.04595707, "step": 20986 }, { "epoch": 41.974, "grad_norm": 1.1317402124404907, "learning_rate": 2e-05, "loss": 0.04801337, "step": 20987 }, { "epoch": 41.976, "grad_norm": 1.0323189496994019, "learning_rate": 2e-05, "loss": 0.02511585, "step": 20988 }, { "epoch": 41.978, "grad_norm": 1.0890284776687622, "learning_rate": 2e-05, "loss": 0.03176987, "step": 20989 }, { "epoch": 41.98, "grad_norm": 1.049621820449829, "learning_rate": 2e-05, "loss": 0.04191526, "step": 20990 }, { "epoch": 41.982, "grad_norm": 1.137843370437622, "learning_rate": 2e-05, "loss": 0.05723912, "step": 20991 }, { "epoch": 41.984, "grad_norm": 1.898020625114441, "learning_rate": 2e-05, "loss": 0.04660844, "step": 20992 }, { "epoch": 41.986, "grad_norm": 1.6505845785140991, "learning_rate": 2e-05, "loss": 0.04325987, "step": 20993 }, { "epoch": 41.988, "grad_norm": 1.9067316055297852, "learning_rate": 2e-05, "loss": 0.0376289, "step": 20994 }, { "epoch": 41.99, "grad_norm": 1.0495880842208862, "learning_rate": 2e-05, "loss": 0.0497958, "step": 20995 }, { "epoch": 41.992, "grad_norm": 2.131425142288208, "learning_rate": 2e-05, "loss": 0.06406991, "step": 20996 }, { "epoch": 41.994, "grad_norm": 0.9507582187652588, "learning_rate": 2e-05, "loss": 0.039012, "step": 20997 }, { "epoch": 41.996, "grad_norm": 1.319658875465393, "learning_rate": 2e-05, "loss": 0.05224126, "step": 20998 }, { "epoch": 41.998, "grad_norm": 1.2337360382080078, "learning_rate": 2e-05, "loss": 0.04224138, "step": 20999 }, { "epoch": 42.0, "grad_norm": 1.0197802782058716, "learning_rate": 2e-05, "loss": 0.04422561, "step": 21000 }, { "epoch": 42.0, "eval_performance": { "AngleClassification_1": 0.998, "AngleClassification_2": 0.998, "AngleClassification_3": 0.9840319361277445, "Equal_1": 0.998, "Equal_2": 0.9780439121756487, "Equal_3": 0.9820359281437125, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.998003992015968, "Parallel_1": 0.9879759519038076, "Parallel_2": 0.9979959919839679, "Parallel_3": 0.996, "Perpendicular_1": 0.996, "Perpendicular_2": 0.996, "Perpendicular_3": 0.8837675350701403, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 1.0, "PointLiesOnCircle_3": 0.994, "PointLiesOnLine_1": 0.9939879759519038, "PointLiesOnLine_2": 0.9959919839679359, "PointLiesOnLine_3": 0.9840319361277445 }, "eval_runtime": 227.1575, "eval_samples_per_second": 46.223, "eval_steps_per_second": 0.924, "step": 21000 }, { "epoch": 42.002, "grad_norm": 0.9700936675071716, "learning_rate": 2e-05, "loss": 0.04102673, "step": 21001 }, { "epoch": 42.004, "grad_norm": 1.8220040798187256, "learning_rate": 2e-05, "loss": 0.06315922, "step": 21002 }, { "epoch": 42.006, "grad_norm": 1.2287662029266357, "learning_rate": 2e-05, "loss": 0.0398781, "step": 21003 }, { "epoch": 42.008, "grad_norm": 0.9383965134620667, "learning_rate": 2e-05, "loss": 0.03527122, "step": 21004 }, { "epoch": 42.01, "grad_norm": 0.9732562899589539, "learning_rate": 2e-05, "loss": 0.03858916, "step": 21005 }, { "epoch": 42.012, "grad_norm": 1.17026686668396, "learning_rate": 2e-05, "loss": 0.04955617, "step": 21006 }, { "epoch": 42.014, "grad_norm": 1.6266382932662964, "learning_rate": 2e-05, "loss": 0.04872269, "step": 21007 }, { "epoch": 42.016, "grad_norm": 0.9640478491783142, "learning_rate": 2e-05, "loss": 0.03840442, "step": 21008 }, { "epoch": 42.018, "grad_norm": 1.1582939624786377, "learning_rate": 2e-05, "loss": 0.03703425, "step": 21009 }, { "epoch": 42.02, "grad_norm": 1.4761836528778076, "learning_rate": 2e-05, "loss": 0.04561071, "step": 21010 }, { "epoch": 42.022, "grad_norm": 1.597904086112976, "learning_rate": 2e-05, "loss": 0.06836455, "step": 21011 }, { "epoch": 42.024, "grad_norm": 1.023079514503479, "learning_rate": 2e-05, "loss": 0.04420719, "step": 21012 }, { "epoch": 42.026, "grad_norm": 1.405423879623413, "learning_rate": 2e-05, "loss": 0.04396185, "step": 21013 }, { "epoch": 42.028, "grad_norm": 1.0431057214736938, "learning_rate": 2e-05, "loss": 0.0322412, "step": 21014 }, { "epoch": 42.03, "grad_norm": 1.7780261039733887, "learning_rate": 2e-05, "loss": 0.06664103, "step": 21015 }, { "epoch": 42.032, "grad_norm": 1.1880748271942139, "learning_rate": 2e-05, "loss": 0.05870494, "step": 21016 }, { "epoch": 42.034, "grad_norm": 1.646074891090393, "learning_rate": 2e-05, "loss": 0.04139625, "step": 21017 }, { "epoch": 42.036, "grad_norm": 1.0631011724472046, "learning_rate": 2e-05, "loss": 0.04643429, "step": 21018 }, { "epoch": 42.038, "grad_norm": 0.9517710208892822, "learning_rate": 2e-05, "loss": 0.0355818, "step": 21019 }, { "epoch": 42.04, "grad_norm": 1.2955372333526611, "learning_rate": 2e-05, "loss": 0.04379638, "step": 21020 }, { "epoch": 42.042, "grad_norm": 0.9679491519927979, "learning_rate": 2e-05, "loss": 0.03325777, "step": 21021 }, { "epoch": 42.044, "grad_norm": 2.3602988719940186, "learning_rate": 2e-05, "loss": 0.08220077, "step": 21022 }, { "epoch": 42.046, "grad_norm": 2.193296194076538, "learning_rate": 2e-05, "loss": 0.05768355, "step": 21023 }, { "epoch": 42.048, "grad_norm": 1.1946171522140503, "learning_rate": 2e-05, "loss": 0.05033978, "step": 21024 }, { "epoch": 42.05, "grad_norm": 1.1059051752090454, "learning_rate": 2e-05, "loss": 0.04694241, "step": 21025 }, { "epoch": 42.052, "grad_norm": 1.0268399715423584, "learning_rate": 2e-05, "loss": 0.04262309, "step": 21026 }, { "epoch": 42.054, "grad_norm": 1.427384853363037, "learning_rate": 2e-05, "loss": 0.03287687, "step": 21027 }, { "epoch": 42.056, "grad_norm": 1.0758668184280396, "learning_rate": 2e-05, "loss": 0.05435067, "step": 21028 }, { "epoch": 42.058, "grad_norm": 1.2161868810653687, "learning_rate": 2e-05, "loss": 0.03697609, "step": 21029 }, { "epoch": 42.06, "grad_norm": 1.1664695739746094, "learning_rate": 2e-05, "loss": 0.03136274, "step": 21030 }, { "epoch": 42.062, "grad_norm": 1.1363751888275146, "learning_rate": 2e-05, "loss": 0.03785175, "step": 21031 }, { "epoch": 42.064, "grad_norm": 1.7126713991165161, "learning_rate": 2e-05, "loss": 0.03423917, "step": 21032 }, { "epoch": 42.066, "grad_norm": 0.939664363861084, "learning_rate": 2e-05, "loss": 0.03695014, "step": 21033 }, { "epoch": 42.068, "grad_norm": 1.6145817041397095, "learning_rate": 2e-05, "loss": 0.05989834, "step": 21034 }, { "epoch": 42.07, "grad_norm": 1.6542831659317017, "learning_rate": 2e-05, "loss": 0.05004215, "step": 21035 }, { "epoch": 42.072, "grad_norm": 1.4790773391723633, "learning_rate": 2e-05, "loss": 0.02785717, "step": 21036 }, { "epoch": 42.074, "grad_norm": 1.0851222276687622, "learning_rate": 2e-05, "loss": 0.02934862, "step": 21037 }, { "epoch": 42.076, "grad_norm": 0.9405733346939087, "learning_rate": 2e-05, "loss": 0.03904282, "step": 21038 }, { "epoch": 42.078, "grad_norm": 0.9045794606208801, "learning_rate": 2e-05, "loss": 0.03518179, "step": 21039 }, { "epoch": 42.08, "grad_norm": 3.382800817489624, "learning_rate": 2e-05, "loss": 0.05047537, "step": 21040 }, { "epoch": 42.082, "grad_norm": 1.201008915901184, "learning_rate": 2e-05, "loss": 0.03728542, "step": 21041 }, { "epoch": 42.084, "grad_norm": 1.0134788751602173, "learning_rate": 2e-05, "loss": 0.04909302, "step": 21042 }, { "epoch": 42.086, "grad_norm": 1.0823715925216675, "learning_rate": 2e-05, "loss": 0.04391014, "step": 21043 }, { "epoch": 42.088, "grad_norm": 1.714924931526184, "learning_rate": 2e-05, "loss": 0.04097553, "step": 21044 }, { "epoch": 42.09, "grad_norm": 1.4282069206237793, "learning_rate": 2e-05, "loss": 0.05695514, "step": 21045 }, { "epoch": 42.092, "grad_norm": 1.5105711221694946, "learning_rate": 2e-05, "loss": 0.03432512, "step": 21046 }, { "epoch": 42.094, "grad_norm": 0.8120563626289368, "learning_rate": 2e-05, "loss": 0.03040627, "step": 21047 }, { "epoch": 42.096, "grad_norm": 1.0581409931182861, "learning_rate": 2e-05, "loss": 0.03944409, "step": 21048 }, { "epoch": 42.098, "grad_norm": 1.2121249437332153, "learning_rate": 2e-05, "loss": 0.04270823, "step": 21049 }, { "epoch": 42.1, "grad_norm": 1.083536982536316, "learning_rate": 2e-05, "loss": 0.0478663, "step": 21050 }, { "epoch": 42.102, "grad_norm": 1.0249124765396118, "learning_rate": 2e-05, "loss": 0.03888548, "step": 21051 }, { "epoch": 42.104, "grad_norm": 1.125734567642212, "learning_rate": 2e-05, "loss": 0.0552579, "step": 21052 }, { "epoch": 42.106, "grad_norm": 1.1718311309814453, "learning_rate": 2e-05, "loss": 0.05276578, "step": 21053 }, { "epoch": 42.108, "grad_norm": 0.9595328569412231, "learning_rate": 2e-05, "loss": 0.03661241, "step": 21054 }, { "epoch": 42.11, "grad_norm": 0.9728356599807739, "learning_rate": 2e-05, "loss": 0.03744029, "step": 21055 }, { "epoch": 42.112, "grad_norm": 1.6119277477264404, "learning_rate": 2e-05, "loss": 0.04139771, "step": 21056 }, { "epoch": 42.114, "grad_norm": 1.4242968559265137, "learning_rate": 2e-05, "loss": 0.04021486, "step": 21057 }, { "epoch": 42.116, "grad_norm": 1.1543713808059692, "learning_rate": 2e-05, "loss": 0.04840346, "step": 21058 }, { "epoch": 42.118, "grad_norm": 1.0290127992630005, "learning_rate": 2e-05, "loss": 0.03487246, "step": 21059 }, { "epoch": 42.12, "grad_norm": 1.3864765167236328, "learning_rate": 2e-05, "loss": 0.03932028, "step": 21060 }, { "epoch": 42.122, "grad_norm": 0.9179838299751282, "learning_rate": 2e-05, "loss": 0.03239363, "step": 21061 }, { "epoch": 42.124, "grad_norm": 1.2617764472961426, "learning_rate": 2e-05, "loss": 0.06271095, "step": 21062 }, { "epoch": 42.126, "grad_norm": 2.0444753170013428, "learning_rate": 2e-05, "loss": 0.05909558, "step": 21063 }, { "epoch": 42.128, "grad_norm": 0.9492307901382446, "learning_rate": 2e-05, "loss": 0.03118101, "step": 21064 }, { "epoch": 42.13, "grad_norm": 0.935184895992279, "learning_rate": 2e-05, "loss": 0.03229182, "step": 21065 }, { "epoch": 42.132, "grad_norm": 1.1390889883041382, "learning_rate": 2e-05, "loss": 0.04746659, "step": 21066 }, { "epoch": 42.134, "grad_norm": 1.634655237197876, "learning_rate": 2e-05, "loss": 0.03468912, "step": 21067 }, { "epoch": 42.136, "grad_norm": 0.9589168429374695, "learning_rate": 2e-05, "loss": 0.03686056, "step": 21068 }, { "epoch": 42.138, "grad_norm": 1.663724422454834, "learning_rate": 2e-05, "loss": 0.03854396, "step": 21069 }, { "epoch": 42.14, "grad_norm": 2.2323434352874756, "learning_rate": 2e-05, "loss": 0.06560966, "step": 21070 }, { "epoch": 42.142, "grad_norm": 1.3438870906829834, "learning_rate": 2e-05, "loss": 0.04548727, "step": 21071 }, { "epoch": 42.144, "grad_norm": 1.0526059865951538, "learning_rate": 2e-05, "loss": 0.05486565, "step": 21072 }, { "epoch": 42.146, "grad_norm": 1.1061373949050903, "learning_rate": 2e-05, "loss": 0.02296307, "step": 21073 }, { "epoch": 42.148, "grad_norm": 1.0177232027053833, "learning_rate": 2e-05, "loss": 0.03018361, "step": 21074 }, { "epoch": 42.15, "grad_norm": 1.1827335357666016, "learning_rate": 2e-05, "loss": 0.04629721, "step": 21075 }, { "epoch": 42.152, "grad_norm": 1.390011191368103, "learning_rate": 2e-05, "loss": 0.03084483, "step": 21076 }, { "epoch": 42.154, "grad_norm": 1.2264175415039062, "learning_rate": 2e-05, "loss": 0.06162011, "step": 21077 }, { "epoch": 42.156, "grad_norm": 1.0796946287155151, "learning_rate": 2e-05, "loss": 0.04227725, "step": 21078 }, { "epoch": 42.158, "grad_norm": 1.0251507759094238, "learning_rate": 2e-05, "loss": 0.04384975, "step": 21079 }, { "epoch": 42.16, "grad_norm": 0.7584781050682068, "learning_rate": 2e-05, "loss": 0.02092627, "step": 21080 }, { "epoch": 42.162, "grad_norm": 1.0241891145706177, "learning_rate": 2e-05, "loss": 0.03699641, "step": 21081 }, { "epoch": 42.164, "grad_norm": 1.2963296175003052, "learning_rate": 2e-05, "loss": 0.04642354, "step": 21082 }, { "epoch": 42.166, "grad_norm": 1.0979738235473633, "learning_rate": 2e-05, "loss": 0.05109094, "step": 21083 }, { "epoch": 42.168, "grad_norm": 1.276710867881775, "learning_rate": 2e-05, "loss": 0.04463599, "step": 21084 }, { "epoch": 42.17, "grad_norm": 1.2307254076004028, "learning_rate": 2e-05, "loss": 0.06557526, "step": 21085 }, { "epoch": 42.172, "grad_norm": 0.9141935110092163, "learning_rate": 2e-05, "loss": 0.03489286, "step": 21086 }, { "epoch": 42.174, "grad_norm": 1.4897748231887817, "learning_rate": 2e-05, "loss": 0.0413481, "step": 21087 }, { "epoch": 42.176, "grad_norm": 1.6471000909805298, "learning_rate": 2e-05, "loss": 0.04396588, "step": 21088 }, { "epoch": 42.178, "grad_norm": 0.95074063539505, "learning_rate": 2e-05, "loss": 0.03235657, "step": 21089 }, { "epoch": 42.18, "grad_norm": 1.1178929805755615, "learning_rate": 2e-05, "loss": 0.04940484, "step": 21090 }, { "epoch": 42.182, "grad_norm": 1.112227439880371, "learning_rate": 2e-05, "loss": 0.04996863, "step": 21091 }, { "epoch": 42.184, "grad_norm": 1.1329551935195923, "learning_rate": 2e-05, "loss": 0.03411829, "step": 21092 }, { "epoch": 42.186, "grad_norm": 1.2849723100662231, "learning_rate": 2e-05, "loss": 0.04866448, "step": 21093 }, { "epoch": 42.188, "grad_norm": 0.9496929049491882, "learning_rate": 2e-05, "loss": 0.03476998, "step": 21094 }, { "epoch": 42.19, "grad_norm": 1.1436532735824585, "learning_rate": 2e-05, "loss": 0.03605236, "step": 21095 }, { "epoch": 42.192, "grad_norm": 1.7389308214187622, "learning_rate": 2e-05, "loss": 0.0442098, "step": 21096 }, { "epoch": 42.194, "grad_norm": 1.0475610494613647, "learning_rate": 2e-05, "loss": 0.0451545, "step": 21097 }, { "epoch": 42.196, "grad_norm": 1.2059231996536255, "learning_rate": 2e-05, "loss": 0.04617982, "step": 21098 }, { "epoch": 42.198, "grad_norm": 2.191673517227173, "learning_rate": 2e-05, "loss": 0.05115597, "step": 21099 }, { "epoch": 42.2, "grad_norm": 2.4983537197113037, "learning_rate": 2e-05, "loss": 0.05055241, "step": 21100 }, { "epoch": 42.202, "grad_norm": 1.6346322298049927, "learning_rate": 2e-05, "loss": 0.05330176, "step": 21101 }, { "epoch": 42.204, "grad_norm": 1.1973503828048706, "learning_rate": 2e-05, "loss": 0.04431368, "step": 21102 }, { "epoch": 42.206, "grad_norm": 1.0615580081939697, "learning_rate": 2e-05, "loss": 0.04781372, "step": 21103 }, { "epoch": 42.208, "grad_norm": 1.2563520669937134, "learning_rate": 2e-05, "loss": 0.04718297, "step": 21104 }, { "epoch": 42.21, "grad_norm": 1.01656973361969, "learning_rate": 2e-05, "loss": 0.04454514, "step": 21105 }, { "epoch": 42.212, "grad_norm": 1.252347707748413, "learning_rate": 2e-05, "loss": 0.06294603, "step": 21106 }, { "epoch": 42.214, "grad_norm": 1.0774246454238892, "learning_rate": 2e-05, "loss": 0.04096992, "step": 21107 }, { "epoch": 42.216, "grad_norm": 1.358150839805603, "learning_rate": 2e-05, "loss": 0.05995574, "step": 21108 }, { "epoch": 42.218, "grad_norm": 0.9118907451629639, "learning_rate": 2e-05, "loss": 0.03110724, "step": 21109 }, { "epoch": 42.22, "grad_norm": 0.8432122468948364, "learning_rate": 2e-05, "loss": 0.02416144, "step": 21110 }, { "epoch": 42.222, "grad_norm": 1.168723225593567, "learning_rate": 2e-05, "loss": 0.04742262, "step": 21111 }, { "epoch": 42.224, "grad_norm": 0.9975471496582031, "learning_rate": 2e-05, "loss": 0.04188582, "step": 21112 }, { "epoch": 42.226, "grad_norm": 1.0745786428451538, "learning_rate": 2e-05, "loss": 0.04268441, "step": 21113 }, { "epoch": 42.228, "grad_norm": 1.1017282009124756, "learning_rate": 2e-05, "loss": 0.03469285, "step": 21114 }, { "epoch": 42.23, "grad_norm": 1.2630066871643066, "learning_rate": 2e-05, "loss": 0.04869648, "step": 21115 }, { "epoch": 42.232, "grad_norm": 1.5103724002838135, "learning_rate": 2e-05, "loss": 0.06416001, "step": 21116 }, { "epoch": 42.234, "grad_norm": 1.7192751169204712, "learning_rate": 2e-05, "loss": 0.03365299, "step": 21117 }, { "epoch": 42.236, "grad_norm": 1.212378740310669, "learning_rate": 2e-05, "loss": 0.05217704, "step": 21118 }, { "epoch": 42.238, "grad_norm": 1.1511309146881104, "learning_rate": 2e-05, "loss": 0.04432339, "step": 21119 }, { "epoch": 42.24, "grad_norm": 1.1347357034683228, "learning_rate": 2e-05, "loss": 0.05104628, "step": 21120 }, { "epoch": 42.242, "grad_norm": 0.9806762933731079, "learning_rate": 2e-05, "loss": 0.03765688, "step": 21121 }, { "epoch": 42.244, "grad_norm": 1.220115065574646, "learning_rate": 2e-05, "loss": 0.04379454, "step": 21122 }, { "epoch": 42.246, "grad_norm": 1.1913201808929443, "learning_rate": 2e-05, "loss": 0.04290836, "step": 21123 }, { "epoch": 42.248, "grad_norm": 1.1008577346801758, "learning_rate": 2e-05, "loss": 0.04613386, "step": 21124 }, { "epoch": 42.25, "grad_norm": 0.9670965075492859, "learning_rate": 2e-05, "loss": 0.03040414, "step": 21125 }, { "epoch": 42.252, "grad_norm": 1.1135531663894653, "learning_rate": 2e-05, "loss": 0.04591753, "step": 21126 }, { "epoch": 42.254, "grad_norm": 0.9536369442939758, "learning_rate": 2e-05, "loss": 0.03241833, "step": 21127 }, { "epoch": 42.256, "grad_norm": 1.0186667442321777, "learning_rate": 2e-05, "loss": 0.04390181, "step": 21128 }, { "epoch": 42.258, "grad_norm": 0.9894280433654785, "learning_rate": 2e-05, "loss": 0.03453653, "step": 21129 }, { "epoch": 42.26, "grad_norm": 1.0800000429153442, "learning_rate": 2e-05, "loss": 0.0472707, "step": 21130 }, { "epoch": 42.262, "grad_norm": 1.1280932426452637, "learning_rate": 2e-05, "loss": 0.06085643, "step": 21131 }, { "epoch": 42.264, "grad_norm": 1.1370179653167725, "learning_rate": 2e-05, "loss": 0.05333015, "step": 21132 }, { "epoch": 42.266, "grad_norm": 1.2249078750610352, "learning_rate": 2e-05, "loss": 0.05581835, "step": 21133 }, { "epoch": 42.268, "grad_norm": 0.9061068296432495, "learning_rate": 2e-05, "loss": 0.03749494, "step": 21134 }, { "epoch": 42.27, "grad_norm": 1.4686170816421509, "learning_rate": 2e-05, "loss": 0.05820844, "step": 21135 }, { "epoch": 42.272, "grad_norm": 2.6312410831451416, "learning_rate": 2e-05, "loss": 0.05187429, "step": 21136 }, { "epoch": 42.274, "grad_norm": 1.0056737661361694, "learning_rate": 2e-05, "loss": 0.04137519, "step": 21137 }, { "epoch": 42.276, "grad_norm": 1.0142886638641357, "learning_rate": 2e-05, "loss": 0.03543835, "step": 21138 }, { "epoch": 42.278, "grad_norm": 0.9780080318450928, "learning_rate": 2e-05, "loss": 0.02519486, "step": 21139 }, { "epoch": 42.28, "grad_norm": 1.9837703704833984, "learning_rate": 2e-05, "loss": 0.05705058, "step": 21140 }, { "epoch": 42.282, "grad_norm": 1.2139025926589966, "learning_rate": 2e-05, "loss": 0.04807317, "step": 21141 }, { "epoch": 42.284, "grad_norm": 0.900611937046051, "learning_rate": 2e-05, "loss": 0.03369123, "step": 21142 }, { "epoch": 42.286, "grad_norm": 1.080986738204956, "learning_rate": 2e-05, "loss": 0.05582296, "step": 21143 }, { "epoch": 42.288, "grad_norm": 0.9722895622253418, "learning_rate": 2e-05, "loss": 0.03935083, "step": 21144 }, { "epoch": 42.29, "grad_norm": 0.9214039444923401, "learning_rate": 2e-05, "loss": 0.03636289, "step": 21145 }, { "epoch": 42.292, "grad_norm": 1.2674576044082642, "learning_rate": 2e-05, "loss": 0.05230562, "step": 21146 }, { "epoch": 42.294, "grad_norm": 1.5401294231414795, "learning_rate": 2e-05, "loss": 0.03503991, "step": 21147 }, { "epoch": 42.296, "grad_norm": 0.9755216836929321, "learning_rate": 2e-05, "loss": 0.03444564, "step": 21148 }, { "epoch": 42.298, "grad_norm": 1.0938457250595093, "learning_rate": 2e-05, "loss": 0.04485642, "step": 21149 }, { "epoch": 42.3, "grad_norm": 1.2076703310012817, "learning_rate": 2e-05, "loss": 0.05830783, "step": 21150 }, { "epoch": 42.302, "grad_norm": 1.0168300867080688, "learning_rate": 2e-05, "loss": 0.03578954, "step": 21151 }, { "epoch": 42.304, "grad_norm": 1.1618481874465942, "learning_rate": 2e-05, "loss": 0.04232055, "step": 21152 }, { "epoch": 42.306, "grad_norm": 0.8926101326942444, "learning_rate": 2e-05, "loss": 0.03830426, "step": 21153 }, { "epoch": 42.308, "grad_norm": 1.1208243370056152, "learning_rate": 2e-05, "loss": 0.03801634, "step": 21154 }, { "epoch": 42.31, "grad_norm": 2.151414394378662, "learning_rate": 2e-05, "loss": 0.05238803, "step": 21155 }, { "epoch": 42.312, "grad_norm": 1.3091636896133423, "learning_rate": 2e-05, "loss": 0.05875758, "step": 21156 }, { "epoch": 42.314, "grad_norm": 0.889733612537384, "learning_rate": 2e-05, "loss": 0.03338432, "step": 21157 }, { "epoch": 42.316, "grad_norm": 1.1013990640640259, "learning_rate": 2e-05, "loss": 0.0384525, "step": 21158 }, { "epoch": 42.318, "grad_norm": 1.7319785356521606, "learning_rate": 2e-05, "loss": 0.03697237, "step": 21159 }, { "epoch": 42.32, "grad_norm": 1.0585217475891113, "learning_rate": 2e-05, "loss": 0.05202343, "step": 21160 }, { "epoch": 42.322, "grad_norm": 2.4183876514434814, "learning_rate": 2e-05, "loss": 0.03342888, "step": 21161 }, { "epoch": 42.324, "grad_norm": 0.9897639751434326, "learning_rate": 2e-05, "loss": 0.04326984, "step": 21162 }, { "epoch": 42.326, "grad_norm": 1.1259437799453735, "learning_rate": 2e-05, "loss": 0.04162682, "step": 21163 }, { "epoch": 42.328, "grad_norm": 1.0117073059082031, "learning_rate": 2e-05, "loss": 0.03367231, "step": 21164 }, { "epoch": 42.33, "grad_norm": 0.8733380436897278, "learning_rate": 2e-05, "loss": 0.03393552, "step": 21165 }, { "epoch": 42.332, "grad_norm": 1.0278092622756958, "learning_rate": 2e-05, "loss": 0.03779173, "step": 21166 }, { "epoch": 42.334, "grad_norm": 2.138655185699463, "learning_rate": 2e-05, "loss": 0.05433659, "step": 21167 }, { "epoch": 42.336, "grad_norm": 1.1313689947128296, "learning_rate": 2e-05, "loss": 0.04018377, "step": 21168 }, { "epoch": 42.338, "grad_norm": 1.3012096881866455, "learning_rate": 2e-05, "loss": 0.05722218, "step": 21169 }, { "epoch": 42.34, "grad_norm": 2.0462048053741455, "learning_rate": 2e-05, "loss": 0.055285, "step": 21170 }, { "epoch": 42.342, "grad_norm": 1.0033764839172363, "learning_rate": 2e-05, "loss": 0.0428298, "step": 21171 }, { "epoch": 42.344, "grad_norm": 2.019235372543335, "learning_rate": 2e-05, "loss": 0.04404132, "step": 21172 }, { "epoch": 42.346, "grad_norm": 1.5573660135269165, "learning_rate": 2e-05, "loss": 0.05267327, "step": 21173 }, { "epoch": 42.348, "grad_norm": 0.9642741680145264, "learning_rate": 2e-05, "loss": 0.03583, "step": 21174 }, { "epoch": 42.35, "grad_norm": 0.9922071099281311, "learning_rate": 2e-05, "loss": 0.05025107, "step": 21175 }, { "epoch": 42.352, "grad_norm": 1.9734573364257812, "learning_rate": 2e-05, "loss": 0.03667674, "step": 21176 }, { "epoch": 42.354, "grad_norm": 1.0124297142028809, "learning_rate": 2e-05, "loss": 0.04356598, "step": 21177 }, { "epoch": 42.356, "grad_norm": 1.1000895500183105, "learning_rate": 2e-05, "loss": 0.04918827, "step": 21178 }, { "epoch": 42.358, "grad_norm": 1.5460973978042603, "learning_rate": 2e-05, "loss": 0.06218104, "step": 21179 }, { "epoch": 42.36, "grad_norm": 1.0991007089614868, "learning_rate": 2e-05, "loss": 0.0417368, "step": 21180 }, { "epoch": 42.362, "grad_norm": 1.6482762098312378, "learning_rate": 2e-05, "loss": 0.04593588, "step": 21181 }, { "epoch": 42.364, "grad_norm": 1.0452308654785156, "learning_rate": 2e-05, "loss": 0.04160969, "step": 21182 }, { "epoch": 42.366, "grad_norm": 1.0658129453659058, "learning_rate": 2e-05, "loss": 0.03999516, "step": 21183 }, { "epoch": 42.368, "grad_norm": 2.2565982341766357, "learning_rate": 2e-05, "loss": 0.04459252, "step": 21184 }, { "epoch": 42.37, "grad_norm": 1.4622379541397095, "learning_rate": 2e-05, "loss": 0.03448307, "step": 21185 }, { "epoch": 42.372, "grad_norm": 1.4955030679702759, "learning_rate": 2e-05, "loss": 0.04232459, "step": 21186 }, { "epoch": 42.374, "grad_norm": 1.5576457977294922, "learning_rate": 2e-05, "loss": 0.04733102, "step": 21187 }, { "epoch": 42.376, "grad_norm": 0.9006845355033875, "learning_rate": 2e-05, "loss": 0.02608144, "step": 21188 }, { "epoch": 42.378, "grad_norm": 1.17303466796875, "learning_rate": 2e-05, "loss": 0.04056245, "step": 21189 }, { "epoch": 42.38, "grad_norm": 1.1064560413360596, "learning_rate": 2e-05, "loss": 0.04857829, "step": 21190 }, { "epoch": 42.382, "grad_norm": 1.5094066858291626, "learning_rate": 2e-05, "loss": 0.05230471, "step": 21191 }, { "epoch": 42.384, "grad_norm": 1.2277864217758179, "learning_rate": 2e-05, "loss": 0.05143297, "step": 21192 }, { "epoch": 42.386, "grad_norm": 1.2100130319595337, "learning_rate": 2e-05, "loss": 0.03008468, "step": 21193 }, { "epoch": 42.388, "grad_norm": 1.0798557996749878, "learning_rate": 2e-05, "loss": 0.03852563, "step": 21194 }, { "epoch": 42.39, "grad_norm": 1.3700233697891235, "learning_rate": 2e-05, "loss": 0.05415892, "step": 21195 }, { "epoch": 42.392, "grad_norm": 0.9260060787200928, "learning_rate": 2e-05, "loss": 0.03694741, "step": 21196 }, { "epoch": 42.394, "grad_norm": 1.2805750370025635, "learning_rate": 2e-05, "loss": 0.03712608, "step": 21197 }, { "epoch": 42.396, "grad_norm": 2.9281117916107178, "learning_rate": 2e-05, "loss": 0.04453873, "step": 21198 }, { "epoch": 42.398, "grad_norm": 1.7376784086227417, "learning_rate": 2e-05, "loss": 0.04942546, "step": 21199 }, { "epoch": 42.4, "grad_norm": 1.7635715007781982, "learning_rate": 2e-05, "loss": 0.06329477, "step": 21200 }, { "epoch": 42.402, "grad_norm": 1.080694556236267, "learning_rate": 2e-05, "loss": 0.04729369, "step": 21201 }, { "epoch": 42.404, "grad_norm": 1.1469027996063232, "learning_rate": 2e-05, "loss": 0.0460621, "step": 21202 }, { "epoch": 42.406, "grad_norm": 1.0239918231964111, "learning_rate": 2e-05, "loss": 0.05489516, "step": 21203 }, { "epoch": 42.408, "grad_norm": 0.9778455495834351, "learning_rate": 2e-05, "loss": 0.03829395, "step": 21204 }, { "epoch": 42.41, "grad_norm": 1.499243974685669, "learning_rate": 2e-05, "loss": 0.05702853, "step": 21205 }, { "epoch": 42.412, "grad_norm": 2.7449965476989746, "learning_rate": 2e-05, "loss": 0.0657511, "step": 21206 }, { "epoch": 42.414, "grad_norm": 1.0279121398925781, "learning_rate": 2e-05, "loss": 0.03782829, "step": 21207 }, { "epoch": 42.416, "grad_norm": 1.3109943866729736, "learning_rate": 2e-05, "loss": 0.04936379, "step": 21208 }, { "epoch": 42.418, "grad_norm": 1.9790655374526978, "learning_rate": 2e-05, "loss": 0.05053977, "step": 21209 }, { "epoch": 42.42, "grad_norm": 1.1504151821136475, "learning_rate": 2e-05, "loss": 0.04175428, "step": 21210 }, { "epoch": 42.422, "grad_norm": 4.053497791290283, "learning_rate": 2e-05, "loss": 0.04726787, "step": 21211 }, { "epoch": 42.424, "grad_norm": 0.8798936605453491, "learning_rate": 2e-05, "loss": 0.03242673, "step": 21212 }, { "epoch": 42.426, "grad_norm": 1.67676842212677, "learning_rate": 2e-05, "loss": 0.05705649, "step": 21213 }, { "epoch": 42.428, "grad_norm": 1.4373422861099243, "learning_rate": 2e-05, "loss": 0.05072725, "step": 21214 }, { "epoch": 42.43, "grad_norm": 1.1524293422698975, "learning_rate": 2e-05, "loss": 0.04345901, "step": 21215 }, { "epoch": 42.432, "grad_norm": 1.0945552587509155, "learning_rate": 2e-05, "loss": 0.02674374, "step": 21216 }, { "epoch": 42.434, "grad_norm": 1.2958787679672241, "learning_rate": 2e-05, "loss": 0.0402573, "step": 21217 }, { "epoch": 42.436, "grad_norm": 1.0399460792541504, "learning_rate": 2e-05, "loss": 0.03993837, "step": 21218 }, { "epoch": 42.438, "grad_norm": 2.2416980266571045, "learning_rate": 2e-05, "loss": 0.04407566, "step": 21219 }, { "epoch": 42.44, "grad_norm": 1.6053255796432495, "learning_rate": 2e-05, "loss": 0.05510966, "step": 21220 }, { "epoch": 42.442, "grad_norm": 1.261043906211853, "learning_rate": 2e-05, "loss": 0.05519258, "step": 21221 }, { "epoch": 42.444, "grad_norm": 1.0750794410705566, "learning_rate": 2e-05, "loss": 0.03906538, "step": 21222 }, { "epoch": 42.446, "grad_norm": 1.2748488187789917, "learning_rate": 2e-05, "loss": 0.04742753, "step": 21223 }, { "epoch": 42.448, "grad_norm": 1.5726701021194458, "learning_rate": 2e-05, "loss": 0.03484065, "step": 21224 }, { "epoch": 42.45, "grad_norm": 1.0977160930633545, "learning_rate": 2e-05, "loss": 0.04690384, "step": 21225 }, { "epoch": 42.452, "grad_norm": 1.0515812635421753, "learning_rate": 2e-05, "loss": 0.04405387, "step": 21226 }, { "epoch": 42.454, "grad_norm": 1.2291545867919922, "learning_rate": 2e-05, "loss": 0.04371041, "step": 21227 }, { "epoch": 42.456, "grad_norm": 2.0006918907165527, "learning_rate": 2e-05, "loss": 0.04751487, "step": 21228 }, { "epoch": 42.458, "grad_norm": 1.6243239641189575, "learning_rate": 2e-05, "loss": 0.05855416, "step": 21229 }, { "epoch": 42.46, "grad_norm": 1.1468603610992432, "learning_rate": 2e-05, "loss": 0.0470052, "step": 21230 }, { "epoch": 42.462, "grad_norm": 1.0569524765014648, "learning_rate": 2e-05, "loss": 0.03764968, "step": 21231 }, { "epoch": 42.464, "grad_norm": 0.9820374846458435, "learning_rate": 2e-05, "loss": 0.04854225, "step": 21232 }, { "epoch": 42.466, "grad_norm": 1.8697410821914673, "learning_rate": 2e-05, "loss": 0.05665092, "step": 21233 }, { "epoch": 42.468, "grad_norm": 1.1022251844406128, "learning_rate": 2e-05, "loss": 0.04963892, "step": 21234 }, { "epoch": 42.47, "grad_norm": 1.0656312704086304, "learning_rate": 2e-05, "loss": 0.04445424, "step": 21235 }, { "epoch": 42.472, "grad_norm": 0.8905332684516907, "learning_rate": 2e-05, "loss": 0.03065421, "step": 21236 }, { "epoch": 42.474, "grad_norm": 0.9474940896034241, "learning_rate": 2e-05, "loss": 0.03434924, "step": 21237 }, { "epoch": 42.476, "grad_norm": 2.2391600608825684, "learning_rate": 2e-05, "loss": 0.03059935, "step": 21238 }, { "epoch": 42.478, "grad_norm": 1.3333183526992798, "learning_rate": 2e-05, "loss": 0.03306688, "step": 21239 }, { "epoch": 42.48, "grad_norm": 1.4091469049453735, "learning_rate": 2e-05, "loss": 0.04927116, "step": 21240 }, { "epoch": 42.482, "grad_norm": 1.0060395002365112, "learning_rate": 2e-05, "loss": 0.04168249, "step": 21241 }, { "epoch": 42.484, "grad_norm": 1.0464057922363281, "learning_rate": 2e-05, "loss": 0.03569805, "step": 21242 }, { "epoch": 42.486, "grad_norm": 0.9568635821342468, "learning_rate": 2e-05, "loss": 0.04906178, "step": 21243 }, { "epoch": 42.488, "grad_norm": 1.7549073696136475, "learning_rate": 2e-05, "loss": 0.04567682, "step": 21244 }, { "epoch": 42.49, "grad_norm": 1.2280941009521484, "learning_rate": 2e-05, "loss": 0.03924225, "step": 21245 }, { "epoch": 42.492, "grad_norm": 1.113108515739441, "learning_rate": 2e-05, "loss": 0.04862555, "step": 21246 }, { "epoch": 42.494, "grad_norm": 1.37856924533844, "learning_rate": 2e-05, "loss": 0.03998582, "step": 21247 }, { "epoch": 42.496, "grad_norm": 1.4711673259735107, "learning_rate": 2e-05, "loss": 0.0462206, "step": 21248 }, { "epoch": 42.498, "grad_norm": 1.006584882736206, "learning_rate": 2e-05, "loss": 0.04498261, "step": 21249 }, { "epoch": 42.5, "grad_norm": 0.9918103814125061, "learning_rate": 2e-05, "loss": 0.03464493, "step": 21250 }, { "epoch": 42.502, "grad_norm": 1.080779790878296, "learning_rate": 2e-05, "loss": 0.04217354, "step": 21251 }, { "epoch": 42.504, "grad_norm": 1.1811786890029907, "learning_rate": 2e-05, "loss": 0.04957807, "step": 21252 }, { "epoch": 42.506, "grad_norm": 1.1711009740829468, "learning_rate": 2e-05, "loss": 0.04837216, "step": 21253 }, { "epoch": 42.508, "grad_norm": 0.9602867960929871, "learning_rate": 2e-05, "loss": 0.04472505, "step": 21254 }, { "epoch": 42.51, "grad_norm": 1.148273229598999, "learning_rate": 2e-05, "loss": 0.05760022, "step": 21255 }, { "epoch": 42.512, "grad_norm": 1.0308644771575928, "learning_rate": 2e-05, "loss": 0.04777342, "step": 21256 }, { "epoch": 42.514, "grad_norm": 0.9910358786582947, "learning_rate": 2e-05, "loss": 0.04250629, "step": 21257 }, { "epoch": 42.516, "grad_norm": 1.1304292678833008, "learning_rate": 2e-05, "loss": 0.03749695, "step": 21258 }, { "epoch": 42.518, "grad_norm": 1.06964111328125, "learning_rate": 2e-05, "loss": 0.04924249, "step": 21259 }, { "epoch": 42.52, "grad_norm": 1.0749262571334839, "learning_rate": 2e-05, "loss": 0.04087149, "step": 21260 }, { "epoch": 42.522, "grad_norm": 0.985001802444458, "learning_rate": 2e-05, "loss": 0.03024037, "step": 21261 }, { "epoch": 42.524, "grad_norm": 0.848243236541748, "learning_rate": 2e-05, "loss": 0.03122064, "step": 21262 }, { "epoch": 42.526, "grad_norm": 0.9843360185623169, "learning_rate": 2e-05, "loss": 0.03658064, "step": 21263 }, { "epoch": 42.528, "grad_norm": 1.0113308429718018, "learning_rate": 2e-05, "loss": 0.04521782, "step": 21264 }, { "epoch": 42.53, "grad_norm": 1.1392583847045898, "learning_rate": 2e-05, "loss": 0.04456335, "step": 21265 }, { "epoch": 42.532, "grad_norm": 1.189031720161438, "learning_rate": 2e-05, "loss": 0.04095358, "step": 21266 }, { "epoch": 42.534, "grad_norm": 0.9462743997573853, "learning_rate": 2e-05, "loss": 0.03962483, "step": 21267 }, { "epoch": 42.536, "grad_norm": 0.952099621295929, "learning_rate": 2e-05, "loss": 0.03354198, "step": 21268 }, { "epoch": 42.538, "grad_norm": 1.1713591814041138, "learning_rate": 2e-05, "loss": 0.04736177, "step": 21269 }, { "epoch": 42.54, "grad_norm": 1.527622938156128, "learning_rate": 2e-05, "loss": 0.06469256, "step": 21270 }, { "epoch": 42.542, "grad_norm": 1.0787570476531982, "learning_rate": 2e-05, "loss": 0.04891206, "step": 21271 }, { "epoch": 42.544, "grad_norm": 1.1135772466659546, "learning_rate": 2e-05, "loss": 0.04695529, "step": 21272 }, { "epoch": 42.546, "grad_norm": 1.249792218208313, "learning_rate": 2e-05, "loss": 0.04677644, "step": 21273 }, { "epoch": 42.548, "grad_norm": 1.034311294555664, "learning_rate": 2e-05, "loss": 0.04172734, "step": 21274 }, { "epoch": 42.55, "grad_norm": 0.9746313095092773, "learning_rate": 2e-05, "loss": 0.0426308, "step": 21275 }, { "epoch": 42.552, "grad_norm": 1.1242482662200928, "learning_rate": 2e-05, "loss": 0.04126712, "step": 21276 }, { "epoch": 42.554, "grad_norm": 1.168648362159729, "learning_rate": 2e-05, "loss": 0.04978744, "step": 21277 }, { "epoch": 42.556, "grad_norm": 0.9454513788223267, "learning_rate": 2e-05, "loss": 0.03711028, "step": 21278 }, { "epoch": 42.558, "grad_norm": 1.0927700996398926, "learning_rate": 2e-05, "loss": 0.0501483, "step": 21279 }, { "epoch": 42.56, "grad_norm": 0.9728769063949585, "learning_rate": 2e-05, "loss": 0.03897431, "step": 21280 }, { "epoch": 42.562, "grad_norm": 1.1005795001983643, "learning_rate": 2e-05, "loss": 0.04550515, "step": 21281 }, { "epoch": 42.564, "grad_norm": 0.987808108329773, "learning_rate": 2e-05, "loss": 0.04153454, "step": 21282 }, { "epoch": 42.566, "grad_norm": 0.9164482951164246, "learning_rate": 2e-05, "loss": 0.03474952, "step": 21283 }, { "epoch": 42.568, "grad_norm": 1.0651296377182007, "learning_rate": 2e-05, "loss": 0.04627332, "step": 21284 }, { "epoch": 42.57, "grad_norm": 1.0586228370666504, "learning_rate": 2e-05, "loss": 0.05105269, "step": 21285 }, { "epoch": 42.572, "grad_norm": 1.3569300174713135, "learning_rate": 2e-05, "loss": 0.04308255, "step": 21286 }, { "epoch": 42.574, "grad_norm": 1.1029856204986572, "learning_rate": 2e-05, "loss": 0.05305447, "step": 21287 }, { "epoch": 42.576, "grad_norm": 1.1028412580490112, "learning_rate": 2e-05, "loss": 0.04933077, "step": 21288 }, { "epoch": 42.578, "grad_norm": 1.2743582725524902, "learning_rate": 2e-05, "loss": 0.05241363, "step": 21289 }, { "epoch": 42.58, "grad_norm": 1.1509772539138794, "learning_rate": 2e-05, "loss": 0.03854637, "step": 21290 }, { "epoch": 42.582, "grad_norm": 1.0635356903076172, "learning_rate": 2e-05, "loss": 0.05608759, "step": 21291 }, { "epoch": 42.584, "grad_norm": 1.134521484375, "learning_rate": 2e-05, "loss": 0.04343338, "step": 21292 }, { "epoch": 42.586, "grad_norm": 1.8006287813186646, "learning_rate": 2e-05, "loss": 0.05853735, "step": 21293 }, { "epoch": 42.588, "grad_norm": 1.3524338006973267, "learning_rate": 2e-05, "loss": 0.04346161, "step": 21294 }, { "epoch": 42.59, "grad_norm": 0.7984457015991211, "learning_rate": 2e-05, "loss": 0.02500311, "step": 21295 }, { "epoch": 42.592, "grad_norm": 1.4152253866195679, "learning_rate": 2e-05, "loss": 0.03079014, "step": 21296 }, { "epoch": 42.594, "grad_norm": 0.8737123608589172, "learning_rate": 2e-05, "loss": 0.03268551, "step": 21297 }, { "epoch": 42.596, "grad_norm": 1.0920144319534302, "learning_rate": 2e-05, "loss": 0.03899828, "step": 21298 }, { "epoch": 42.598, "grad_norm": 1.1119940280914307, "learning_rate": 2e-05, "loss": 0.04411807, "step": 21299 }, { "epoch": 42.6, "grad_norm": 1.0842735767364502, "learning_rate": 2e-05, "loss": 0.04794773, "step": 21300 }, { "epoch": 42.602, "grad_norm": 3.1000618934631348, "learning_rate": 2e-05, "loss": 0.0384191, "step": 21301 }, { "epoch": 42.604, "grad_norm": 1.286641001701355, "learning_rate": 2e-05, "loss": 0.0585029, "step": 21302 }, { "epoch": 42.606, "grad_norm": 1.0066334009170532, "learning_rate": 2e-05, "loss": 0.03213748, "step": 21303 }, { "epoch": 42.608, "grad_norm": 1.1060172319412231, "learning_rate": 2e-05, "loss": 0.03273823, "step": 21304 }, { "epoch": 42.61, "grad_norm": 1.255589246749878, "learning_rate": 2e-05, "loss": 0.03794422, "step": 21305 }, { "epoch": 42.612, "grad_norm": 1.1036614179611206, "learning_rate": 2e-05, "loss": 0.06087472, "step": 21306 }, { "epoch": 42.614, "grad_norm": 1.1182001829147339, "learning_rate": 2e-05, "loss": 0.05106495, "step": 21307 }, { "epoch": 42.616, "grad_norm": 1.0609266757965088, "learning_rate": 2e-05, "loss": 0.03429703, "step": 21308 }, { "epoch": 42.618, "grad_norm": 1.2208373546600342, "learning_rate": 2e-05, "loss": 0.04771691, "step": 21309 }, { "epoch": 42.62, "grad_norm": 1.0098849534988403, "learning_rate": 2e-05, "loss": 0.04837553, "step": 21310 }, { "epoch": 42.622, "grad_norm": 1.2071741819381714, "learning_rate": 2e-05, "loss": 0.04618914, "step": 21311 }, { "epoch": 42.624, "grad_norm": 1.147689938545227, "learning_rate": 2e-05, "loss": 0.03351322, "step": 21312 }, { "epoch": 42.626, "grad_norm": 0.8061649799346924, "learning_rate": 2e-05, "loss": 0.02755425, "step": 21313 }, { "epoch": 42.628, "grad_norm": 1.537559151649475, "learning_rate": 2e-05, "loss": 0.06420283, "step": 21314 }, { "epoch": 42.63, "grad_norm": 1.0959607362747192, "learning_rate": 2e-05, "loss": 0.04984133, "step": 21315 }, { "epoch": 42.632, "grad_norm": 1.1591609716415405, "learning_rate": 2e-05, "loss": 0.03763618, "step": 21316 }, { "epoch": 42.634, "grad_norm": 1.754645586013794, "learning_rate": 2e-05, "loss": 0.05017209, "step": 21317 }, { "epoch": 42.636, "grad_norm": 2.0186855792999268, "learning_rate": 2e-05, "loss": 0.03558879, "step": 21318 }, { "epoch": 42.638, "grad_norm": 1.020376205444336, "learning_rate": 2e-05, "loss": 0.04305244, "step": 21319 }, { "epoch": 42.64, "grad_norm": 1.73893141746521, "learning_rate": 2e-05, "loss": 0.06790554, "step": 21320 }, { "epoch": 42.642, "grad_norm": 1.8202763795852661, "learning_rate": 2e-05, "loss": 0.05019502, "step": 21321 }, { "epoch": 42.644, "grad_norm": 1.043605923652649, "learning_rate": 2e-05, "loss": 0.04786765, "step": 21322 }, { "epoch": 42.646, "grad_norm": 1.0043532848358154, "learning_rate": 2e-05, "loss": 0.03402919, "step": 21323 }, { "epoch": 42.648, "grad_norm": 0.7921276092529297, "learning_rate": 2e-05, "loss": 0.02800082, "step": 21324 }, { "epoch": 42.65, "grad_norm": 1.357438564300537, "learning_rate": 2e-05, "loss": 0.05154531, "step": 21325 }, { "epoch": 42.652, "grad_norm": 0.9726096987724304, "learning_rate": 2e-05, "loss": 0.0334789, "step": 21326 }, { "epoch": 42.654, "grad_norm": 3.2434630393981934, "learning_rate": 2e-05, "loss": 0.05153171, "step": 21327 }, { "epoch": 42.656, "grad_norm": 1.2478336095809937, "learning_rate": 2e-05, "loss": 0.07013831, "step": 21328 }, { "epoch": 42.658, "grad_norm": 1.9664303064346313, "learning_rate": 2e-05, "loss": 0.04062814, "step": 21329 }, { "epoch": 42.66, "grad_norm": 1.2268818616867065, "learning_rate": 2e-05, "loss": 0.04824457, "step": 21330 }, { "epoch": 42.662, "grad_norm": 1.1204721927642822, "learning_rate": 2e-05, "loss": 0.03650754, "step": 21331 }, { "epoch": 42.664, "grad_norm": 0.7986552715301514, "learning_rate": 2e-05, "loss": 0.02231879, "step": 21332 }, { "epoch": 42.666, "grad_norm": 2.5757551193237305, "learning_rate": 2e-05, "loss": 0.03182121, "step": 21333 }, { "epoch": 42.668, "grad_norm": 0.7458001971244812, "learning_rate": 2e-05, "loss": 0.0281298, "step": 21334 }, { "epoch": 42.67, "grad_norm": 1.1458609104156494, "learning_rate": 2e-05, "loss": 0.03544441, "step": 21335 }, { "epoch": 42.672, "grad_norm": 1.7446544170379639, "learning_rate": 2e-05, "loss": 0.03087436, "step": 21336 }, { "epoch": 42.674, "grad_norm": 1.0553362369537354, "learning_rate": 2e-05, "loss": 0.03725977, "step": 21337 }, { "epoch": 42.676, "grad_norm": 1.188696265220642, "learning_rate": 2e-05, "loss": 0.04746586, "step": 21338 }, { "epoch": 42.678, "grad_norm": 0.9805550575256348, "learning_rate": 2e-05, "loss": 0.03549175, "step": 21339 }, { "epoch": 42.68, "grad_norm": 1.335976004600525, "learning_rate": 2e-05, "loss": 0.0446689, "step": 21340 }, { "epoch": 42.682, "grad_norm": 0.978948175907135, "learning_rate": 2e-05, "loss": 0.0349609, "step": 21341 }, { "epoch": 42.684, "grad_norm": 1.1237258911132812, "learning_rate": 2e-05, "loss": 0.04196231, "step": 21342 }, { "epoch": 42.686, "grad_norm": 1.04066002368927, "learning_rate": 2e-05, "loss": 0.03497246, "step": 21343 }, { "epoch": 42.688, "grad_norm": 1.1830928325653076, "learning_rate": 2e-05, "loss": 0.06363607, "step": 21344 }, { "epoch": 42.69, "grad_norm": 1.5703403949737549, "learning_rate": 2e-05, "loss": 0.04827712, "step": 21345 }, { "epoch": 42.692, "grad_norm": 1.1111451387405396, "learning_rate": 2e-05, "loss": 0.04399043, "step": 21346 }, { "epoch": 42.694, "grad_norm": 2.0086216926574707, "learning_rate": 2e-05, "loss": 0.05659229, "step": 21347 }, { "epoch": 42.696, "grad_norm": 1.04426109790802, "learning_rate": 2e-05, "loss": 0.0373329, "step": 21348 }, { "epoch": 42.698, "grad_norm": 0.9672964215278625, "learning_rate": 2e-05, "loss": 0.03778827, "step": 21349 }, { "epoch": 42.7, "grad_norm": 1.0639363527297974, "learning_rate": 2e-05, "loss": 0.04429337, "step": 21350 }, { "epoch": 42.702, "grad_norm": 1.6714627742767334, "learning_rate": 2e-05, "loss": 0.05253733, "step": 21351 }, { "epoch": 42.704, "grad_norm": 0.9796875715255737, "learning_rate": 2e-05, "loss": 0.0411361, "step": 21352 }, { "epoch": 42.706, "grad_norm": 1.141188383102417, "learning_rate": 2e-05, "loss": 0.04487546, "step": 21353 }, { "epoch": 42.708, "grad_norm": 1.9173147678375244, "learning_rate": 2e-05, "loss": 0.03961648, "step": 21354 }, { "epoch": 42.71, "grad_norm": 0.9901758432388306, "learning_rate": 2e-05, "loss": 0.03932144, "step": 21355 }, { "epoch": 42.712, "grad_norm": 0.9668986797332764, "learning_rate": 2e-05, "loss": 0.03574202, "step": 21356 }, { "epoch": 42.714, "grad_norm": 1.068825364112854, "learning_rate": 2e-05, "loss": 0.04652916, "step": 21357 }, { "epoch": 42.716, "grad_norm": 1.309280276298523, "learning_rate": 2e-05, "loss": 0.03779301, "step": 21358 }, { "epoch": 42.718, "grad_norm": 1.0045759677886963, "learning_rate": 2e-05, "loss": 0.03695038, "step": 21359 }, { "epoch": 42.72, "grad_norm": 1.842631459236145, "learning_rate": 2e-05, "loss": 0.03737037, "step": 21360 }, { "epoch": 42.722, "grad_norm": 1.136857032775879, "learning_rate": 2e-05, "loss": 0.04677048, "step": 21361 }, { "epoch": 42.724, "grad_norm": 0.8244582414627075, "learning_rate": 2e-05, "loss": 0.025115, "step": 21362 }, { "epoch": 42.726, "grad_norm": 1.0085093975067139, "learning_rate": 2e-05, "loss": 0.03924392, "step": 21363 }, { "epoch": 42.728, "grad_norm": 1.217607021331787, "learning_rate": 2e-05, "loss": 0.04002892, "step": 21364 }, { "epoch": 42.73, "grad_norm": 1.2852460145950317, "learning_rate": 2e-05, "loss": 0.05665496, "step": 21365 }, { "epoch": 42.732, "grad_norm": 1.36598801612854, "learning_rate": 2e-05, "loss": 0.03184102, "step": 21366 }, { "epoch": 42.734, "grad_norm": 0.9200010895729065, "learning_rate": 2e-05, "loss": 0.04184512, "step": 21367 }, { "epoch": 42.736, "grad_norm": 0.9530177712440491, "learning_rate": 2e-05, "loss": 0.03544679, "step": 21368 }, { "epoch": 42.738, "grad_norm": 1.4244446754455566, "learning_rate": 2e-05, "loss": 0.0660693, "step": 21369 }, { "epoch": 42.74, "grad_norm": 1.3922539949417114, "learning_rate": 2e-05, "loss": 0.04483814, "step": 21370 }, { "epoch": 42.742, "grad_norm": 1.0140858888626099, "learning_rate": 2e-05, "loss": 0.03909463, "step": 21371 }, { "epoch": 42.744, "grad_norm": 1.066416621208191, "learning_rate": 2e-05, "loss": 0.0501445, "step": 21372 }, { "epoch": 42.746, "grad_norm": 1.2194859981536865, "learning_rate": 2e-05, "loss": 0.03849475, "step": 21373 }, { "epoch": 42.748, "grad_norm": 1.0662126541137695, "learning_rate": 2e-05, "loss": 0.04596983, "step": 21374 }, { "epoch": 42.75, "grad_norm": 1.2220911979675293, "learning_rate": 2e-05, "loss": 0.03389343, "step": 21375 }, { "epoch": 42.752, "grad_norm": 1.2995012998580933, "learning_rate": 2e-05, "loss": 0.0537604, "step": 21376 }, { "epoch": 42.754, "grad_norm": 0.9738687872886658, "learning_rate": 2e-05, "loss": 0.03609116, "step": 21377 }, { "epoch": 42.756, "grad_norm": 1.242716670036316, "learning_rate": 2e-05, "loss": 0.04677413, "step": 21378 }, { "epoch": 42.758, "grad_norm": 1.2014180421829224, "learning_rate": 2e-05, "loss": 0.05662605, "step": 21379 }, { "epoch": 42.76, "grad_norm": 1.1085035800933838, "learning_rate": 2e-05, "loss": 0.05397139, "step": 21380 }, { "epoch": 42.762, "grad_norm": 0.9372961521148682, "learning_rate": 2e-05, "loss": 0.02678905, "step": 21381 }, { "epoch": 42.764, "grad_norm": 2.219107151031494, "learning_rate": 2e-05, "loss": 0.04180562, "step": 21382 }, { "epoch": 42.766, "grad_norm": 0.9510572552680969, "learning_rate": 2e-05, "loss": 0.04218516, "step": 21383 }, { "epoch": 42.768, "grad_norm": 1.774696946144104, "learning_rate": 2e-05, "loss": 0.05132428, "step": 21384 }, { "epoch": 42.77, "grad_norm": 0.9764688014984131, "learning_rate": 2e-05, "loss": 0.03274138, "step": 21385 }, { "epoch": 42.772, "grad_norm": 1.3046088218688965, "learning_rate": 2e-05, "loss": 0.02933467, "step": 21386 }, { "epoch": 42.774, "grad_norm": 2.027909278869629, "learning_rate": 2e-05, "loss": 0.04343503, "step": 21387 }, { "epoch": 42.776, "grad_norm": 1.1519170999526978, "learning_rate": 2e-05, "loss": 0.05434699, "step": 21388 }, { "epoch": 42.778, "grad_norm": 3.7713587284088135, "learning_rate": 2e-05, "loss": 0.05753239, "step": 21389 }, { "epoch": 42.78, "grad_norm": 0.9936582446098328, "learning_rate": 2e-05, "loss": 0.03911688, "step": 21390 }, { "epoch": 42.782, "grad_norm": 1.6034986972808838, "learning_rate": 2e-05, "loss": 0.04731909, "step": 21391 }, { "epoch": 42.784, "grad_norm": 1.1250642538070679, "learning_rate": 2e-05, "loss": 0.04768999, "step": 21392 }, { "epoch": 42.786, "grad_norm": 1.2233883142471313, "learning_rate": 2e-05, "loss": 0.05830994, "step": 21393 }, { "epoch": 42.788, "grad_norm": 1.0575405359268188, "learning_rate": 2e-05, "loss": 0.04625095, "step": 21394 }, { "epoch": 42.79, "grad_norm": 1.0830788612365723, "learning_rate": 2e-05, "loss": 0.05283836, "step": 21395 }, { "epoch": 42.792, "grad_norm": 1.7933377027511597, "learning_rate": 2e-05, "loss": 0.05274478, "step": 21396 }, { "epoch": 42.794, "grad_norm": 1.1126435995101929, "learning_rate": 2e-05, "loss": 0.05141603, "step": 21397 }, { "epoch": 42.796, "grad_norm": 1.465794324874878, "learning_rate": 2e-05, "loss": 0.06318197, "step": 21398 }, { "epoch": 42.798, "grad_norm": 1.0046237707138062, "learning_rate": 2e-05, "loss": 0.0381258, "step": 21399 }, { "epoch": 42.8, "grad_norm": 0.8223246932029724, "learning_rate": 2e-05, "loss": 0.0240345, "step": 21400 }, { "epoch": 42.802, "grad_norm": 1.1046738624572754, "learning_rate": 2e-05, "loss": 0.03899305, "step": 21401 }, { "epoch": 42.804, "grad_norm": 1.4683109521865845, "learning_rate": 2e-05, "loss": 0.05575843, "step": 21402 }, { "epoch": 42.806, "grad_norm": 1.2445991039276123, "learning_rate": 2e-05, "loss": 0.04129289, "step": 21403 }, { "epoch": 42.808, "grad_norm": 1.1447534561157227, "learning_rate": 2e-05, "loss": 0.04119815, "step": 21404 }, { "epoch": 42.81, "grad_norm": 1.0184522867202759, "learning_rate": 2e-05, "loss": 0.04055436, "step": 21405 }, { "epoch": 42.812, "grad_norm": 1.3136247396469116, "learning_rate": 2e-05, "loss": 0.04421426, "step": 21406 }, { "epoch": 42.814, "grad_norm": 1.3091551065444946, "learning_rate": 2e-05, "loss": 0.03514631, "step": 21407 }, { "epoch": 42.816, "grad_norm": 1.0712436437606812, "learning_rate": 2e-05, "loss": 0.03913643, "step": 21408 }, { "epoch": 42.818, "grad_norm": 2.389177083969116, "learning_rate": 2e-05, "loss": 0.0521326, "step": 21409 }, { "epoch": 42.82, "grad_norm": 0.8904063701629639, "learning_rate": 2e-05, "loss": 0.03759852, "step": 21410 }, { "epoch": 42.822, "grad_norm": 0.9511417150497437, "learning_rate": 2e-05, "loss": 0.03394198, "step": 21411 }, { "epoch": 42.824, "grad_norm": 0.9193059802055359, "learning_rate": 2e-05, "loss": 0.03371462, "step": 21412 }, { "epoch": 42.826, "grad_norm": 0.9347977042198181, "learning_rate": 2e-05, "loss": 0.03795308, "step": 21413 }, { "epoch": 42.828, "grad_norm": 1.1205661296844482, "learning_rate": 2e-05, "loss": 0.03589653, "step": 21414 }, { "epoch": 42.83, "grad_norm": 1.3715908527374268, "learning_rate": 2e-05, "loss": 0.03769862, "step": 21415 }, { "epoch": 42.832, "grad_norm": 1.636546015739441, "learning_rate": 2e-05, "loss": 0.04890811, "step": 21416 }, { "epoch": 42.834, "grad_norm": 1.7343817949295044, "learning_rate": 2e-05, "loss": 0.03954367, "step": 21417 }, { "epoch": 42.836, "grad_norm": 1.5449166297912598, "learning_rate": 2e-05, "loss": 0.05331591, "step": 21418 }, { "epoch": 42.838, "grad_norm": 1.5310888290405273, "learning_rate": 2e-05, "loss": 0.03996238, "step": 21419 }, { "epoch": 42.84, "grad_norm": 1.1800297498703003, "learning_rate": 2e-05, "loss": 0.04429355, "step": 21420 }, { "epoch": 42.842, "grad_norm": 1.1037722826004028, "learning_rate": 2e-05, "loss": 0.0450694, "step": 21421 }, { "epoch": 42.844, "grad_norm": 1.0636318922042847, "learning_rate": 2e-05, "loss": 0.04528022, "step": 21422 }, { "epoch": 42.846, "grad_norm": 1.1653640270233154, "learning_rate": 2e-05, "loss": 0.04300493, "step": 21423 }, { "epoch": 42.848, "grad_norm": 1.2080070972442627, "learning_rate": 2e-05, "loss": 0.04970691, "step": 21424 }, { "epoch": 42.85, "grad_norm": 1.0542407035827637, "learning_rate": 2e-05, "loss": 0.04026613, "step": 21425 }, { "epoch": 42.852, "grad_norm": 1.0462920665740967, "learning_rate": 2e-05, "loss": 0.03686008, "step": 21426 }, { "epoch": 42.854, "grad_norm": 1.3161240816116333, "learning_rate": 2e-05, "loss": 0.0503227, "step": 21427 }, { "epoch": 42.856, "grad_norm": 1.0535399913787842, "learning_rate": 2e-05, "loss": 0.03271418, "step": 21428 }, { "epoch": 42.858, "grad_norm": 2.791760206222534, "learning_rate": 2e-05, "loss": 0.0443115, "step": 21429 }, { "epoch": 42.86, "grad_norm": 1.4205018281936646, "learning_rate": 2e-05, "loss": 0.04642691, "step": 21430 }, { "epoch": 42.862, "grad_norm": 1.014106035232544, "learning_rate": 2e-05, "loss": 0.04128919, "step": 21431 }, { "epoch": 42.864, "grad_norm": 1.2442071437835693, "learning_rate": 2e-05, "loss": 0.03398401, "step": 21432 }, { "epoch": 42.866, "grad_norm": 0.9644742608070374, "learning_rate": 2e-05, "loss": 0.03220072, "step": 21433 }, { "epoch": 42.868, "grad_norm": 0.9383495450019836, "learning_rate": 2e-05, "loss": 0.03303047, "step": 21434 }, { "epoch": 42.87, "grad_norm": 1.2647031545639038, "learning_rate": 2e-05, "loss": 0.03338578, "step": 21435 }, { "epoch": 42.872, "grad_norm": 1.1294792890548706, "learning_rate": 2e-05, "loss": 0.04752643, "step": 21436 }, { "epoch": 42.874, "grad_norm": 0.9950034618377686, "learning_rate": 2e-05, "loss": 0.03106094, "step": 21437 }, { "epoch": 42.876, "grad_norm": 0.9773361086845398, "learning_rate": 2e-05, "loss": 0.04474681, "step": 21438 }, { "epoch": 42.878, "grad_norm": 0.9458305239677429, "learning_rate": 2e-05, "loss": 0.03223486, "step": 21439 }, { "epoch": 42.88, "grad_norm": 1.4514533281326294, "learning_rate": 2e-05, "loss": 0.05990266, "step": 21440 }, { "epoch": 42.882, "grad_norm": 1.3891021013259888, "learning_rate": 2e-05, "loss": 0.04283629, "step": 21441 }, { "epoch": 42.884, "grad_norm": 0.8657371401786804, "learning_rate": 2e-05, "loss": 0.0279903, "step": 21442 }, { "epoch": 42.886, "grad_norm": 1.2504229545593262, "learning_rate": 2e-05, "loss": 0.05416631, "step": 21443 }, { "epoch": 42.888, "grad_norm": 4.913718223571777, "learning_rate": 2e-05, "loss": 0.03611604, "step": 21444 }, { "epoch": 42.89, "grad_norm": 1.2434426546096802, "learning_rate": 2e-05, "loss": 0.04722682, "step": 21445 }, { "epoch": 42.892, "grad_norm": 1.3431973457336426, "learning_rate": 2e-05, "loss": 0.03858235, "step": 21446 }, { "epoch": 42.894, "grad_norm": 1.3316370248794556, "learning_rate": 2e-05, "loss": 0.06466392, "step": 21447 }, { "epoch": 42.896, "grad_norm": 1.109006643295288, "learning_rate": 2e-05, "loss": 0.02522644, "step": 21448 }, { "epoch": 42.898, "grad_norm": 1.0290035009384155, "learning_rate": 2e-05, "loss": 0.03545004, "step": 21449 }, { "epoch": 42.9, "grad_norm": 1.3004398345947266, "learning_rate": 2e-05, "loss": 0.04687661, "step": 21450 }, { "epoch": 42.902, "grad_norm": 1.601977825164795, "learning_rate": 2e-05, "loss": 0.04422747, "step": 21451 }, { "epoch": 42.904, "grad_norm": 1.092894434928894, "learning_rate": 2e-05, "loss": 0.05042407, "step": 21452 }, { "epoch": 42.906, "grad_norm": 1.2732504606246948, "learning_rate": 2e-05, "loss": 0.03294954, "step": 21453 }, { "epoch": 42.908, "grad_norm": 0.9969822764396667, "learning_rate": 2e-05, "loss": 0.03621193, "step": 21454 }, { "epoch": 42.91, "grad_norm": 0.9931317567825317, "learning_rate": 2e-05, "loss": 0.03277697, "step": 21455 }, { "epoch": 42.912, "grad_norm": 1.042848825454712, "learning_rate": 2e-05, "loss": 0.04085307, "step": 21456 }, { "epoch": 42.914, "grad_norm": 6.913528919219971, "learning_rate": 2e-05, "loss": 0.0584168, "step": 21457 }, { "epoch": 42.916, "grad_norm": 1.1711221933364868, "learning_rate": 2e-05, "loss": 0.03455652, "step": 21458 }, { "epoch": 42.918, "grad_norm": 1.063004493713379, "learning_rate": 2e-05, "loss": 0.03290723, "step": 21459 }, { "epoch": 42.92, "grad_norm": 0.9021233916282654, "learning_rate": 2e-05, "loss": 0.02424634, "step": 21460 }, { "epoch": 42.922, "grad_norm": 1.5889981985092163, "learning_rate": 2e-05, "loss": 0.04993482, "step": 21461 }, { "epoch": 42.924, "grad_norm": 2.0074312686920166, "learning_rate": 2e-05, "loss": 0.0558555, "step": 21462 }, { "epoch": 42.926, "grad_norm": 1.0905903577804565, "learning_rate": 2e-05, "loss": 0.05450784, "step": 21463 }, { "epoch": 42.928, "grad_norm": 2.750641345977783, "learning_rate": 2e-05, "loss": 0.05204348, "step": 21464 }, { "epoch": 42.93, "grad_norm": 1.5680136680603027, "learning_rate": 2e-05, "loss": 0.04504358, "step": 21465 }, { "epoch": 42.932, "grad_norm": 1.9544981718063354, "learning_rate": 2e-05, "loss": 0.05047791, "step": 21466 }, { "epoch": 42.934, "grad_norm": 1.3666836023330688, "learning_rate": 2e-05, "loss": 0.04208256, "step": 21467 }, { "epoch": 42.936, "grad_norm": 1.016249418258667, "learning_rate": 2e-05, "loss": 0.02993571, "step": 21468 }, { "epoch": 42.938, "grad_norm": 1.0648640394210815, "learning_rate": 2e-05, "loss": 0.04894018, "step": 21469 }, { "epoch": 42.94, "grad_norm": 1.2745357751846313, "learning_rate": 2e-05, "loss": 0.05232563, "step": 21470 }, { "epoch": 42.942, "grad_norm": 1.3927181959152222, "learning_rate": 2e-05, "loss": 0.04997092, "step": 21471 }, { "epoch": 42.944, "grad_norm": 0.8759673833847046, "learning_rate": 2e-05, "loss": 0.02804527, "step": 21472 }, { "epoch": 42.946, "grad_norm": 1.3150873184204102, "learning_rate": 2e-05, "loss": 0.04038583, "step": 21473 }, { "epoch": 42.948, "grad_norm": 1.202315092086792, "learning_rate": 2e-05, "loss": 0.05017352, "step": 21474 }, { "epoch": 42.95, "grad_norm": 1.2554532289505005, "learning_rate": 2e-05, "loss": 0.05457747, "step": 21475 }, { "epoch": 42.952, "grad_norm": 1.044266700744629, "learning_rate": 2e-05, "loss": 0.05660079, "step": 21476 }, { "epoch": 42.954, "grad_norm": 1.81424081325531, "learning_rate": 2e-05, "loss": 0.05007328, "step": 21477 }, { "epoch": 42.956, "grad_norm": 1.0985548496246338, "learning_rate": 2e-05, "loss": 0.03858521, "step": 21478 }, { "epoch": 42.958, "grad_norm": 1.0454684495925903, "learning_rate": 2e-05, "loss": 0.04611177, "step": 21479 }, { "epoch": 42.96, "grad_norm": 1.794240117073059, "learning_rate": 2e-05, "loss": 0.03934781, "step": 21480 }, { "epoch": 42.962, "grad_norm": 1.3604718446731567, "learning_rate": 2e-05, "loss": 0.04623352, "step": 21481 }, { "epoch": 42.964, "grad_norm": 1.2288618087768555, "learning_rate": 2e-05, "loss": 0.05231085, "step": 21482 }, { "epoch": 42.966, "grad_norm": 2.0184102058410645, "learning_rate": 2e-05, "loss": 0.04680885, "step": 21483 }, { "epoch": 42.968, "grad_norm": 1.0228439569473267, "learning_rate": 2e-05, "loss": 0.03343446, "step": 21484 }, { "epoch": 42.97, "grad_norm": 1.1432167291641235, "learning_rate": 2e-05, "loss": 0.04577513, "step": 21485 }, { "epoch": 42.972, "grad_norm": 1.189196228981018, "learning_rate": 2e-05, "loss": 0.04577956, "step": 21486 }, { "epoch": 42.974, "grad_norm": 1.0387612581253052, "learning_rate": 2e-05, "loss": 0.04349749, "step": 21487 }, { "epoch": 42.976, "grad_norm": 1.079666018486023, "learning_rate": 2e-05, "loss": 0.02546114, "step": 21488 }, { "epoch": 42.978, "grad_norm": 1.4964441061019897, "learning_rate": 2e-05, "loss": 0.05020229, "step": 21489 }, { "epoch": 42.98, "grad_norm": 1.493005394935608, "learning_rate": 2e-05, "loss": 0.04636772, "step": 21490 }, { "epoch": 42.982, "grad_norm": 1.5666682720184326, "learning_rate": 2e-05, "loss": 0.03906295, "step": 21491 }, { "epoch": 42.984, "grad_norm": 2.3162825107574463, "learning_rate": 2e-05, "loss": 0.05698483, "step": 21492 }, { "epoch": 42.986, "grad_norm": 1.2752532958984375, "learning_rate": 2e-05, "loss": 0.0493651, "step": 21493 }, { "epoch": 42.988, "grad_norm": 0.8096239566802979, "learning_rate": 2e-05, "loss": 0.01962134, "step": 21494 }, { "epoch": 42.99, "grad_norm": 0.8358118534088135, "learning_rate": 2e-05, "loss": 0.02985613, "step": 21495 }, { "epoch": 42.992, "grad_norm": 0.9539084434509277, "learning_rate": 2e-05, "loss": 0.03570447, "step": 21496 }, { "epoch": 42.994, "grad_norm": 1.0816057920455933, "learning_rate": 2e-05, "loss": 0.04599762, "step": 21497 }, { "epoch": 42.996, "grad_norm": 1.0575499534606934, "learning_rate": 2e-05, "loss": 0.04695037, "step": 21498 }, { "epoch": 42.998, "grad_norm": 1.0272645950317383, "learning_rate": 2e-05, "loss": 0.04532609, "step": 21499 }, { "epoch": 43.0, "grad_norm": 3.272545337677002, "learning_rate": 2e-05, "loss": 0.06008004, "step": 21500 }, { "epoch": 43.0, "eval_performance": { "AngleClassification_1": 0.994, "AngleClassification_2": 0.996, "AngleClassification_3": 0.9860279441117764, "Equal_1": 0.998, "Equal_2": 0.9800399201596807, "Equal_3": 0.9900199600798403, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.9960079840319361, "Parallel_1": 0.9879759519038076, "Parallel_2": 0.9939879759519038, "Parallel_3": 0.992, "Perpendicular_1": 1.0, "Perpendicular_2": 0.996, "Perpendicular_3": 0.8877755511022044, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.994, "PointLiesOnCircle_3": 0.9916, "PointLiesOnLine_1": 0.9939879759519038, "PointLiesOnLine_2": 1.0, "PointLiesOnLine_3": 0.9860279441117764 }, "eval_runtime": 225.1483, "eval_samples_per_second": 46.636, "eval_steps_per_second": 0.933, "step": 21500 }, { "epoch": 43.002, "grad_norm": 1.0454511642456055, "learning_rate": 2e-05, "loss": 0.04252189, "step": 21501 }, { "epoch": 43.004, "grad_norm": 1.4177364110946655, "learning_rate": 2e-05, "loss": 0.05265589, "step": 21502 }, { "epoch": 43.006, "grad_norm": 1.1492496728897095, "learning_rate": 2e-05, "loss": 0.04217809, "step": 21503 }, { "epoch": 43.008, "grad_norm": 0.9968222975730896, "learning_rate": 2e-05, "loss": 0.04590034, "step": 21504 }, { "epoch": 43.01, "grad_norm": 1.167055606842041, "learning_rate": 2e-05, "loss": 0.05022173, "step": 21505 }, { "epoch": 43.012, "grad_norm": 0.8603715896606445, "learning_rate": 2e-05, "loss": 0.02986917, "step": 21506 }, { "epoch": 43.014, "grad_norm": 1.1999927759170532, "learning_rate": 2e-05, "loss": 0.04406471, "step": 21507 }, { "epoch": 43.016, "grad_norm": 1.0443135499954224, "learning_rate": 2e-05, "loss": 0.03682392, "step": 21508 }, { "epoch": 43.018, "grad_norm": 2.145573854446411, "learning_rate": 2e-05, "loss": 0.04620774, "step": 21509 }, { "epoch": 43.02, "grad_norm": 1.2781721353530884, "learning_rate": 2e-05, "loss": 0.04780835, "step": 21510 }, { "epoch": 43.022, "grad_norm": 1.127905249595642, "learning_rate": 2e-05, "loss": 0.03601499, "step": 21511 }, { "epoch": 43.024, "grad_norm": 1.292842149734497, "learning_rate": 2e-05, "loss": 0.04630895, "step": 21512 }, { "epoch": 43.026, "grad_norm": 1.2159966230392456, "learning_rate": 2e-05, "loss": 0.0356175, "step": 21513 }, { "epoch": 43.028, "grad_norm": 1.4705806970596313, "learning_rate": 2e-05, "loss": 0.05325876, "step": 21514 }, { "epoch": 43.03, "grad_norm": 0.9386272430419922, "learning_rate": 2e-05, "loss": 0.03543054, "step": 21515 }, { "epoch": 43.032, "grad_norm": 1.213313102722168, "learning_rate": 2e-05, "loss": 0.03027849, "step": 21516 }, { "epoch": 43.034, "grad_norm": 1.2928411960601807, "learning_rate": 2e-05, "loss": 0.05427338, "step": 21517 }, { "epoch": 43.036, "grad_norm": 1.0347918272018433, "learning_rate": 2e-05, "loss": 0.04139549, "step": 21518 }, { "epoch": 43.038, "grad_norm": 0.9764124751091003, "learning_rate": 2e-05, "loss": 0.04017229, "step": 21519 }, { "epoch": 43.04, "grad_norm": 1.049005150794983, "learning_rate": 2e-05, "loss": 0.04937005, "step": 21520 }, { "epoch": 43.042, "grad_norm": 1.1918678283691406, "learning_rate": 2e-05, "loss": 0.04529165, "step": 21521 }, { "epoch": 43.044, "grad_norm": 0.9093082547187805, "learning_rate": 2e-05, "loss": 0.04197581, "step": 21522 }, { "epoch": 43.046, "grad_norm": 0.9634941220283508, "learning_rate": 2e-05, "loss": 0.03913788, "step": 21523 }, { "epoch": 43.048, "grad_norm": 1.3422772884368896, "learning_rate": 2e-05, "loss": 0.03054167, "step": 21524 }, { "epoch": 43.05, "grad_norm": 1.3078621625900269, "learning_rate": 2e-05, "loss": 0.05244038, "step": 21525 }, { "epoch": 43.052, "grad_norm": 1.1020398139953613, "learning_rate": 2e-05, "loss": 0.04323418, "step": 21526 }, { "epoch": 43.054, "grad_norm": 0.8638684749603271, "learning_rate": 2e-05, "loss": 0.02945768, "step": 21527 }, { "epoch": 43.056, "grad_norm": 1.0392653942108154, "learning_rate": 2e-05, "loss": 0.04109567, "step": 21528 }, { "epoch": 43.058, "grad_norm": 1.2084120512008667, "learning_rate": 2e-05, "loss": 0.03830104, "step": 21529 }, { "epoch": 43.06, "grad_norm": 1.077960729598999, "learning_rate": 2e-05, "loss": 0.037576, "step": 21530 }, { "epoch": 43.062, "grad_norm": 1.1066217422485352, "learning_rate": 2e-05, "loss": 0.02993669, "step": 21531 }, { "epoch": 43.064, "grad_norm": 0.8886038661003113, "learning_rate": 2e-05, "loss": 0.03157776, "step": 21532 }, { "epoch": 43.066, "grad_norm": 1.0748250484466553, "learning_rate": 2e-05, "loss": 0.03340712, "step": 21533 }, { "epoch": 43.068, "grad_norm": 1.2448629140853882, "learning_rate": 2e-05, "loss": 0.03834453, "step": 21534 }, { "epoch": 43.07, "grad_norm": 0.978026807308197, "learning_rate": 2e-05, "loss": 0.03877458, "step": 21535 }, { "epoch": 43.072, "grad_norm": 1.1117609739303589, "learning_rate": 2e-05, "loss": 0.03468132, "step": 21536 }, { "epoch": 43.074, "grad_norm": 2.0462303161621094, "learning_rate": 2e-05, "loss": 0.06920994, "step": 21537 }, { "epoch": 43.076, "grad_norm": 1.0488942861557007, "learning_rate": 2e-05, "loss": 0.04019871, "step": 21538 }, { "epoch": 43.078, "grad_norm": 0.9643396735191345, "learning_rate": 2e-05, "loss": 0.03677714, "step": 21539 }, { "epoch": 43.08, "grad_norm": 0.8673763871192932, "learning_rate": 2e-05, "loss": 0.02881166, "step": 21540 }, { "epoch": 43.082, "grad_norm": 1.4893254041671753, "learning_rate": 2e-05, "loss": 0.0467178, "step": 21541 }, { "epoch": 43.084, "grad_norm": 1.528484582901001, "learning_rate": 2e-05, "loss": 0.06043353, "step": 21542 }, { "epoch": 43.086, "grad_norm": 1.0167810916900635, "learning_rate": 2e-05, "loss": 0.03127775, "step": 21543 }, { "epoch": 43.088, "grad_norm": 1.778594970703125, "learning_rate": 2e-05, "loss": 0.04909519, "step": 21544 }, { "epoch": 43.09, "grad_norm": 0.9675803780555725, "learning_rate": 2e-05, "loss": 0.03426045, "step": 21545 }, { "epoch": 43.092, "grad_norm": 1.1658743619918823, "learning_rate": 2e-05, "loss": 0.07016677, "step": 21546 }, { "epoch": 43.094, "grad_norm": 1.9398874044418335, "learning_rate": 2e-05, "loss": 0.04341382, "step": 21547 }, { "epoch": 43.096, "grad_norm": 1.7476240396499634, "learning_rate": 2e-05, "loss": 0.03427904, "step": 21548 }, { "epoch": 43.098, "grad_norm": 1.0302692651748657, "learning_rate": 2e-05, "loss": 0.03532644, "step": 21549 }, { "epoch": 43.1, "grad_norm": 0.9855327010154724, "learning_rate": 2e-05, "loss": 0.04113463, "step": 21550 }, { "epoch": 43.102, "grad_norm": 1.125103235244751, "learning_rate": 2e-05, "loss": 0.03224187, "step": 21551 }, { "epoch": 43.104, "grad_norm": 0.8726584315299988, "learning_rate": 2e-05, "loss": 0.03111587, "step": 21552 }, { "epoch": 43.106, "grad_norm": 1.4993234872817993, "learning_rate": 2e-05, "loss": 0.05311944, "step": 21553 }, { "epoch": 43.108, "grad_norm": 1.0393658876419067, "learning_rate": 2e-05, "loss": 0.04418644, "step": 21554 }, { "epoch": 43.11, "grad_norm": 1.2439035177230835, "learning_rate": 2e-05, "loss": 0.03123252, "step": 21555 }, { "epoch": 43.112, "grad_norm": 1.1800543069839478, "learning_rate": 2e-05, "loss": 0.04986358, "step": 21556 }, { "epoch": 43.114, "grad_norm": 1.0557270050048828, "learning_rate": 2e-05, "loss": 0.038652, "step": 21557 }, { "epoch": 43.116, "grad_norm": 1.0433310270309448, "learning_rate": 2e-05, "loss": 0.02953596, "step": 21558 }, { "epoch": 43.118, "grad_norm": 0.8939655423164368, "learning_rate": 2e-05, "loss": 0.03127585, "step": 21559 }, { "epoch": 43.12, "grad_norm": 1.1106184720993042, "learning_rate": 2e-05, "loss": 0.04145788, "step": 21560 }, { "epoch": 43.122, "grad_norm": 1.1329957246780396, "learning_rate": 2e-05, "loss": 0.0463948, "step": 21561 }, { "epoch": 43.124, "grad_norm": 1.1320699453353882, "learning_rate": 2e-05, "loss": 0.04937724, "step": 21562 }, { "epoch": 43.126, "grad_norm": 1.1371742486953735, "learning_rate": 2e-05, "loss": 0.04805798, "step": 21563 }, { "epoch": 43.128, "grad_norm": 1.1951919794082642, "learning_rate": 2e-05, "loss": 0.04782828, "step": 21564 }, { "epoch": 43.13, "grad_norm": 1.2105640172958374, "learning_rate": 2e-05, "loss": 0.04293704, "step": 21565 }, { "epoch": 43.132, "grad_norm": 1.053952932357788, "learning_rate": 2e-05, "loss": 0.03278841, "step": 21566 }, { "epoch": 43.134, "grad_norm": 1.0530550479888916, "learning_rate": 2e-05, "loss": 0.03740437, "step": 21567 }, { "epoch": 43.136, "grad_norm": 1.252481460571289, "learning_rate": 2e-05, "loss": 0.0551297, "step": 21568 }, { "epoch": 43.138, "grad_norm": 1.0158932209014893, "learning_rate": 2e-05, "loss": 0.04259151, "step": 21569 }, { "epoch": 43.14, "grad_norm": 1.0540589094161987, "learning_rate": 2e-05, "loss": 0.03606081, "step": 21570 }, { "epoch": 43.142, "grad_norm": 1.3601748943328857, "learning_rate": 2e-05, "loss": 0.03485128, "step": 21571 }, { "epoch": 43.144, "grad_norm": 0.9778441190719604, "learning_rate": 2e-05, "loss": 0.03678624, "step": 21572 }, { "epoch": 43.146, "grad_norm": 2.6971757411956787, "learning_rate": 2e-05, "loss": 0.04939865, "step": 21573 }, { "epoch": 43.148, "grad_norm": 0.9736534357070923, "learning_rate": 2e-05, "loss": 0.03956175, "step": 21574 }, { "epoch": 43.15, "grad_norm": 1.913548469543457, "learning_rate": 2e-05, "loss": 0.04778965, "step": 21575 }, { "epoch": 43.152, "grad_norm": 1.2088645696640015, "learning_rate": 2e-05, "loss": 0.04334343, "step": 21576 }, { "epoch": 43.154, "grad_norm": 1.0494822263717651, "learning_rate": 2e-05, "loss": 0.05102647, "step": 21577 }, { "epoch": 43.156, "grad_norm": 1.2444239854812622, "learning_rate": 2e-05, "loss": 0.04848306, "step": 21578 }, { "epoch": 43.158, "grad_norm": 4.869460582733154, "learning_rate": 2e-05, "loss": 0.05027522, "step": 21579 }, { "epoch": 43.16, "grad_norm": 1.2638221979141235, "learning_rate": 2e-05, "loss": 0.05484208, "step": 21580 }, { "epoch": 43.162, "grad_norm": 0.952798068523407, "learning_rate": 2e-05, "loss": 0.03995014, "step": 21581 }, { "epoch": 43.164, "grad_norm": 1.1141804456710815, "learning_rate": 2e-05, "loss": 0.03627685, "step": 21582 }, { "epoch": 43.166, "grad_norm": 0.8950271606445312, "learning_rate": 2e-05, "loss": 0.03761142, "step": 21583 }, { "epoch": 43.168, "grad_norm": 1.1006243228912354, "learning_rate": 2e-05, "loss": 0.04348521, "step": 21584 }, { "epoch": 43.17, "grad_norm": 1.3494913578033447, "learning_rate": 2e-05, "loss": 0.0548263, "step": 21585 }, { "epoch": 43.172, "grad_norm": 1.5218074321746826, "learning_rate": 2e-05, "loss": 0.05485296, "step": 21586 }, { "epoch": 43.174, "grad_norm": 0.992699384689331, "learning_rate": 2e-05, "loss": 0.03646914, "step": 21587 }, { "epoch": 43.176, "grad_norm": 1.52058744430542, "learning_rate": 2e-05, "loss": 0.04382282, "step": 21588 }, { "epoch": 43.178, "grad_norm": 1.4549057483673096, "learning_rate": 2e-05, "loss": 0.04498808, "step": 21589 }, { "epoch": 43.18, "grad_norm": 1.1950576305389404, "learning_rate": 2e-05, "loss": 0.04599179, "step": 21590 }, { "epoch": 43.182, "grad_norm": 1.4135665893554688, "learning_rate": 2e-05, "loss": 0.03723136, "step": 21591 }, { "epoch": 43.184, "grad_norm": 0.9459132552146912, "learning_rate": 2e-05, "loss": 0.02962795, "step": 21592 }, { "epoch": 43.186, "grad_norm": 0.9997537136077881, "learning_rate": 2e-05, "loss": 0.04094714, "step": 21593 }, { "epoch": 43.188, "grad_norm": 1.2770761251449585, "learning_rate": 2e-05, "loss": 0.04436073, "step": 21594 }, { "epoch": 43.19, "grad_norm": 1.0048246383666992, "learning_rate": 2e-05, "loss": 0.04328796, "step": 21595 }, { "epoch": 43.192, "grad_norm": 1.0809087753295898, "learning_rate": 2e-05, "loss": 0.04212854, "step": 21596 }, { "epoch": 43.194, "grad_norm": 0.9423580765724182, "learning_rate": 2e-05, "loss": 0.03866305, "step": 21597 }, { "epoch": 43.196, "grad_norm": 1.1177490949630737, "learning_rate": 2e-05, "loss": 0.04274879, "step": 21598 }, { "epoch": 43.198, "grad_norm": 0.9296929240226746, "learning_rate": 2e-05, "loss": 0.0373172, "step": 21599 }, { "epoch": 43.2, "grad_norm": 1.1421337127685547, "learning_rate": 2e-05, "loss": 0.05034145, "step": 21600 }, { "epoch": 43.202, "grad_norm": 0.9606911540031433, "learning_rate": 2e-05, "loss": 0.04260297, "step": 21601 }, { "epoch": 43.204, "grad_norm": 1.395663857460022, "learning_rate": 2e-05, "loss": 0.05672524, "step": 21602 }, { "epoch": 43.206, "grad_norm": 1.2878978252410889, "learning_rate": 2e-05, "loss": 0.05431881, "step": 21603 }, { "epoch": 43.208, "grad_norm": 1.2168048620224, "learning_rate": 2e-05, "loss": 0.05157124, "step": 21604 }, { "epoch": 43.21, "grad_norm": 1.0812981128692627, "learning_rate": 2e-05, "loss": 0.04211493, "step": 21605 }, { "epoch": 43.212, "grad_norm": 0.8551574349403381, "learning_rate": 2e-05, "loss": 0.03165013, "step": 21606 }, { "epoch": 43.214, "grad_norm": 0.9190029501914978, "learning_rate": 2e-05, "loss": 0.03787802, "step": 21607 }, { "epoch": 43.216, "grad_norm": 1.4801126718521118, "learning_rate": 2e-05, "loss": 0.06118463, "step": 21608 }, { "epoch": 43.218, "grad_norm": 2.2338075637817383, "learning_rate": 2e-05, "loss": 0.05117119, "step": 21609 }, { "epoch": 43.22, "grad_norm": 1.446197271347046, "learning_rate": 2e-05, "loss": 0.05829916, "step": 21610 }, { "epoch": 43.222, "grad_norm": 1.1171116828918457, "learning_rate": 2e-05, "loss": 0.04048864, "step": 21611 }, { "epoch": 43.224, "grad_norm": 1.1655888557434082, "learning_rate": 2e-05, "loss": 0.04986102, "step": 21612 }, { "epoch": 43.226, "grad_norm": 1.0869560241699219, "learning_rate": 2e-05, "loss": 0.03866713, "step": 21613 }, { "epoch": 43.228, "grad_norm": 0.8310866951942444, "learning_rate": 2e-05, "loss": 0.02539867, "step": 21614 }, { "epoch": 43.23, "grad_norm": 1.1562633514404297, "learning_rate": 2e-05, "loss": 0.05863639, "step": 21615 }, { "epoch": 43.232, "grad_norm": 1.4170281887054443, "learning_rate": 2e-05, "loss": 0.05692539, "step": 21616 }, { "epoch": 43.234, "grad_norm": 1.155140995979309, "learning_rate": 2e-05, "loss": 0.03919734, "step": 21617 }, { "epoch": 43.236, "grad_norm": 1.1034564971923828, "learning_rate": 2e-05, "loss": 0.04190123, "step": 21618 }, { "epoch": 43.238, "grad_norm": 1.5870157480239868, "learning_rate": 2e-05, "loss": 0.05102459, "step": 21619 }, { "epoch": 43.24, "grad_norm": 1.3234003782272339, "learning_rate": 2e-05, "loss": 0.04995444, "step": 21620 }, { "epoch": 43.242, "grad_norm": 1.106510043144226, "learning_rate": 2e-05, "loss": 0.046669, "step": 21621 }, { "epoch": 43.244, "grad_norm": 0.9448675513267517, "learning_rate": 2e-05, "loss": 0.02873988, "step": 21622 }, { "epoch": 43.246, "grad_norm": 1.218721628189087, "learning_rate": 2e-05, "loss": 0.04726008, "step": 21623 }, { "epoch": 43.248, "grad_norm": 1.1027477979660034, "learning_rate": 2e-05, "loss": 0.03004866, "step": 21624 }, { "epoch": 43.25, "grad_norm": 0.9498459100723267, "learning_rate": 2e-05, "loss": 0.02968902, "step": 21625 }, { "epoch": 43.252, "grad_norm": 0.9696542024612427, "learning_rate": 2e-05, "loss": 0.03813989, "step": 21626 }, { "epoch": 43.254, "grad_norm": 0.9928730130195618, "learning_rate": 2e-05, "loss": 0.04420563, "step": 21627 }, { "epoch": 43.256, "grad_norm": 0.9979124069213867, "learning_rate": 2e-05, "loss": 0.02616442, "step": 21628 }, { "epoch": 43.258, "grad_norm": 1.0477745532989502, "learning_rate": 2e-05, "loss": 0.04433997, "step": 21629 }, { "epoch": 43.26, "grad_norm": 1.0729262828826904, "learning_rate": 2e-05, "loss": 0.04628151, "step": 21630 }, { "epoch": 43.262, "grad_norm": 1.0048565864562988, "learning_rate": 2e-05, "loss": 0.03667574, "step": 21631 }, { "epoch": 43.264, "grad_norm": 1.2981020212173462, "learning_rate": 2e-05, "loss": 0.04706884, "step": 21632 }, { "epoch": 43.266, "grad_norm": 0.9248092770576477, "learning_rate": 2e-05, "loss": 0.03729512, "step": 21633 }, { "epoch": 43.268, "grad_norm": 1.4935028553009033, "learning_rate": 2e-05, "loss": 0.06463313, "step": 21634 }, { "epoch": 43.27, "grad_norm": 1.230615496635437, "learning_rate": 2e-05, "loss": 0.04967681, "step": 21635 }, { "epoch": 43.272, "grad_norm": 1.043403148651123, "learning_rate": 2e-05, "loss": 0.03828948, "step": 21636 }, { "epoch": 43.274, "grad_norm": 0.8879172205924988, "learning_rate": 2e-05, "loss": 0.03340585, "step": 21637 }, { "epoch": 43.276, "grad_norm": 2.0762460231781006, "learning_rate": 2e-05, "loss": 0.03055327, "step": 21638 }, { "epoch": 43.278, "grad_norm": 1.2311680316925049, "learning_rate": 2e-05, "loss": 0.04112308, "step": 21639 }, { "epoch": 43.28, "grad_norm": 0.9746531248092651, "learning_rate": 2e-05, "loss": 0.0329593, "step": 21640 }, { "epoch": 43.282, "grad_norm": 1.296059250831604, "learning_rate": 2e-05, "loss": 0.04097513, "step": 21641 }, { "epoch": 43.284, "grad_norm": 0.8537666201591492, "learning_rate": 2e-05, "loss": 0.03418196, "step": 21642 }, { "epoch": 43.286, "grad_norm": 1.1524845361709595, "learning_rate": 2e-05, "loss": 0.04692797, "step": 21643 }, { "epoch": 43.288, "grad_norm": 0.8913264870643616, "learning_rate": 2e-05, "loss": 0.02731458, "step": 21644 }, { "epoch": 43.29, "grad_norm": 1.1445558071136475, "learning_rate": 2e-05, "loss": 0.04007136, "step": 21645 }, { "epoch": 43.292, "grad_norm": 1.2915922403335571, "learning_rate": 2e-05, "loss": 0.04177546, "step": 21646 }, { "epoch": 43.294, "grad_norm": 1.1912838220596313, "learning_rate": 2e-05, "loss": 0.04470565, "step": 21647 }, { "epoch": 43.296, "grad_norm": 1.1178830862045288, "learning_rate": 2e-05, "loss": 0.03960251, "step": 21648 }, { "epoch": 43.298, "grad_norm": 1.3779065608978271, "learning_rate": 2e-05, "loss": 0.03376544, "step": 21649 }, { "epoch": 43.3, "grad_norm": 1.2929333448410034, "learning_rate": 2e-05, "loss": 0.05447173, "step": 21650 }, { "epoch": 43.302, "grad_norm": 1.5865455865859985, "learning_rate": 2e-05, "loss": 0.04575078, "step": 21651 }, { "epoch": 43.304, "grad_norm": 2.3212931156158447, "learning_rate": 2e-05, "loss": 0.05826779, "step": 21652 }, { "epoch": 43.306, "grad_norm": 1.0237253904342651, "learning_rate": 2e-05, "loss": 0.03167427, "step": 21653 }, { "epoch": 43.308, "grad_norm": 1.1357048749923706, "learning_rate": 2e-05, "loss": 0.03626086, "step": 21654 }, { "epoch": 43.31, "grad_norm": 1.9722542762756348, "learning_rate": 2e-05, "loss": 0.05172148, "step": 21655 }, { "epoch": 43.312, "grad_norm": 1.1181687116622925, "learning_rate": 2e-05, "loss": 0.03840844, "step": 21656 }, { "epoch": 43.314, "grad_norm": 1.0847325325012207, "learning_rate": 2e-05, "loss": 0.03223992, "step": 21657 }, { "epoch": 43.316, "grad_norm": 1.0498465299606323, "learning_rate": 2e-05, "loss": 0.02764856, "step": 21658 }, { "epoch": 43.318, "grad_norm": 2.2111804485321045, "learning_rate": 2e-05, "loss": 0.06376007, "step": 21659 }, { "epoch": 43.32, "grad_norm": 1.4746514558792114, "learning_rate": 2e-05, "loss": 0.06365299, "step": 21660 }, { "epoch": 43.322, "grad_norm": 1.2095229625701904, "learning_rate": 2e-05, "loss": 0.03778709, "step": 21661 }, { "epoch": 43.324, "grad_norm": 1.045575737953186, "learning_rate": 2e-05, "loss": 0.04611383, "step": 21662 }, { "epoch": 43.326, "grad_norm": 1.139072060585022, "learning_rate": 2e-05, "loss": 0.03993519, "step": 21663 }, { "epoch": 43.328, "grad_norm": 1.2119213342666626, "learning_rate": 2e-05, "loss": 0.04941104, "step": 21664 }, { "epoch": 43.33, "grad_norm": 0.9795631766319275, "learning_rate": 2e-05, "loss": 0.04393193, "step": 21665 }, { "epoch": 43.332, "grad_norm": 1.1787199974060059, "learning_rate": 2e-05, "loss": 0.04019287, "step": 21666 }, { "epoch": 43.334, "grad_norm": 1.283530592918396, "learning_rate": 2e-05, "loss": 0.05504204, "step": 21667 }, { "epoch": 43.336, "grad_norm": 1.1043323278427124, "learning_rate": 2e-05, "loss": 0.0436521, "step": 21668 }, { "epoch": 43.338, "grad_norm": 0.8923149108886719, "learning_rate": 2e-05, "loss": 0.02867976, "step": 21669 }, { "epoch": 43.34, "grad_norm": 1.4097844362258911, "learning_rate": 2e-05, "loss": 0.03659859, "step": 21670 }, { "epoch": 43.342, "grad_norm": 1.314300775527954, "learning_rate": 2e-05, "loss": 0.05667983, "step": 21671 }, { "epoch": 43.344, "grad_norm": 1.087296485900879, "learning_rate": 2e-05, "loss": 0.03836971, "step": 21672 }, { "epoch": 43.346, "grad_norm": 1.3257967233657837, "learning_rate": 2e-05, "loss": 0.04146299, "step": 21673 }, { "epoch": 43.348, "grad_norm": 1.2841472625732422, "learning_rate": 2e-05, "loss": 0.04974665, "step": 21674 }, { "epoch": 43.35, "grad_norm": 0.9989627599716187, "learning_rate": 2e-05, "loss": 0.0318079, "step": 21675 }, { "epoch": 43.352, "grad_norm": 1.2353107929229736, "learning_rate": 2e-05, "loss": 0.03651591, "step": 21676 }, { "epoch": 43.354, "grad_norm": 1.0349856615066528, "learning_rate": 2e-05, "loss": 0.04118809, "step": 21677 }, { "epoch": 43.356, "grad_norm": 0.9092193245887756, "learning_rate": 2e-05, "loss": 0.0327672, "step": 21678 }, { "epoch": 43.358, "grad_norm": 1.047554850578308, "learning_rate": 2e-05, "loss": 0.03438172, "step": 21679 }, { "epoch": 43.36, "grad_norm": 1.1737427711486816, "learning_rate": 2e-05, "loss": 0.04325868, "step": 21680 }, { "epoch": 43.362, "grad_norm": 1.2575112581253052, "learning_rate": 2e-05, "loss": 0.0350428, "step": 21681 }, { "epoch": 43.364, "grad_norm": 1.520871639251709, "learning_rate": 2e-05, "loss": 0.04291663, "step": 21682 }, { "epoch": 43.366, "grad_norm": 5.2581000328063965, "learning_rate": 2e-05, "loss": 0.05253425, "step": 21683 }, { "epoch": 43.368, "grad_norm": 1.2169644832611084, "learning_rate": 2e-05, "loss": 0.04286679, "step": 21684 }, { "epoch": 43.37, "grad_norm": 0.9909608960151672, "learning_rate": 2e-05, "loss": 0.03581897, "step": 21685 }, { "epoch": 43.372, "grad_norm": 1.0688034296035767, "learning_rate": 2e-05, "loss": 0.02948214, "step": 21686 }, { "epoch": 43.374, "grad_norm": 1.1638429164886475, "learning_rate": 2e-05, "loss": 0.05602856, "step": 21687 }, { "epoch": 43.376, "grad_norm": 1.1232125759124756, "learning_rate": 2e-05, "loss": 0.04738471, "step": 21688 }, { "epoch": 43.378, "grad_norm": 1.2474092245101929, "learning_rate": 2e-05, "loss": 0.05545115, "step": 21689 }, { "epoch": 43.38, "grad_norm": 1.4754821062088013, "learning_rate": 2e-05, "loss": 0.05587701, "step": 21690 }, { "epoch": 43.382, "grad_norm": 1.0800909996032715, "learning_rate": 2e-05, "loss": 0.0371982, "step": 21691 }, { "epoch": 43.384, "grad_norm": 1.8042876720428467, "learning_rate": 2e-05, "loss": 0.04256076, "step": 21692 }, { "epoch": 43.386, "grad_norm": 1.065379023551941, "learning_rate": 2e-05, "loss": 0.04420139, "step": 21693 }, { "epoch": 43.388, "grad_norm": 0.9849072098731995, "learning_rate": 2e-05, "loss": 0.02791905, "step": 21694 }, { "epoch": 43.39, "grad_norm": 2.868598461151123, "learning_rate": 2e-05, "loss": 0.04038412, "step": 21695 }, { "epoch": 43.392, "grad_norm": 1.1565625667572021, "learning_rate": 2e-05, "loss": 0.05063523, "step": 21696 }, { "epoch": 43.394, "grad_norm": 1.2647764682769775, "learning_rate": 2e-05, "loss": 0.05144732, "step": 21697 }, { "epoch": 43.396, "grad_norm": 1.4268211126327515, "learning_rate": 2e-05, "loss": 0.04333577, "step": 21698 }, { "epoch": 43.398, "grad_norm": 1.2358442544937134, "learning_rate": 2e-05, "loss": 0.05046186, "step": 21699 }, { "epoch": 43.4, "grad_norm": 1.4524154663085938, "learning_rate": 2e-05, "loss": 0.0610268, "step": 21700 }, { "epoch": 43.402, "grad_norm": 1.1592899560928345, "learning_rate": 2e-05, "loss": 0.04402205, "step": 21701 }, { "epoch": 43.404, "grad_norm": 1.3561992645263672, "learning_rate": 2e-05, "loss": 0.05414577, "step": 21702 }, { "epoch": 43.406, "grad_norm": 2.727219581604004, "learning_rate": 2e-05, "loss": 0.05137745, "step": 21703 }, { "epoch": 43.408, "grad_norm": 1.8588035106658936, "learning_rate": 2e-05, "loss": 0.05819751, "step": 21704 }, { "epoch": 43.41, "grad_norm": 1.1406104564666748, "learning_rate": 2e-05, "loss": 0.05511078, "step": 21705 }, { "epoch": 43.412, "grad_norm": 1.5206453800201416, "learning_rate": 2e-05, "loss": 0.05442363, "step": 21706 }, { "epoch": 43.414, "grad_norm": 1.544252634048462, "learning_rate": 2e-05, "loss": 0.04780327, "step": 21707 }, { "epoch": 43.416, "grad_norm": 1.260754942893982, "learning_rate": 2e-05, "loss": 0.04627555, "step": 21708 }, { "epoch": 43.418, "grad_norm": 0.8099609017372131, "learning_rate": 2e-05, "loss": 0.02288938, "step": 21709 }, { "epoch": 43.42, "grad_norm": 1.08610999584198, "learning_rate": 2e-05, "loss": 0.04557304, "step": 21710 }, { "epoch": 43.422, "grad_norm": 1.3060609102249146, "learning_rate": 2e-05, "loss": 0.03743387, "step": 21711 }, { "epoch": 43.424, "grad_norm": 1.1344839334487915, "learning_rate": 2e-05, "loss": 0.03720674, "step": 21712 }, { "epoch": 43.426, "grad_norm": 1.2623273134231567, "learning_rate": 2e-05, "loss": 0.03892479, "step": 21713 }, { "epoch": 43.428, "grad_norm": 0.7811478972434998, "learning_rate": 2e-05, "loss": 0.02731532, "step": 21714 }, { "epoch": 43.43, "grad_norm": 1.2784897089004517, "learning_rate": 2e-05, "loss": 0.03522071, "step": 21715 }, { "epoch": 43.432, "grad_norm": 1.1297240257263184, "learning_rate": 2e-05, "loss": 0.04025554, "step": 21716 }, { "epoch": 43.434, "grad_norm": 1.4176253080368042, "learning_rate": 2e-05, "loss": 0.04013061, "step": 21717 }, { "epoch": 43.436, "grad_norm": 1.2820394039154053, "learning_rate": 2e-05, "loss": 0.05283058, "step": 21718 }, { "epoch": 43.438, "grad_norm": 0.8809661865234375, "learning_rate": 2e-05, "loss": 0.03102821, "step": 21719 }, { "epoch": 43.44, "grad_norm": 1.249072790145874, "learning_rate": 2e-05, "loss": 0.04824986, "step": 21720 }, { "epoch": 43.442, "grad_norm": 1.2164461612701416, "learning_rate": 2e-05, "loss": 0.0482796, "step": 21721 }, { "epoch": 43.444, "grad_norm": 1.1401503086090088, "learning_rate": 2e-05, "loss": 0.04604597, "step": 21722 }, { "epoch": 43.446, "grad_norm": 0.9395509362220764, "learning_rate": 2e-05, "loss": 0.03536365, "step": 21723 }, { "epoch": 43.448, "grad_norm": 0.957256019115448, "learning_rate": 2e-05, "loss": 0.03761604, "step": 21724 }, { "epoch": 43.45, "grad_norm": 1.6451482772827148, "learning_rate": 2e-05, "loss": 0.04568552, "step": 21725 }, { "epoch": 43.452, "grad_norm": 1.1945124864578247, "learning_rate": 2e-05, "loss": 0.04561944, "step": 21726 }, { "epoch": 43.454, "grad_norm": 1.290170669555664, "learning_rate": 2e-05, "loss": 0.04204362, "step": 21727 }, { "epoch": 43.456, "grad_norm": 1.5128979682922363, "learning_rate": 2e-05, "loss": 0.04468996, "step": 21728 }, { "epoch": 43.458, "grad_norm": 1.0816080570220947, "learning_rate": 2e-05, "loss": 0.03981309, "step": 21729 }, { "epoch": 43.46, "grad_norm": 0.9649284482002258, "learning_rate": 2e-05, "loss": 0.04065269, "step": 21730 }, { "epoch": 43.462, "grad_norm": 1.2103779315948486, "learning_rate": 2e-05, "loss": 0.04154168, "step": 21731 }, { "epoch": 43.464, "grad_norm": 1.2388038635253906, "learning_rate": 2e-05, "loss": 0.04697184, "step": 21732 }, { "epoch": 43.466, "grad_norm": 0.9526742100715637, "learning_rate": 2e-05, "loss": 0.03257825, "step": 21733 }, { "epoch": 43.468, "grad_norm": 2.134641408920288, "learning_rate": 2e-05, "loss": 0.04552689, "step": 21734 }, { "epoch": 43.47, "grad_norm": 1.0463626384735107, "learning_rate": 2e-05, "loss": 0.03647093, "step": 21735 }, { "epoch": 43.472, "grad_norm": 0.8900033235549927, "learning_rate": 2e-05, "loss": 0.03290387, "step": 21736 }, { "epoch": 43.474, "grad_norm": 1.6683579683303833, "learning_rate": 2e-05, "loss": 0.02495382, "step": 21737 }, { "epoch": 43.476, "grad_norm": 0.9944674968719482, "learning_rate": 2e-05, "loss": 0.04280144, "step": 21738 }, { "epoch": 43.478, "grad_norm": 1.255933403968811, "learning_rate": 2e-05, "loss": 0.05521154, "step": 21739 }, { "epoch": 43.48, "grad_norm": 3.067064046859741, "learning_rate": 2e-05, "loss": 0.05089381, "step": 21740 }, { "epoch": 43.482, "grad_norm": 1.3610708713531494, "learning_rate": 2e-05, "loss": 0.04178032, "step": 21741 }, { "epoch": 43.484, "grad_norm": 1.2123537063598633, "learning_rate": 2e-05, "loss": 0.03646653, "step": 21742 }, { "epoch": 43.486, "grad_norm": 1.1001256704330444, "learning_rate": 2e-05, "loss": 0.04126867, "step": 21743 }, { "epoch": 43.488, "grad_norm": 0.9890459775924683, "learning_rate": 2e-05, "loss": 0.03477874, "step": 21744 }, { "epoch": 43.49, "grad_norm": 1.383278489112854, "learning_rate": 2e-05, "loss": 0.05052227, "step": 21745 }, { "epoch": 43.492, "grad_norm": 1.0816317796707153, "learning_rate": 2e-05, "loss": 0.03919977, "step": 21746 }, { "epoch": 43.494, "grad_norm": 0.9450200200080872, "learning_rate": 2e-05, "loss": 0.04087563, "step": 21747 }, { "epoch": 43.496, "grad_norm": 1.3437703847885132, "learning_rate": 2e-05, "loss": 0.04949685, "step": 21748 }, { "epoch": 43.498, "grad_norm": 1.868466854095459, "learning_rate": 2e-05, "loss": 0.03405698, "step": 21749 }, { "epoch": 43.5, "grad_norm": 1.320025086402893, "learning_rate": 2e-05, "loss": 0.05828035, "step": 21750 }, { "epoch": 43.502, "grad_norm": 1.7791790962219238, "learning_rate": 2e-05, "loss": 0.06646048, "step": 21751 }, { "epoch": 43.504, "grad_norm": 1.7059146165847778, "learning_rate": 2e-05, "loss": 0.04614983, "step": 21752 }, { "epoch": 43.506, "grad_norm": 1.1781865358352661, "learning_rate": 2e-05, "loss": 0.04958811, "step": 21753 }, { "epoch": 43.508, "grad_norm": 0.8593549132347107, "learning_rate": 2e-05, "loss": 0.02477839, "step": 21754 }, { "epoch": 43.51, "grad_norm": 0.7692524790763855, "learning_rate": 2e-05, "loss": 0.03094523, "step": 21755 }, { "epoch": 43.512, "grad_norm": 1.12357759475708, "learning_rate": 2e-05, "loss": 0.05283519, "step": 21756 }, { "epoch": 43.514, "grad_norm": 1.3813457489013672, "learning_rate": 2e-05, "loss": 0.05348702, "step": 21757 }, { "epoch": 43.516, "grad_norm": 1.02193021774292, "learning_rate": 2e-05, "loss": 0.04253665, "step": 21758 }, { "epoch": 43.518, "grad_norm": 1.4434438943862915, "learning_rate": 2e-05, "loss": 0.04910342, "step": 21759 }, { "epoch": 43.52, "grad_norm": 0.9858810901641846, "learning_rate": 2e-05, "loss": 0.03432997, "step": 21760 }, { "epoch": 43.522, "grad_norm": 0.8454307913780212, "learning_rate": 2e-05, "loss": 0.02746351, "step": 21761 }, { "epoch": 43.524, "grad_norm": 1.1626834869384766, "learning_rate": 2e-05, "loss": 0.04616697, "step": 21762 }, { "epoch": 43.526, "grad_norm": 1.447304368019104, "learning_rate": 2e-05, "loss": 0.05058809, "step": 21763 }, { "epoch": 43.528, "grad_norm": 1.172178864479065, "learning_rate": 2e-05, "loss": 0.04366866, "step": 21764 }, { "epoch": 43.53, "grad_norm": 0.9994648694992065, "learning_rate": 2e-05, "loss": 0.04859383, "step": 21765 }, { "epoch": 43.532, "grad_norm": 1.1556830406188965, "learning_rate": 2e-05, "loss": 0.05048081, "step": 21766 }, { "epoch": 43.534, "grad_norm": 1.2238110303878784, "learning_rate": 2e-05, "loss": 0.05554388, "step": 21767 }, { "epoch": 43.536, "grad_norm": 1.6330780982971191, "learning_rate": 2e-05, "loss": 0.04079942, "step": 21768 }, { "epoch": 43.538, "grad_norm": 0.9793479442596436, "learning_rate": 2e-05, "loss": 0.03947442, "step": 21769 }, { "epoch": 43.54, "grad_norm": 0.9716880917549133, "learning_rate": 2e-05, "loss": 0.0353773, "step": 21770 }, { "epoch": 43.542, "grad_norm": 1.2932239770889282, "learning_rate": 2e-05, "loss": 0.04241467, "step": 21771 }, { "epoch": 43.544, "grad_norm": 0.9985625743865967, "learning_rate": 2e-05, "loss": 0.03441525, "step": 21772 }, { "epoch": 43.546, "grad_norm": 1.2000640630722046, "learning_rate": 2e-05, "loss": 0.05951509, "step": 21773 }, { "epoch": 43.548, "grad_norm": 1.219389796257019, "learning_rate": 2e-05, "loss": 0.05212791, "step": 21774 }, { "epoch": 43.55, "grad_norm": 1.000905156135559, "learning_rate": 2e-05, "loss": 0.0420983, "step": 21775 }, { "epoch": 43.552, "grad_norm": 1.0114585161209106, "learning_rate": 2e-05, "loss": 0.03611118, "step": 21776 }, { "epoch": 43.554, "grad_norm": 1.2513197660446167, "learning_rate": 2e-05, "loss": 0.04526487, "step": 21777 }, { "epoch": 43.556, "grad_norm": 1.7343202829360962, "learning_rate": 2e-05, "loss": 0.05031293, "step": 21778 }, { "epoch": 43.558, "grad_norm": 0.8882659077644348, "learning_rate": 2e-05, "loss": 0.03567799, "step": 21779 }, { "epoch": 43.56, "grad_norm": 1.1112327575683594, "learning_rate": 2e-05, "loss": 0.04831495, "step": 21780 }, { "epoch": 43.562, "grad_norm": 1.2914454936981201, "learning_rate": 2e-05, "loss": 0.04268849, "step": 21781 }, { "epoch": 43.564, "grad_norm": 1.4719511270523071, "learning_rate": 2e-05, "loss": 0.05118477, "step": 21782 }, { "epoch": 43.566, "grad_norm": 0.9859159588813782, "learning_rate": 2e-05, "loss": 0.04724458, "step": 21783 }, { "epoch": 43.568, "grad_norm": 0.9612816572189331, "learning_rate": 2e-05, "loss": 0.0423895, "step": 21784 }, { "epoch": 43.57, "grad_norm": 1.1279462575912476, "learning_rate": 2e-05, "loss": 0.03721898, "step": 21785 }, { "epoch": 43.572, "grad_norm": 1.3423670530319214, "learning_rate": 2e-05, "loss": 0.05836347, "step": 21786 }, { "epoch": 43.574, "grad_norm": 0.9472774267196655, "learning_rate": 2e-05, "loss": 0.03530859, "step": 21787 }, { "epoch": 43.576, "grad_norm": 0.9310116171836853, "learning_rate": 2e-05, "loss": 0.03498428, "step": 21788 }, { "epoch": 43.578, "grad_norm": 1.9967092275619507, "learning_rate": 2e-05, "loss": 0.04481179, "step": 21789 }, { "epoch": 43.58, "grad_norm": 0.9212583899497986, "learning_rate": 2e-05, "loss": 0.03702111, "step": 21790 }, { "epoch": 43.582, "grad_norm": 1.2178590297698975, "learning_rate": 2e-05, "loss": 0.05494902, "step": 21791 }, { "epoch": 43.584, "grad_norm": 1.0112686157226562, "learning_rate": 2e-05, "loss": 0.03656603, "step": 21792 }, { "epoch": 43.586, "grad_norm": 1.9768187999725342, "learning_rate": 2e-05, "loss": 0.05040358, "step": 21793 }, { "epoch": 43.588, "grad_norm": 1.3178633451461792, "learning_rate": 2e-05, "loss": 0.05968247, "step": 21794 }, { "epoch": 43.59, "grad_norm": 0.8559836149215698, "learning_rate": 2e-05, "loss": 0.02916397, "step": 21795 }, { "epoch": 43.592, "grad_norm": 1.2959657907485962, "learning_rate": 2e-05, "loss": 0.05529212, "step": 21796 }, { "epoch": 43.594, "grad_norm": 1.4697439670562744, "learning_rate": 2e-05, "loss": 0.04537124, "step": 21797 }, { "epoch": 43.596, "grad_norm": 1.1121951341629028, "learning_rate": 2e-05, "loss": 0.04535225, "step": 21798 }, { "epoch": 43.598, "grad_norm": 1.3502434492111206, "learning_rate": 2e-05, "loss": 0.05075352, "step": 21799 }, { "epoch": 43.6, "grad_norm": 1.2131574153900146, "learning_rate": 2e-05, "loss": 0.04932404, "step": 21800 }, { "epoch": 43.602, "grad_norm": 1.6013059616088867, "learning_rate": 2e-05, "loss": 0.05610324, "step": 21801 }, { "epoch": 43.604, "grad_norm": 1.1620149612426758, "learning_rate": 2e-05, "loss": 0.05905971, "step": 21802 }, { "epoch": 43.606, "grad_norm": 1.7699416875839233, "learning_rate": 2e-05, "loss": 0.03485197, "step": 21803 }, { "epoch": 43.608, "grad_norm": 1.2182286977767944, "learning_rate": 2e-05, "loss": 0.03750105, "step": 21804 }, { "epoch": 43.61, "grad_norm": 0.8183603882789612, "learning_rate": 2e-05, "loss": 0.03506579, "step": 21805 }, { "epoch": 43.612, "grad_norm": 1.1043846607208252, "learning_rate": 2e-05, "loss": 0.04602025, "step": 21806 }, { "epoch": 43.614, "grad_norm": 1.2613682746887207, "learning_rate": 2e-05, "loss": 0.04944381, "step": 21807 }, { "epoch": 43.616, "grad_norm": 0.8468272089958191, "learning_rate": 2e-05, "loss": 0.03407397, "step": 21808 }, { "epoch": 43.618, "grad_norm": 0.8806912899017334, "learning_rate": 2e-05, "loss": 0.03383248, "step": 21809 }, { "epoch": 43.62, "grad_norm": 1.175614356994629, "learning_rate": 2e-05, "loss": 0.04646242, "step": 21810 }, { "epoch": 43.622, "grad_norm": 1.1787428855895996, "learning_rate": 2e-05, "loss": 0.02900134, "step": 21811 }, { "epoch": 43.624, "grad_norm": 1.4014700651168823, "learning_rate": 2e-05, "loss": 0.05839483, "step": 21812 }, { "epoch": 43.626, "grad_norm": 1.2850819826126099, "learning_rate": 2e-05, "loss": 0.03855247, "step": 21813 }, { "epoch": 43.628, "grad_norm": 1.497589111328125, "learning_rate": 2e-05, "loss": 0.03659302, "step": 21814 }, { "epoch": 43.63, "grad_norm": 0.9908480048179626, "learning_rate": 2e-05, "loss": 0.02935298, "step": 21815 }, { "epoch": 43.632, "grad_norm": 1.044250726699829, "learning_rate": 2e-05, "loss": 0.04597304, "step": 21816 }, { "epoch": 43.634, "grad_norm": 1.2930546998977661, "learning_rate": 2e-05, "loss": 0.0563747, "step": 21817 }, { "epoch": 43.636, "grad_norm": 0.8633590936660767, "learning_rate": 2e-05, "loss": 0.03106845, "step": 21818 }, { "epoch": 43.638, "grad_norm": 1.0918747186660767, "learning_rate": 2e-05, "loss": 0.05369578, "step": 21819 }, { "epoch": 43.64, "grad_norm": 0.7989838123321533, "learning_rate": 2e-05, "loss": 0.02378467, "step": 21820 }, { "epoch": 43.642, "grad_norm": 0.9942901730537415, "learning_rate": 2e-05, "loss": 0.03918402, "step": 21821 }, { "epoch": 43.644, "grad_norm": 1.0188764333724976, "learning_rate": 2e-05, "loss": 0.05564085, "step": 21822 }, { "epoch": 43.646, "grad_norm": 0.7753763794898987, "learning_rate": 2e-05, "loss": 0.02549123, "step": 21823 }, { "epoch": 43.648, "grad_norm": 1.9414104223251343, "learning_rate": 2e-05, "loss": 0.05547203, "step": 21824 }, { "epoch": 43.65, "grad_norm": 2.122591495513916, "learning_rate": 2e-05, "loss": 0.04889602, "step": 21825 }, { "epoch": 43.652, "grad_norm": 1.2687931060791016, "learning_rate": 2e-05, "loss": 0.04016557, "step": 21826 }, { "epoch": 43.654, "grad_norm": 0.8885032534599304, "learning_rate": 2e-05, "loss": 0.03642484, "step": 21827 }, { "epoch": 43.656, "grad_norm": 0.9670476317405701, "learning_rate": 2e-05, "loss": 0.02972692, "step": 21828 }, { "epoch": 43.658, "grad_norm": 1.1958461999893188, "learning_rate": 2e-05, "loss": 0.05227534, "step": 21829 }, { "epoch": 43.66, "grad_norm": 1.4721581935882568, "learning_rate": 2e-05, "loss": 0.04954873, "step": 21830 }, { "epoch": 43.662, "grad_norm": 1.1529446840286255, "learning_rate": 2e-05, "loss": 0.03985273, "step": 21831 }, { "epoch": 43.664, "grad_norm": 1.7837733030319214, "learning_rate": 2e-05, "loss": 0.05352894, "step": 21832 }, { "epoch": 43.666, "grad_norm": 1.1084831953048706, "learning_rate": 2e-05, "loss": 0.05037396, "step": 21833 }, { "epoch": 43.668, "grad_norm": 1.1543959379196167, "learning_rate": 2e-05, "loss": 0.05929647, "step": 21834 }, { "epoch": 43.67, "grad_norm": 1.0004663467407227, "learning_rate": 2e-05, "loss": 0.04874853, "step": 21835 }, { "epoch": 43.672, "grad_norm": 1.0375447273254395, "learning_rate": 2e-05, "loss": 0.03537313, "step": 21836 }, { "epoch": 43.674, "grad_norm": 1.0306533575057983, "learning_rate": 2e-05, "loss": 0.04168561, "step": 21837 }, { "epoch": 43.676, "grad_norm": 1.5788947343826294, "learning_rate": 2e-05, "loss": 0.05093706, "step": 21838 }, { "epoch": 43.678, "grad_norm": 1.1517823934555054, "learning_rate": 2e-05, "loss": 0.05039099, "step": 21839 }, { "epoch": 43.68, "grad_norm": 1.4033719301223755, "learning_rate": 2e-05, "loss": 0.05700206, "step": 21840 }, { "epoch": 43.682, "grad_norm": 1.3291651010513306, "learning_rate": 2e-05, "loss": 0.05513757, "step": 21841 }, { "epoch": 43.684, "grad_norm": 1.9928884506225586, "learning_rate": 2e-05, "loss": 0.06471011, "step": 21842 }, { "epoch": 43.686, "grad_norm": 6.044139862060547, "learning_rate": 2e-05, "loss": 0.05448518, "step": 21843 }, { "epoch": 43.688, "grad_norm": 1.0079416036605835, "learning_rate": 2e-05, "loss": 0.03121562, "step": 21844 }, { "epoch": 43.69, "grad_norm": 1.1377640962600708, "learning_rate": 2e-05, "loss": 0.04985367, "step": 21845 }, { "epoch": 43.692, "grad_norm": 0.7043952345848083, "learning_rate": 2e-05, "loss": 0.02650774, "step": 21846 }, { "epoch": 43.694, "grad_norm": 1.122690200805664, "learning_rate": 2e-05, "loss": 0.03267936, "step": 21847 }, { "epoch": 43.696, "grad_norm": 1.1491355895996094, "learning_rate": 2e-05, "loss": 0.0425655, "step": 21848 }, { "epoch": 43.698, "grad_norm": 1.2790942192077637, "learning_rate": 2e-05, "loss": 0.05990753, "step": 21849 }, { "epoch": 43.7, "grad_norm": 1.1796038150787354, "learning_rate": 2e-05, "loss": 0.04956497, "step": 21850 }, { "epoch": 43.702, "grad_norm": 0.9713885188102722, "learning_rate": 2e-05, "loss": 0.04288552, "step": 21851 }, { "epoch": 43.704, "grad_norm": 1.2551149129867554, "learning_rate": 2e-05, "loss": 0.04439992, "step": 21852 }, { "epoch": 43.706, "grad_norm": 1.2082798480987549, "learning_rate": 2e-05, "loss": 0.0496681, "step": 21853 }, { "epoch": 43.708, "grad_norm": 1.3678545951843262, "learning_rate": 2e-05, "loss": 0.05777189, "step": 21854 }, { "epoch": 43.71, "grad_norm": 0.9978817105293274, "learning_rate": 2e-05, "loss": 0.04096838, "step": 21855 }, { "epoch": 43.712, "grad_norm": 1.043445348739624, "learning_rate": 2e-05, "loss": 0.0444257, "step": 21856 }, { "epoch": 43.714, "grad_norm": 0.9846399426460266, "learning_rate": 2e-05, "loss": 0.04583818, "step": 21857 }, { "epoch": 43.716, "grad_norm": 1.1316450834274292, "learning_rate": 2e-05, "loss": 0.04214125, "step": 21858 }, { "epoch": 43.718, "grad_norm": 1.3527915477752686, "learning_rate": 2e-05, "loss": 0.04996262, "step": 21859 }, { "epoch": 43.72, "grad_norm": 1.1177865266799927, "learning_rate": 2e-05, "loss": 0.0493018, "step": 21860 }, { "epoch": 43.722, "grad_norm": 1.5726734399795532, "learning_rate": 2e-05, "loss": 0.04863479, "step": 21861 }, { "epoch": 43.724, "grad_norm": 1.10280442237854, "learning_rate": 2e-05, "loss": 0.04966446, "step": 21862 }, { "epoch": 43.726, "grad_norm": 0.9596936702728271, "learning_rate": 2e-05, "loss": 0.02930747, "step": 21863 }, { "epoch": 43.728, "grad_norm": 1.0286372900009155, "learning_rate": 2e-05, "loss": 0.03814653, "step": 21864 }, { "epoch": 43.73, "grad_norm": 0.8003188371658325, "learning_rate": 2e-05, "loss": 0.02703705, "step": 21865 }, { "epoch": 43.732, "grad_norm": 1.323358178138733, "learning_rate": 2e-05, "loss": 0.06609681, "step": 21866 }, { "epoch": 43.734, "grad_norm": 1.1149260997772217, "learning_rate": 2e-05, "loss": 0.05055058, "step": 21867 }, { "epoch": 43.736, "grad_norm": 0.9976716041564941, "learning_rate": 2e-05, "loss": 0.03639464, "step": 21868 }, { "epoch": 43.738, "grad_norm": 1.2667244672775269, "learning_rate": 2e-05, "loss": 0.03071183, "step": 21869 }, { "epoch": 43.74, "grad_norm": 1.075811743736267, "learning_rate": 2e-05, "loss": 0.0523684, "step": 21870 }, { "epoch": 43.742, "grad_norm": 0.9408625364303589, "learning_rate": 2e-05, "loss": 0.04181786, "step": 21871 }, { "epoch": 43.744, "grad_norm": 1.0043747425079346, "learning_rate": 2e-05, "loss": 0.0355611, "step": 21872 }, { "epoch": 43.746, "grad_norm": 0.8746135234832764, "learning_rate": 2e-05, "loss": 0.0288865, "step": 21873 }, { "epoch": 43.748, "grad_norm": 1.1743898391723633, "learning_rate": 2e-05, "loss": 0.03267045, "step": 21874 }, { "epoch": 43.75, "grad_norm": 1.1777573823928833, "learning_rate": 2e-05, "loss": 0.05591404, "step": 21875 }, { "epoch": 43.752, "grad_norm": 1.0189874172210693, "learning_rate": 2e-05, "loss": 0.0265403, "step": 21876 }, { "epoch": 43.754, "grad_norm": 1.3099825382232666, "learning_rate": 2e-05, "loss": 0.04900996, "step": 21877 }, { "epoch": 43.756, "grad_norm": 1.1253808736801147, "learning_rate": 2e-05, "loss": 0.02591961, "step": 21878 }, { "epoch": 43.758, "grad_norm": 0.9855195879936218, "learning_rate": 2e-05, "loss": 0.03809185, "step": 21879 }, { "epoch": 43.76, "grad_norm": 0.9463460445404053, "learning_rate": 2e-05, "loss": 0.041981, "step": 21880 }, { "epoch": 43.762, "grad_norm": 0.814176082611084, "learning_rate": 2e-05, "loss": 0.02800581, "step": 21881 }, { "epoch": 43.764, "grad_norm": 1.8174176216125488, "learning_rate": 2e-05, "loss": 0.0433367, "step": 21882 }, { "epoch": 43.766, "grad_norm": 1.04941987991333, "learning_rate": 2e-05, "loss": 0.04682401, "step": 21883 }, { "epoch": 43.768, "grad_norm": 1.3590197563171387, "learning_rate": 2e-05, "loss": 0.04837655, "step": 21884 }, { "epoch": 43.77, "grad_norm": 0.9965466856956482, "learning_rate": 2e-05, "loss": 0.0414326, "step": 21885 }, { "epoch": 43.772, "grad_norm": 1.0282517671585083, "learning_rate": 2e-05, "loss": 0.0390597, "step": 21886 }, { "epoch": 43.774, "grad_norm": 3.162207841873169, "learning_rate": 2e-05, "loss": 0.0335244, "step": 21887 }, { "epoch": 43.776, "grad_norm": 0.9750503301620483, "learning_rate": 2e-05, "loss": 0.04198159, "step": 21888 }, { "epoch": 43.778, "grad_norm": 1.5152755975723267, "learning_rate": 2e-05, "loss": 0.06023128, "step": 21889 }, { "epoch": 43.78, "grad_norm": 0.9795051217079163, "learning_rate": 2e-05, "loss": 0.03448754, "step": 21890 }, { "epoch": 43.782, "grad_norm": 1.0254871845245361, "learning_rate": 2e-05, "loss": 0.0442871, "step": 21891 }, { "epoch": 43.784, "grad_norm": 1.128727674484253, "learning_rate": 2e-05, "loss": 0.04783348, "step": 21892 }, { "epoch": 43.786, "grad_norm": 1.2998372316360474, "learning_rate": 2e-05, "loss": 0.06326817, "step": 21893 }, { "epoch": 43.788, "grad_norm": 1.0472674369812012, "learning_rate": 2e-05, "loss": 0.04186059, "step": 21894 }, { "epoch": 43.79, "grad_norm": 1.0783990621566772, "learning_rate": 2e-05, "loss": 0.04566407, "step": 21895 }, { "epoch": 43.792, "grad_norm": 1.3083603382110596, "learning_rate": 2e-05, "loss": 0.04836886, "step": 21896 }, { "epoch": 43.794, "grad_norm": 1.1208419799804688, "learning_rate": 2e-05, "loss": 0.03880937, "step": 21897 }, { "epoch": 43.796, "grad_norm": 1.6302498579025269, "learning_rate": 2e-05, "loss": 0.05205537, "step": 21898 }, { "epoch": 43.798, "grad_norm": 1.0959464311599731, "learning_rate": 2e-05, "loss": 0.04417226, "step": 21899 }, { "epoch": 43.8, "grad_norm": 1.136833667755127, "learning_rate": 2e-05, "loss": 0.04366302, "step": 21900 }, { "epoch": 43.802, "grad_norm": 1.130436658859253, "learning_rate": 2e-05, "loss": 0.03622742, "step": 21901 }, { "epoch": 43.804, "grad_norm": 0.8595482110977173, "learning_rate": 2e-05, "loss": 0.03123369, "step": 21902 }, { "epoch": 43.806, "grad_norm": 1.0122137069702148, "learning_rate": 2e-05, "loss": 0.04580044, "step": 21903 }, { "epoch": 43.808, "grad_norm": 1.2473393678665161, "learning_rate": 2e-05, "loss": 0.0287952, "step": 21904 }, { "epoch": 43.81, "grad_norm": 1.1998612880706787, "learning_rate": 2e-05, "loss": 0.05742463, "step": 21905 }, { "epoch": 43.812, "grad_norm": 1.2040235996246338, "learning_rate": 2e-05, "loss": 0.04683301, "step": 21906 }, { "epoch": 43.814, "grad_norm": 3.085301637649536, "learning_rate": 2e-05, "loss": 0.0570135, "step": 21907 }, { "epoch": 43.816, "grad_norm": 0.9276028275489807, "learning_rate": 2e-05, "loss": 0.03538507, "step": 21908 }, { "epoch": 43.818, "grad_norm": 0.9261891841888428, "learning_rate": 2e-05, "loss": 0.03012399, "step": 21909 }, { "epoch": 43.82, "grad_norm": 1.0234099626541138, "learning_rate": 2e-05, "loss": 0.0496249, "step": 21910 }, { "epoch": 43.822, "grad_norm": 0.9667696356773376, "learning_rate": 2e-05, "loss": 0.03840217, "step": 21911 }, { "epoch": 43.824, "grad_norm": 1.103417992591858, "learning_rate": 2e-05, "loss": 0.04336877, "step": 21912 }, { "epoch": 43.826, "grad_norm": 1.0506141185760498, "learning_rate": 2e-05, "loss": 0.05306894, "step": 21913 }, { "epoch": 43.828, "grad_norm": 1.17722487449646, "learning_rate": 2e-05, "loss": 0.04201441, "step": 21914 }, { "epoch": 43.83, "grad_norm": 0.8532827496528625, "learning_rate": 2e-05, "loss": 0.02806678, "step": 21915 }, { "epoch": 43.832, "grad_norm": 1.0747712850570679, "learning_rate": 2e-05, "loss": 0.04104839, "step": 21916 }, { "epoch": 43.834, "grad_norm": 8.774784088134766, "learning_rate": 2e-05, "loss": 0.06282155, "step": 21917 }, { "epoch": 43.836, "grad_norm": 1.4782769680023193, "learning_rate": 2e-05, "loss": 0.03974598, "step": 21918 }, { "epoch": 43.838, "grad_norm": 1.1014549732208252, "learning_rate": 2e-05, "loss": 0.05010407, "step": 21919 }, { "epoch": 43.84, "grad_norm": 1.011607050895691, "learning_rate": 2e-05, "loss": 0.04163487, "step": 21920 }, { "epoch": 43.842, "grad_norm": 1.0383747816085815, "learning_rate": 2e-05, "loss": 0.04069871, "step": 21921 }, { "epoch": 43.844, "grad_norm": 1.1754111051559448, "learning_rate": 2e-05, "loss": 0.03793639, "step": 21922 }, { "epoch": 43.846, "grad_norm": 1.1690202951431274, "learning_rate": 2e-05, "loss": 0.04568651, "step": 21923 }, { "epoch": 43.848, "grad_norm": 1.1252903938293457, "learning_rate": 2e-05, "loss": 0.0298267, "step": 21924 }, { "epoch": 43.85, "grad_norm": 1.5545969009399414, "learning_rate": 2e-05, "loss": 0.0422214, "step": 21925 }, { "epoch": 43.852, "grad_norm": 1.0175963640213013, "learning_rate": 2e-05, "loss": 0.03590497, "step": 21926 }, { "epoch": 43.854, "grad_norm": 1.134313941001892, "learning_rate": 2e-05, "loss": 0.03775061, "step": 21927 }, { "epoch": 43.856, "grad_norm": 0.9652278423309326, "learning_rate": 2e-05, "loss": 0.03018519, "step": 21928 }, { "epoch": 43.858, "grad_norm": 0.9862887263298035, "learning_rate": 2e-05, "loss": 0.03899218, "step": 21929 }, { "epoch": 43.86, "grad_norm": 0.9911880493164062, "learning_rate": 2e-05, "loss": 0.02817927, "step": 21930 }, { "epoch": 43.862, "grad_norm": 1.1832093000411987, "learning_rate": 2e-05, "loss": 0.05190746, "step": 21931 }, { "epoch": 43.864, "grad_norm": 1.5983741283416748, "learning_rate": 2e-05, "loss": 0.05900955, "step": 21932 }, { "epoch": 43.866, "grad_norm": 0.9929104447364807, "learning_rate": 2e-05, "loss": 0.03811336, "step": 21933 }, { "epoch": 43.868, "grad_norm": 0.9780628681182861, "learning_rate": 2e-05, "loss": 0.03309367, "step": 21934 }, { "epoch": 43.87, "grad_norm": 1.0818418264389038, "learning_rate": 2e-05, "loss": 0.04214574, "step": 21935 }, { "epoch": 43.872, "grad_norm": 1.4343541860580444, "learning_rate": 2e-05, "loss": 0.04132379, "step": 21936 }, { "epoch": 43.874, "grad_norm": 0.7985007762908936, "learning_rate": 2e-05, "loss": 0.02514416, "step": 21937 }, { "epoch": 43.876, "grad_norm": 1.3363711833953857, "learning_rate": 2e-05, "loss": 0.0519973, "step": 21938 }, { "epoch": 43.878, "grad_norm": 1.0139611959457397, "learning_rate": 2e-05, "loss": 0.04034914, "step": 21939 }, { "epoch": 43.88, "grad_norm": 2.0334503650665283, "learning_rate": 2e-05, "loss": 0.05931049, "step": 21940 }, { "epoch": 43.882, "grad_norm": 0.9279800653457642, "learning_rate": 2e-05, "loss": 0.02854843, "step": 21941 }, { "epoch": 43.884, "grad_norm": 1.0682547092437744, "learning_rate": 2e-05, "loss": 0.04188439, "step": 21942 }, { "epoch": 43.886, "grad_norm": 0.9129590392112732, "learning_rate": 2e-05, "loss": 0.03035143, "step": 21943 }, { "epoch": 43.888, "grad_norm": 0.9826987385749817, "learning_rate": 2e-05, "loss": 0.02919892, "step": 21944 }, { "epoch": 43.89, "grad_norm": 0.90988689661026, "learning_rate": 2e-05, "loss": 0.03081316, "step": 21945 }, { "epoch": 43.892, "grad_norm": 1.0734930038452148, "learning_rate": 2e-05, "loss": 0.0445548, "step": 21946 }, { "epoch": 43.894, "grad_norm": 0.793425440788269, "learning_rate": 2e-05, "loss": 0.01941402, "step": 21947 }, { "epoch": 43.896, "grad_norm": 1.1897674798965454, "learning_rate": 2e-05, "loss": 0.04540803, "step": 21948 }, { "epoch": 43.898, "grad_norm": 0.9153079986572266, "learning_rate": 2e-05, "loss": 0.03546961, "step": 21949 }, { "epoch": 43.9, "grad_norm": 1.478383183479309, "learning_rate": 2e-05, "loss": 0.03460238, "step": 21950 }, { "epoch": 43.902, "grad_norm": 1.1459945440292358, "learning_rate": 2e-05, "loss": 0.04361208, "step": 21951 }, { "epoch": 43.904, "grad_norm": 2.8182897567749023, "learning_rate": 2e-05, "loss": 0.05983534, "step": 21952 }, { "epoch": 43.906, "grad_norm": 0.9799362421035767, "learning_rate": 2e-05, "loss": 0.04111208, "step": 21953 }, { "epoch": 43.908, "grad_norm": 0.9882168173789978, "learning_rate": 2e-05, "loss": 0.03295896, "step": 21954 }, { "epoch": 43.91, "grad_norm": 0.9092350006103516, "learning_rate": 2e-05, "loss": 0.03286574, "step": 21955 }, { "epoch": 43.912, "grad_norm": 1.0961514711380005, "learning_rate": 2e-05, "loss": 0.04028642, "step": 21956 }, { "epoch": 43.914, "grad_norm": 1.1327033042907715, "learning_rate": 2e-05, "loss": 0.04288281, "step": 21957 }, { "epoch": 43.916, "grad_norm": 1.0354961156845093, "learning_rate": 2e-05, "loss": 0.04554428, "step": 21958 }, { "epoch": 43.918, "grad_norm": 1.0633840560913086, "learning_rate": 2e-05, "loss": 0.03943173, "step": 21959 }, { "epoch": 43.92, "grad_norm": 1.1235687732696533, "learning_rate": 2e-05, "loss": 0.04602567, "step": 21960 }, { "epoch": 43.922, "grad_norm": 1.0829225778579712, "learning_rate": 2e-05, "loss": 0.04287158, "step": 21961 }, { "epoch": 43.924, "grad_norm": 1.2481268644332886, "learning_rate": 2e-05, "loss": 0.04584479, "step": 21962 }, { "epoch": 43.926, "grad_norm": 1.3785874843597412, "learning_rate": 2e-05, "loss": 0.04211417, "step": 21963 }, { "epoch": 43.928, "grad_norm": 1.077284336090088, "learning_rate": 2e-05, "loss": 0.0466284, "step": 21964 }, { "epoch": 43.93, "grad_norm": 1.1438968181610107, "learning_rate": 2e-05, "loss": 0.05030949, "step": 21965 }, { "epoch": 43.932, "grad_norm": 1.4342151880264282, "learning_rate": 2e-05, "loss": 0.06013999, "step": 21966 }, { "epoch": 43.934, "grad_norm": 1.1399656534194946, "learning_rate": 2e-05, "loss": 0.03678756, "step": 21967 }, { "epoch": 43.936, "grad_norm": 0.879771888256073, "learning_rate": 2e-05, "loss": 0.03162841, "step": 21968 }, { "epoch": 43.938, "grad_norm": 2.2394826412200928, "learning_rate": 2e-05, "loss": 0.04211564, "step": 21969 }, { "epoch": 43.94, "grad_norm": 0.7961384654045105, "learning_rate": 2e-05, "loss": 0.03152303, "step": 21970 }, { "epoch": 43.942, "grad_norm": 1.528683066368103, "learning_rate": 2e-05, "loss": 0.0362437, "step": 21971 }, { "epoch": 43.944, "grad_norm": 2.017845392227173, "learning_rate": 2e-05, "loss": 0.0635589, "step": 21972 }, { "epoch": 43.946, "grad_norm": 1.0617077350616455, "learning_rate": 2e-05, "loss": 0.0224614, "step": 21973 }, { "epoch": 43.948, "grad_norm": 1.3694946765899658, "learning_rate": 2e-05, "loss": 0.0438567, "step": 21974 }, { "epoch": 43.95, "grad_norm": 1.1613513231277466, "learning_rate": 2e-05, "loss": 0.05781006, "step": 21975 }, { "epoch": 43.952, "grad_norm": 0.9436536431312561, "learning_rate": 2e-05, "loss": 0.02919391, "step": 21976 }, { "epoch": 43.954, "grad_norm": 1.5133930444717407, "learning_rate": 2e-05, "loss": 0.06255869, "step": 21977 }, { "epoch": 43.956, "grad_norm": 0.9523667693138123, "learning_rate": 2e-05, "loss": 0.05239718, "step": 21978 }, { "epoch": 43.958, "grad_norm": 1.5109390020370483, "learning_rate": 2e-05, "loss": 0.05120644, "step": 21979 }, { "epoch": 43.96, "grad_norm": 1.7278436422348022, "learning_rate": 2e-05, "loss": 0.05690014, "step": 21980 }, { "epoch": 43.962, "grad_norm": 1.1575863361358643, "learning_rate": 2e-05, "loss": 0.04912923, "step": 21981 }, { "epoch": 43.964, "grad_norm": 0.9990217089653015, "learning_rate": 2e-05, "loss": 0.04037385, "step": 21982 }, { "epoch": 43.966, "grad_norm": 1.5984686613082886, "learning_rate": 2e-05, "loss": 0.06024916, "step": 21983 }, { "epoch": 43.968, "grad_norm": 1.0283946990966797, "learning_rate": 2e-05, "loss": 0.03911587, "step": 21984 }, { "epoch": 43.97, "grad_norm": 1.0172265768051147, "learning_rate": 2e-05, "loss": 0.04004759, "step": 21985 }, { "epoch": 43.972, "grad_norm": 1.1097747087478638, "learning_rate": 2e-05, "loss": 0.04810769, "step": 21986 }, { "epoch": 43.974, "grad_norm": 1.0351035594940186, "learning_rate": 2e-05, "loss": 0.03433497, "step": 21987 }, { "epoch": 43.976, "grad_norm": 1.0727968215942383, "learning_rate": 2e-05, "loss": 0.03750125, "step": 21988 }, { "epoch": 43.978, "grad_norm": 1.0355806350708008, "learning_rate": 2e-05, "loss": 0.05102752, "step": 21989 }, { "epoch": 43.98, "grad_norm": 1.1399158239364624, "learning_rate": 2e-05, "loss": 0.0493378, "step": 21990 }, { "epoch": 43.982, "grad_norm": 1.0254075527191162, "learning_rate": 2e-05, "loss": 0.03875503, "step": 21991 }, { "epoch": 43.984, "grad_norm": 1.05193030834198, "learning_rate": 2e-05, "loss": 0.04947971, "step": 21992 }, { "epoch": 43.986, "grad_norm": 1.1365094184875488, "learning_rate": 2e-05, "loss": 0.04819956, "step": 21993 }, { "epoch": 43.988, "grad_norm": 0.8426253199577332, "learning_rate": 2e-05, "loss": 0.03433996, "step": 21994 }, { "epoch": 43.99, "grad_norm": 1.9363209009170532, "learning_rate": 2e-05, "loss": 0.03850646, "step": 21995 }, { "epoch": 43.992, "grad_norm": 1.4353491067886353, "learning_rate": 2e-05, "loss": 0.05204204, "step": 21996 }, { "epoch": 43.994, "grad_norm": 0.9567469954490662, "learning_rate": 2e-05, "loss": 0.04024803, "step": 21997 }, { "epoch": 43.996, "grad_norm": 1.0015860795974731, "learning_rate": 2e-05, "loss": 0.03549778, "step": 21998 }, { "epoch": 43.998, "grad_norm": 1.1366320848464966, "learning_rate": 2e-05, "loss": 0.04876912, "step": 21999 }, { "epoch": 44.0, "grad_norm": 1.6706385612487793, "learning_rate": 2e-05, "loss": 0.06270337, "step": 22000 }, { "epoch": 44.0, "eval_performance": { "AngleClassification_1": 0.998, "AngleClassification_2": 0.998, "AngleClassification_3": 0.9860279441117764, "Equal_1": 0.996, "Equal_2": 0.9800399201596807, "Equal_3": 0.9860279441117764, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 1.0, "Parallel_1": 0.9919839679358717, "Parallel_2": 0.9959919839679359, "Parallel_3": 0.994, "Perpendicular_1": 0.998, "Perpendicular_2": 0.994, "Perpendicular_3": 0.8977955911823647, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.9976666666666667, "PointLiesOnCircle_3": 0.9896, "PointLiesOnLine_1": 0.9919839679358717, "PointLiesOnLine_2": 0.9959919839679359, "PointLiesOnLine_3": 0.9820359281437125 }, "eval_runtime": 227.4775, "eval_samples_per_second": 46.158, "eval_steps_per_second": 0.923, "step": 22000 }, { "epoch": 44.002, "grad_norm": 1.1486778259277344, "learning_rate": 2e-05, "loss": 0.03479688, "step": 22001 }, { "epoch": 44.004, "grad_norm": 1.128206729888916, "learning_rate": 2e-05, "loss": 0.03840196, "step": 22002 }, { "epoch": 44.006, "grad_norm": 3.7097110748291016, "learning_rate": 2e-05, "loss": 0.04238819, "step": 22003 }, { "epoch": 44.008, "grad_norm": 1.0236551761627197, "learning_rate": 2e-05, "loss": 0.04295935, "step": 22004 }, { "epoch": 44.01, "grad_norm": 1.437950611114502, "learning_rate": 2e-05, "loss": 0.03999566, "step": 22005 }, { "epoch": 44.012, "grad_norm": 0.9598550796508789, "learning_rate": 2e-05, "loss": 0.02585863, "step": 22006 }, { "epoch": 44.014, "grad_norm": 1.2125970125198364, "learning_rate": 2e-05, "loss": 0.04545759, "step": 22007 }, { "epoch": 44.016, "grad_norm": 0.9620596766471863, "learning_rate": 2e-05, "loss": 0.03089815, "step": 22008 }, { "epoch": 44.018, "grad_norm": 2.490267276763916, "learning_rate": 2e-05, "loss": 0.04687466, "step": 22009 }, { "epoch": 44.02, "grad_norm": 2.1504459381103516, "learning_rate": 2e-05, "loss": 0.05155072, "step": 22010 }, { "epoch": 44.022, "grad_norm": 1.1399226188659668, "learning_rate": 2e-05, "loss": 0.05008369, "step": 22011 }, { "epoch": 44.024, "grad_norm": 1.209912657737732, "learning_rate": 2e-05, "loss": 0.05356804, "step": 22012 }, { "epoch": 44.026, "grad_norm": 1.4935914278030396, "learning_rate": 2e-05, "loss": 0.05371639, "step": 22013 }, { "epoch": 44.028, "grad_norm": 5.445562362670898, "learning_rate": 2e-05, "loss": 0.05408959, "step": 22014 }, { "epoch": 44.03, "grad_norm": 0.9278953671455383, "learning_rate": 2e-05, "loss": 0.03237933, "step": 22015 }, { "epoch": 44.032, "grad_norm": 0.8344036340713501, "learning_rate": 2e-05, "loss": 0.02752409, "step": 22016 }, { "epoch": 44.034, "grad_norm": 3.2902767658233643, "learning_rate": 2e-05, "loss": 0.04624265, "step": 22017 }, { "epoch": 44.036, "grad_norm": 1.0212173461914062, "learning_rate": 2e-05, "loss": 0.03900445, "step": 22018 }, { "epoch": 44.038, "grad_norm": 1.0109410285949707, "learning_rate": 2e-05, "loss": 0.03278112, "step": 22019 }, { "epoch": 44.04, "grad_norm": 1.118867039680481, "learning_rate": 2e-05, "loss": 0.04292937, "step": 22020 }, { "epoch": 44.042, "grad_norm": 1.230000376701355, "learning_rate": 2e-05, "loss": 0.04995851, "step": 22021 }, { "epoch": 44.044, "grad_norm": 1.5635370016098022, "learning_rate": 2e-05, "loss": 0.05769979, "step": 22022 }, { "epoch": 44.046, "grad_norm": 1.0172475576400757, "learning_rate": 2e-05, "loss": 0.04210283, "step": 22023 }, { "epoch": 44.048, "grad_norm": 1.1504994630813599, "learning_rate": 2e-05, "loss": 0.05082963, "step": 22024 }, { "epoch": 44.05, "grad_norm": 1.1526234149932861, "learning_rate": 2e-05, "loss": 0.04860536, "step": 22025 }, { "epoch": 44.052, "grad_norm": 0.986532986164093, "learning_rate": 2e-05, "loss": 0.02972521, "step": 22026 }, { "epoch": 44.054, "grad_norm": 1.7157878875732422, "learning_rate": 2e-05, "loss": 0.04263962, "step": 22027 }, { "epoch": 44.056, "grad_norm": 2.5859761238098145, "learning_rate": 2e-05, "loss": 0.04729125, "step": 22028 }, { "epoch": 44.058, "grad_norm": 0.8942874670028687, "learning_rate": 2e-05, "loss": 0.03287193, "step": 22029 }, { "epoch": 44.06, "grad_norm": 3.5066335201263428, "learning_rate": 2e-05, "loss": 0.04010051, "step": 22030 }, { "epoch": 44.062, "grad_norm": 0.9948264360427856, "learning_rate": 2e-05, "loss": 0.04390644, "step": 22031 }, { "epoch": 44.064, "grad_norm": 1.338749885559082, "learning_rate": 2e-05, "loss": 0.04682916, "step": 22032 }, { "epoch": 44.066, "grad_norm": 1.3361210823059082, "learning_rate": 2e-05, "loss": 0.04816816, "step": 22033 }, { "epoch": 44.068, "grad_norm": 1.1360564231872559, "learning_rate": 2e-05, "loss": 0.06189732, "step": 22034 }, { "epoch": 44.07, "grad_norm": 2.00252103805542, "learning_rate": 2e-05, "loss": 0.05442529, "step": 22035 }, { "epoch": 44.072, "grad_norm": 1.015679121017456, "learning_rate": 2e-05, "loss": 0.05148002, "step": 22036 }, { "epoch": 44.074, "grad_norm": 0.9881927967071533, "learning_rate": 2e-05, "loss": 0.03703018, "step": 22037 }, { "epoch": 44.076, "grad_norm": 1.0406641960144043, "learning_rate": 2e-05, "loss": 0.04674499, "step": 22038 }, { "epoch": 44.078, "grad_norm": 1.0359641313552856, "learning_rate": 2e-05, "loss": 0.04124326, "step": 22039 }, { "epoch": 44.08, "grad_norm": 1.085798740386963, "learning_rate": 2e-05, "loss": 0.04590926, "step": 22040 }, { "epoch": 44.082, "grad_norm": 0.9627858400344849, "learning_rate": 2e-05, "loss": 0.033943, "step": 22041 }, { "epoch": 44.084, "grad_norm": 1.061272144317627, "learning_rate": 2e-05, "loss": 0.0406946, "step": 22042 }, { "epoch": 44.086, "grad_norm": 0.970061719417572, "learning_rate": 2e-05, "loss": 0.0392506, "step": 22043 }, { "epoch": 44.088, "grad_norm": 1.213443636894226, "learning_rate": 2e-05, "loss": 0.04148027, "step": 22044 }, { "epoch": 44.09, "grad_norm": 1.3926465511322021, "learning_rate": 2e-05, "loss": 0.05698966, "step": 22045 }, { "epoch": 44.092, "grad_norm": 1.186166763305664, "learning_rate": 2e-05, "loss": 0.04065004, "step": 22046 }, { "epoch": 44.094, "grad_norm": 1.488438367843628, "learning_rate": 2e-05, "loss": 0.05712264, "step": 22047 }, { "epoch": 44.096, "grad_norm": 1.0042719841003418, "learning_rate": 2e-05, "loss": 0.03435665, "step": 22048 }, { "epoch": 44.098, "grad_norm": 1.5414302349090576, "learning_rate": 2e-05, "loss": 0.0487525, "step": 22049 }, { "epoch": 44.1, "grad_norm": 1.0772267580032349, "learning_rate": 2e-05, "loss": 0.04256324, "step": 22050 }, { "epoch": 44.102, "grad_norm": 0.8900595903396606, "learning_rate": 2e-05, "loss": 0.03321432, "step": 22051 }, { "epoch": 44.104, "grad_norm": 1.0881271362304688, "learning_rate": 2e-05, "loss": 0.04632396, "step": 22052 }, { "epoch": 44.106, "grad_norm": 0.9731775522232056, "learning_rate": 2e-05, "loss": 0.03861157, "step": 22053 }, { "epoch": 44.108, "grad_norm": 1.2235480546951294, "learning_rate": 2e-05, "loss": 0.04304737, "step": 22054 }, { "epoch": 44.11, "grad_norm": 1.361940622329712, "learning_rate": 2e-05, "loss": 0.03586107, "step": 22055 }, { "epoch": 44.112, "grad_norm": 1.311922311782837, "learning_rate": 2e-05, "loss": 0.04131404, "step": 22056 }, { "epoch": 44.114, "grad_norm": 1.4777196645736694, "learning_rate": 2e-05, "loss": 0.0391974, "step": 22057 }, { "epoch": 44.116, "grad_norm": 1.388215184211731, "learning_rate": 2e-05, "loss": 0.04404885, "step": 22058 }, { "epoch": 44.118, "grad_norm": 0.9977417588233948, "learning_rate": 2e-05, "loss": 0.03999856, "step": 22059 }, { "epoch": 44.12, "grad_norm": 0.8863751292228699, "learning_rate": 2e-05, "loss": 0.03590537, "step": 22060 }, { "epoch": 44.122, "grad_norm": 1.367692470550537, "learning_rate": 2e-05, "loss": 0.06379272, "step": 22061 }, { "epoch": 44.124, "grad_norm": 1.4369134902954102, "learning_rate": 2e-05, "loss": 0.04493314, "step": 22062 }, { "epoch": 44.126, "grad_norm": 1.0349559783935547, "learning_rate": 2e-05, "loss": 0.03948306, "step": 22063 }, { "epoch": 44.128, "grad_norm": 1.2433481216430664, "learning_rate": 2e-05, "loss": 0.0533002, "step": 22064 }, { "epoch": 44.13, "grad_norm": 1.1504900455474854, "learning_rate": 2e-05, "loss": 0.02912554, "step": 22065 }, { "epoch": 44.132, "grad_norm": 0.9900869131088257, "learning_rate": 2e-05, "loss": 0.04054887, "step": 22066 }, { "epoch": 44.134, "grad_norm": 1.2694700956344604, "learning_rate": 2e-05, "loss": 0.04858723, "step": 22067 }, { "epoch": 44.136, "grad_norm": 1.1225833892822266, "learning_rate": 2e-05, "loss": 0.03824493, "step": 22068 }, { "epoch": 44.138, "grad_norm": 1.0948818922042847, "learning_rate": 2e-05, "loss": 0.03664181, "step": 22069 }, { "epoch": 44.14, "grad_norm": 1.0735647678375244, "learning_rate": 2e-05, "loss": 0.0435849, "step": 22070 }, { "epoch": 44.142, "grad_norm": 1.0746722221374512, "learning_rate": 2e-05, "loss": 0.04386512, "step": 22071 }, { "epoch": 44.144, "grad_norm": 0.9639008045196533, "learning_rate": 2e-05, "loss": 0.03280676, "step": 22072 }, { "epoch": 44.146, "grad_norm": 1.1211462020874023, "learning_rate": 2e-05, "loss": 0.03954466, "step": 22073 }, { "epoch": 44.148, "grad_norm": 0.996179461479187, "learning_rate": 2e-05, "loss": 0.04478, "step": 22074 }, { "epoch": 44.15, "grad_norm": 1.2136204242706299, "learning_rate": 2e-05, "loss": 0.06488493, "step": 22075 }, { "epoch": 44.152, "grad_norm": 2.1508867740631104, "learning_rate": 2e-05, "loss": 0.05240536, "step": 22076 }, { "epoch": 44.154, "grad_norm": 1.0234856605529785, "learning_rate": 2e-05, "loss": 0.05230555, "step": 22077 }, { "epoch": 44.156, "grad_norm": 1.1181385517120361, "learning_rate": 2e-05, "loss": 0.05086254, "step": 22078 }, { "epoch": 44.158, "grad_norm": 1.4816415309906006, "learning_rate": 2e-05, "loss": 0.02878286, "step": 22079 }, { "epoch": 44.16, "grad_norm": 1.9716747999191284, "learning_rate": 2e-05, "loss": 0.04603901, "step": 22080 }, { "epoch": 44.162, "grad_norm": 1.8581093549728394, "learning_rate": 2e-05, "loss": 0.04996035, "step": 22081 }, { "epoch": 44.164, "grad_norm": 0.9676352143287659, "learning_rate": 2e-05, "loss": 0.03708947, "step": 22082 }, { "epoch": 44.166, "grad_norm": 0.974334716796875, "learning_rate": 2e-05, "loss": 0.03217529, "step": 22083 }, { "epoch": 44.168, "grad_norm": 1.1695177555084229, "learning_rate": 2e-05, "loss": 0.02807998, "step": 22084 }, { "epoch": 44.17, "grad_norm": 1.10268235206604, "learning_rate": 2e-05, "loss": 0.04646431, "step": 22085 }, { "epoch": 44.172, "grad_norm": 0.9223012328147888, "learning_rate": 2e-05, "loss": 0.03594355, "step": 22086 }, { "epoch": 44.174, "grad_norm": 1.1538010835647583, "learning_rate": 2e-05, "loss": 0.0563981, "step": 22087 }, { "epoch": 44.176, "grad_norm": 1.2387542724609375, "learning_rate": 2e-05, "loss": 0.02701533, "step": 22088 }, { "epoch": 44.178, "grad_norm": 1.4882017374038696, "learning_rate": 2e-05, "loss": 0.03421149, "step": 22089 }, { "epoch": 44.18, "grad_norm": 1.3977926969528198, "learning_rate": 2e-05, "loss": 0.05531197, "step": 22090 }, { "epoch": 44.182, "grad_norm": 1.2232346534729004, "learning_rate": 2e-05, "loss": 0.05046552, "step": 22091 }, { "epoch": 44.184, "grad_norm": 1.1296195983886719, "learning_rate": 2e-05, "loss": 0.04450367, "step": 22092 }, { "epoch": 44.186, "grad_norm": 0.972041130065918, "learning_rate": 2e-05, "loss": 0.03986066, "step": 22093 }, { "epoch": 44.188, "grad_norm": 1.846015453338623, "learning_rate": 2e-05, "loss": 0.03839, "step": 22094 }, { "epoch": 44.19, "grad_norm": 1.3027209043502808, "learning_rate": 2e-05, "loss": 0.0479676, "step": 22095 }, { "epoch": 44.192, "grad_norm": 1.2403392791748047, "learning_rate": 2e-05, "loss": 0.04153728, "step": 22096 }, { "epoch": 44.194, "grad_norm": 0.8577671051025391, "learning_rate": 2e-05, "loss": 0.0346971, "step": 22097 }, { "epoch": 44.196, "grad_norm": 1.088318943977356, "learning_rate": 2e-05, "loss": 0.03946019, "step": 22098 }, { "epoch": 44.198, "grad_norm": 1.1416734457015991, "learning_rate": 2e-05, "loss": 0.04504424, "step": 22099 }, { "epoch": 44.2, "grad_norm": 1.152953028678894, "learning_rate": 2e-05, "loss": 0.04866257, "step": 22100 }, { "epoch": 44.202, "grad_norm": 0.9592025876045227, "learning_rate": 2e-05, "loss": 0.04162867, "step": 22101 }, { "epoch": 44.204, "grad_norm": 1.0223129987716675, "learning_rate": 2e-05, "loss": 0.03451943, "step": 22102 }, { "epoch": 44.206, "grad_norm": 2.095681667327881, "learning_rate": 2e-05, "loss": 0.06493147, "step": 22103 }, { "epoch": 44.208, "grad_norm": 1.1211806535720825, "learning_rate": 2e-05, "loss": 0.05203718, "step": 22104 }, { "epoch": 44.21, "grad_norm": 1.136399507522583, "learning_rate": 2e-05, "loss": 0.03635731, "step": 22105 }, { "epoch": 44.212, "grad_norm": 1.0691232681274414, "learning_rate": 2e-05, "loss": 0.05409363, "step": 22106 }, { "epoch": 44.214, "grad_norm": 1.4868789911270142, "learning_rate": 2e-05, "loss": 0.04908937, "step": 22107 }, { "epoch": 44.216, "grad_norm": 1.035171389579773, "learning_rate": 2e-05, "loss": 0.03525033, "step": 22108 }, { "epoch": 44.218, "grad_norm": 3.680039405822754, "learning_rate": 2e-05, "loss": 0.0449592, "step": 22109 }, { "epoch": 44.22, "grad_norm": 0.9612916111946106, "learning_rate": 2e-05, "loss": 0.030032, "step": 22110 }, { "epoch": 44.222, "grad_norm": 0.9486534595489502, "learning_rate": 2e-05, "loss": 0.0384844, "step": 22111 }, { "epoch": 44.224, "grad_norm": 2.912959575653076, "learning_rate": 2e-05, "loss": 0.03703021, "step": 22112 }, { "epoch": 44.226, "grad_norm": 1.3475031852722168, "learning_rate": 2e-05, "loss": 0.04136727, "step": 22113 }, { "epoch": 44.228, "grad_norm": 1.1465694904327393, "learning_rate": 2e-05, "loss": 0.04480891, "step": 22114 }, { "epoch": 44.23, "grad_norm": 3.2178385257720947, "learning_rate": 2e-05, "loss": 0.07284944, "step": 22115 }, { "epoch": 44.232, "grad_norm": 1.764460802078247, "learning_rate": 2e-05, "loss": 0.0688644, "step": 22116 }, { "epoch": 44.234, "grad_norm": 1.0309200286865234, "learning_rate": 2e-05, "loss": 0.03732551, "step": 22117 }, { "epoch": 44.236, "grad_norm": 1.1919935941696167, "learning_rate": 2e-05, "loss": 0.04687998, "step": 22118 }, { "epoch": 44.238, "grad_norm": 1.862392544746399, "learning_rate": 2e-05, "loss": 0.0302507, "step": 22119 }, { "epoch": 44.24, "grad_norm": 1.1466257572174072, "learning_rate": 2e-05, "loss": 0.04855706, "step": 22120 }, { "epoch": 44.242, "grad_norm": 1.234276294708252, "learning_rate": 2e-05, "loss": 0.05205379, "step": 22121 }, { "epoch": 44.244, "grad_norm": 1.068332314491272, "learning_rate": 2e-05, "loss": 0.03916954, "step": 22122 }, { "epoch": 44.246, "grad_norm": 1.3641719818115234, "learning_rate": 2e-05, "loss": 0.04176648, "step": 22123 }, { "epoch": 44.248, "grad_norm": 1.134946346282959, "learning_rate": 2e-05, "loss": 0.04125345, "step": 22124 }, { "epoch": 44.25, "grad_norm": 0.8787440657615662, "learning_rate": 2e-05, "loss": 0.03392597, "step": 22125 }, { "epoch": 44.252, "grad_norm": 1.048780918121338, "learning_rate": 2e-05, "loss": 0.03813901, "step": 22126 }, { "epoch": 44.254, "grad_norm": 1.777086615562439, "learning_rate": 2e-05, "loss": 0.06852502, "step": 22127 }, { "epoch": 44.256, "grad_norm": 1.2903952598571777, "learning_rate": 2e-05, "loss": 0.04117747, "step": 22128 }, { "epoch": 44.258, "grad_norm": 1.4206805229187012, "learning_rate": 2e-05, "loss": 0.05051043, "step": 22129 }, { "epoch": 44.26, "grad_norm": 0.9553940296173096, "learning_rate": 2e-05, "loss": 0.03918055, "step": 22130 }, { "epoch": 44.262, "grad_norm": 1.4055997133255005, "learning_rate": 2e-05, "loss": 0.05484942, "step": 22131 }, { "epoch": 44.264, "grad_norm": 0.8347626328468323, "learning_rate": 2e-05, "loss": 0.03171722, "step": 22132 }, { "epoch": 44.266, "grad_norm": 1.2679643630981445, "learning_rate": 2e-05, "loss": 0.03901686, "step": 22133 }, { "epoch": 44.268, "grad_norm": 0.9170762896537781, "learning_rate": 2e-05, "loss": 0.03071016, "step": 22134 }, { "epoch": 44.27, "grad_norm": 1.1278331279754639, "learning_rate": 2e-05, "loss": 0.03388392, "step": 22135 }, { "epoch": 44.272, "grad_norm": 1.0093847513198853, "learning_rate": 2e-05, "loss": 0.04385267, "step": 22136 }, { "epoch": 44.274, "grad_norm": 0.9710104465484619, "learning_rate": 2e-05, "loss": 0.02359691, "step": 22137 }, { "epoch": 44.276, "grad_norm": 0.9345017671585083, "learning_rate": 2e-05, "loss": 0.03838722, "step": 22138 }, { "epoch": 44.278, "grad_norm": 1.3171968460083008, "learning_rate": 2e-05, "loss": 0.03028284, "step": 22139 }, { "epoch": 44.28, "grad_norm": 0.9421518445014954, "learning_rate": 2e-05, "loss": 0.04354414, "step": 22140 }, { "epoch": 44.282, "grad_norm": 1.18100106716156, "learning_rate": 2e-05, "loss": 0.04566178, "step": 22141 }, { "epoch": 44.284, "grad_norm": 1.3652108907699585, "learning_rate": 2e-05, "loss": 0.04804132, "step": 22142 }, { "epoch": 44.286, "grad_norm": 0.8998939394950867, "learning_rate": 2e-05, "loss": 0.0368832, "step": 22143 }, { "epoch": 44.288, "grad_norm": 1.100090503692627, "learning_rate": 2e-05, "loss": 0.04489031, "step": 22144 }, { "epoch": 44.29, "grad_norm": 1.7968313694000244, "learning_rate": 2e-05, "loss": 0.05429298, "step": 22145 }, { "epoch": 44.292, "grad_norm": 0.9581459164619446, "learning_rate": 2e-05, "loss": 0.0292104, "step": 22146 }, { "epoch": 44.294, "grad_norm": 1.0603289604187012, "learning_rate": 2e-05, "loss": 0.03868495, "step": 22147 }, { "epoch": 44.296, "grad_norm": 1.4357331991195679, "learning_rate": 2e-05, "loss": 0.04415393, "step": 22148 }, { "epoch": 44.298, "grad_norm": 1.5788393020629883, "learning_rate": 2e-05, "loss": 0.05479918, "step": 22149 }, { "epoch": 44.3, "grad_norm": 1.2428208589553833, "learning_rate": 2e-05, "loss": 0.02508749, "step": 22150 }, { "epoch": 44.302, "grad_norm": 1.2665565013885498, "learning_rate": 2e-05, "loss": 0.05589785, "step": 22151 }, { "epoch": 44.304, "grad_norm": 1.0671167373657227, "learning_rate": 2e-05, "loss": 0.03900685, "step": 22152 }, { "epoch": 44.306, "grad_norm": 1.5799546241760254, "learning_rate": 2e-05, "loss": 0.05999384, "step": 22153 }, { "epoch": 44.308, "grad_norm": 1.2408109903335571, "learning_rate": 2e-05, "loss": 0.04679197, "step": 22154 }, { "epoch": 44.31, "grad_norm": 1.2633663415908813, "learning_rate": 2e-05, "loss": 0.05944971, "step": 22155 }, { "epoch": 44.312, "grad_norm": 1.2997796535491943, "learning_rate": 2e-05, "loss": 0.03434312, "step": 22156 }, { "epoch": 44.314, "grad_norm": 1.2305532693862915, "learning_rate": 2e-05, "loss": 0.04723737, "step": 22157 }, { "epoch": 44.316, "grad_norm": 0.9954727292060852, "learning_rate": 2e-05, "loss": 0.03561275, "step": 22158 }, { "epoch": 44.318, "grad_norm": 1.2701889276504517, "learning_rate": 2e-05, "loss": 0.04012364, "step": 22159 }, { "epoch": 44.32, "grad_norm": 1.1554813385009766, "learning_rate": 2e-05, "loss": 0.04713125, "step": 22160 }, { "epoch": 44.322, "grad_norm": 1.5843422412872314, "learning_rate": 2e-05, "loss": 0.05255471, "step": 22161 }, { "epoch": 44.324, "grad_norm": 1.189100742340088, "learning_rate": 2e-05, "loss": 0.04949726, "step": 22162 }, { "epoch": 44.326, "grad_norm": 1.1100517511367798, "learning_rate": 2e-05, "loss": 0.0382578, "step": 22163 }, { "epoch": 44.328, "grad_norm": 1.1668012142181396, "learning_rate": 2e-05, "loss": 0.04668381, "step": 22164 }, { "epoch": 44.33, "grad_norm": 1.4729169607162476, "learning_rate": 2e-05, "loss": 0.05481876, "step": 22165 }, { "epoch": 44.332, "grad_norm": 0.8602598905563354, "learning_rate": 2e-05, "loss": 0.03191109, "step": 22166 }, { "epoch": 44.334, "grad_norm": 1.1224842071533203, "learning_rate": 2e-05, "loss": 0.0474259, "step": 22167 }, { "epoch": 44.336, "grad_norm": 1.1729068756103516, "learning_rate": 2e-05, "loss": 0.04017886, "step": 22168 }, { "epoch": 44.338, "grad_norm": 1.1660051345825195, "learning_rate": 2e-05, "loss": 0.05880002, "step": 22169 }, { "epoch": 44.34, "grad_norm": 5.4262566566467285, "learning_rate": 2e-05, "loss": 0.04919434, "step": 22170 }, { "epoch": 44.342, "grad_norm": 1.907534122467041, "learning_rate": 2e-05, "loss": 0.05685405, "step": 22171 }, { "epoch": 44.344, "grad_norm": 1.0995590686798096, "learning_rate": 2e-05, "loss": 0.03859051, "step": 22172 }, { "epoch": 44.346, "grad_norm": 1.6237661838531494, "learning_rate": 2e-05, "loss": 0.04738256, "step": 22173 }, { "epoch": 44.348, "grad_norm": 1.414217233657837, "learning_rate": 2e-05, "loss": 0.04273638, "step": 22174 }, { "epoch": 44.35, "grad_norm": 1.1273901462554932, "learning_rate": 2e-05, "loss": 0.04976018, "step": 22175 }, { "epoch": 44.352, "grad_norm": 1.0022634267807007, "learning_rate": 2e-05, "loss": 0.04153289, "step": 22176 }, { "epoch": 44.354, "grad_norm": 1.1537055969238281, "learning_rate": 2e-05, "loss": 0.05898451, "step": 22177 }, { "epoch": 44.356, "grad_norm": 0.9678599834442139, "learning_rate": 2e-05, "loss": 0.04154794, "step": 22178 }, { "epoch": 44.358, "grad_norm": 1.2798097133636475, "learning_rate": 2e-05, "loss": 0.04710197, "step": 22179 }, { "epoch": 44.36, "grad_norm": 1.0993256568908691, "learning_rate": 2e-05, "loss": 0.05892128, "step": 22180 }, { "epoch": 44.362, "grad_norm": 2.6523799896240234, "learning_rate": 2e-05, "loss": 0.06699716, "step": 22181 }, { "epoch": 44.364, "grad_norm": 1.1506569385528564, "learning_rate": 2e-05, "loss": 0.05674159, "step": 22182 }, { "epoch": 44.366, "grad_norm": 1.1494166851043701, "learning_rate": 2e-05, "loss": 0.04806326, "step": 22183 }, { "epoch": 44.368, "grad_norm": 0.9502562880516052, "learning_rate": 2e-05, "loss": 0.04611699, "step": 22184 }, { "epoch": 44.37, "grad_norm": 0.971316397190094, "learning_rate": 2e-05, "loss": 0.03556896, "step": 22185 }, { "epoch": 44.372, "grad_norm": 1.9422978162765503, "learning_rate": 2e-05, "loss": 0.05291616, "step": 22186 }, { "epoch": 44.374, "grad_norm": 1.4101529121398926, "learning_rate": 2e-05, "loss": 0.06052881, "step": 22187 }, { "epoch": 44.376, "grad_norm": 1.2755916118621826, "learning_rate": 2e-05, "loss": 0.06201012, "step": 22188 }, { "epoch": 44.378, "grad_norm": 1.2771319150924683, "learning_rate": 2e-05, "loss": 0.05124611, "step": 22189 }, { "epoch": 44.38, "grad_norm": 1.1453359127044678, "learning_rate": 2e-05, "loss": 0.04612196, "step": 22190 }, { "epoch": 44.382, "grad_norm": 1.2461353540420532, "learning_rate": 2e-05, "loss": 0.04717721, "step": 22191 }, { "epoch": 44.384, "grad_norm": 0.9414719343185425, "learning_rate": 2e-05, "loss": 0.03941375, "step": 22192 }, { "epoch": 44.386, "grad_norm": 0.9822537302970886, "learning_rate": 2e-05, "loss": 0.02946872, "step": 22193 }, { "epoch": 44.388, "grad_norm": 1.2123873233795166, "learning_rate": 2e-05, "loss": 0.05044227, "step": 22194 }, { "epoch": 44.39, "grad_norm": 1.1495866775512695, "learning_rate": 2e-05, "loss": 0.04441724, "step": 22195 }, { "epoch": 44.392, "grad_norm": 1.019335389137268, "learning_rate": 2e-05, "loss": 0.04443783, "step": 22196 }, { "epoch": 44.394, "grad_norm": 1.1822301149368286, "learning_rate": 2e-05, "loss": 0.04529393, "step": 22197 }, { "epoch": 44.396, "grad_norm": 1.0161681175231934, "learning_rate": 2e-05, "loss": 0.03272749, "step": 22198 }, { "epoch": 44.398, "grad_norm": 1.5643200874328613, "learning_rate": 2e-05, "loss": 0.02541869, "step": 22199 }, { "epoch": 44.4, "grad_norm": 1.0314042568206787, "learning_rate": 2e-05, "loss": 0.04241816, "step": 22200 }, { "epoch": 44.402, "grad_norm": 1.241451382637024, "learning_rate": 2e-05, "loss": 0.05062532, "step": 22201 }, { "epoch": 44.404, "grad_norm": 1.1817564964294434, "learning_rate": 2e-05, "loss": 0.04277293, "step": 22202 }, { "epoch": 44.406, "grad_norm": 2.568077802658081, "learning_rate": 2e-05, "loss": 0.04537624, "step": 22203 }, { "epoch": 44.408, "grad_norm": 1.0882031917572021, "learning_rate": 2e-05, "loss": 0.04879285, "step": 22204 }, { "epoch": 44.41, "grad_norm": 2.4440858364105225, "learning_rate": 2e-05, "loss": 0.04765217, "step": 22205 }, { "epoch": 44.412, "grad_norm": 1.0320936441421509, "learning_rate": 2e-05, "loss": 0.04338135, "step": 22206 }, { "epoch": 44.414, "grad_norm": 1.193641185760498, "learning_rate": 2e-05, "loss": 0.04208658, "step": 22207 }, { "epoch": 44.416, "grad_norm": 1.0440864562988281, "learning_rate": 2e-05, "loss": 0.04133472, "step": 22208 }, { "epoch": 44.418, "grad_norm": 1.2432972192764282, "learning_rate": 2e-05, "loss": 0.06200829, "step": 22209 }, { "epoch": 44.42, "grad_norm": 0.9779486656188965, "learning_rate": 2e-05, "loss": 0.04294281, "step": 22210 }, { "epoch": 44.422, "grad_norm": 1.24173903465271, "learning_rate": 2e-05, "loss": 0.06318801, "step": 22211 }, { "epoch": 44.424, "grad_norm": 1.062990665435791, "learning_rate": 2e-05, "loss": 0.04287001, "step": 22212 }, { "epoch": 44.426, "grad_norm": 0.9807645678520203, "learning_rate": 2e-05, "loss": 0.04174829, "step": 22213 }, { "epoch": 44.428, "grad_norm": 1.3242791891098022, "learning_rate": 2e-05, "loss": 0.05152968, "step": 22214 }, { "epoch": 44.43, "grad_norm": 1.2429180145263672, "learning_rate": 2e-05, "loss": 0.05501713, "step": 22215 }, { "epoch": 44.432, "grad_norm": 0.8948402404785156, "learning_rate": 2e-05, "loss": 0.02539237, "step": 22216 }, { "epoch": 44.434, "grad_norm": 1.0229709148406982, "learning_rate": 2e-05, "loss": 0.04136929, "step": 22217 }, { "epoch": 44.436, "grad_norm": 2.1175084114074707, "learning_rate": 2e-05, "loss": 0.03336482, "step": 22218 }, { "epoch": 44.438, "grad_norm": 0.8617966771125793, "learning_rate": 2e-05, "loss": 0.02731165, "step": 22219 }, { "epoch": 44.44, "grad_norm": 0.9811795353889465, "learning_rate": 2e-05, "loss": 0.03888777, "step": 22220 }, { "epoch": 44.442, "grad_norm": 0.8522233366966248, "learning_rate": 2e-05, "loss": 0.0315674, "step": 22221 }, { "epoch": 44.444, "grad_norm": 1.054345965385437, "learning_rate": 2e-05, "loss": 0.04386459, "step": 22222 }, { "epoch": 44.446, "grad_norm": 1.089239478111267, "learning_rate": 2e-05, "loss": 0.05201201, "step": 22223 }, { "epoch": 44.448, "grad_norm": 2.02441143989563, "learning_rate": 2e-05, "loss": 0.03751792, "step": 22224 }, { "epoch": 44.45, "grad_norm": 0.9886369109153748, "learning_rate": 2e-05, "loss": 0.03129119, "step": 22225 }, { "epoch": 44.452, "grad_norm": 1.1047935485839844, "learning_rate": 2e-05, "loss": 0.04079865, "step": 22226 }, { "epoch": 44.454, "grad_norm": 1.2218470573425293, "learning_rate": 2e-05, "loss": 0.05456521, "step": 22227 }, { "epoch": 44.456, "grad_norm": 1.220600962638855, "learning_rate": 2e-05, "loss": 0.04364393, "step": 22228 }, { "epoch": 44.458, "grad_norm": 1.1628963947296143, "learning_rate": 2e-05, "loss": 0.04233833, "step": 22229 }, { "epoch": 44.46, "grad_norm": 1.6179635524749756, "learning_rate": 2e-05, "loss": 0.04072451, "step": 22230 }, { "epoch": 44.462, "grad_norm": 1.387764811515808, "learning_rate": 2e-05, "loss": 0.03863503, "step": 22231 }, { "epoch": 44.464, "grad_norm": 1.4256991147994995, "learning_rate": 2e-05, "loss": 0.05408198, "step": 22232 }, { "epoch": 44.466, "grad_norm": 1.875833511352539, "learning_rate": 2e-05, "loss": 0.03306981, "step": 22233 }, { "epoch": 44.468, "grad_norm": 1.0154317617416382, "learning_rate": 2e-05, "loss": 0.04412909, "step": 22234 }, { "epoch": 44.47, "grad_norm": 1.1102559566497803, "learning_rate": 2e-05, "loss": 0.04411479, "step": 22235 }, { "epoch": 44.472, "grad_norm": 0.9136313199996948, "learning_rate": 2e-05, "loss": 0.02665544, "step": 22236 }, { "epoch": 44.474, "grad_norm": 1.2407798767089844, "learning_rate": 2e-05, "loss": 0.05606948, "step": 22237 }, { "epoch": 44.476, "grad_norm": 1.161798357963562, "learning_rate": 2e-05, "loss": 0.0502895, "step": 22238 }, { "epoch": 44.478, "grad_norm": 0.9837492108345032, "learning_rate": 2e-05, "loss": 0.04210538, "step": 22239 }, { "epoch": 44.48, "grad_norm": 1.0783110857009888, "learning_rate": 2e-05, "loss": 0.04939334, "step": 22240 }, { "epoch": 44.482, "grad_norm": 1.1095173358917236, "learning_rate": 2e-05, "loss": 0.04624308, "step": 22241 }, { "epoch": 44.484, "grad_norm": 1.3388311862945557, "learning_rate": 2e-05, "loss": 0.03101594, "step": 22242 }, { "epoch": 44.486, "grad_norm": 1.0356800556182861, "learning_rate": 2e-05, "loss": 0.04061799, "step": 22243 }, { "epoch": 44.488, "grad_norm": 1.7525897026062012, "learning_rate": 2e-05, "loss": 0.04050147, "step": 22244 }, { "epoch": 44.49, "grad_norm": 1.3241046667099, "learning_rate": 2e-05, "loss": 0.0449311, "step": 22245 }, { "epoch": 44.492, "grad_norm": 1.0734533071517944, "learning_rate": 2e-05, "loss": 0.05382101, "step": 22246 }, { "epoch": 44.494, "grad_norm": 1.8206911087036133, "learning_rate": 2e-05, "loss": 0.0390985, "step": 22247 }, { "epoch": 44.496, "grad_norm": 1.0143961906433105, "learning_rate": 2e-05, "loss": 0.04033419, "step": 22248 }, { "epoch": 44.498, "grad_norm": 1.1307861804962158, "learning_rate": 2e-05, "loss": 0.03557163, "step": 22249 }, { "epoch": 44.5, "grad_norm": 1.0195642709732056, "learning_rate": 2e-05, "loss": 0.04074779, "step": 22250 }, { "epoch": 44.502, "grad_norm": 1.2070776224136353, "learning_rate": 2e-05, "loss": 0.0609494, "step": 22251 }, { "epoch": 44.504, "grad_norm": 1.7347524166107178, "learning_rate": 2e-05, "loss": 0.04837489, "step": 22252 }, { "epoch": 44.506, "grad_norm": 0.8527013659477234, "learning_rate": 2e-05, "loss": 0.03283353, "step": 22253 }, { "epoch": 44.508, "grad_norm": 1.1635022163391113, "learning_rate": 2e-05, "loss": 0.0558426, "step": 22254 }, { "epoch": 44.51, "grad_norm": 1.0687940120697021, "learning_rate": 2e-05, "loss": 0.04316232, "step": 22255 }, { "epoch": 44.512, "grad_norm": 1.6677595376968384, "learning_rate": 2e-05, "loss": 0.04650988, "step": 22256 }, { "epoch": 44.514, "grad_norm": 1.2293652296066284, "learning_rate": 2e-05, "loss": 0.03766809, "step": 22257 }, { "epoch": 44.516, "grad_norm": 0.9184119701385498, "learning_rate": 2e-05, "loss": 0.03994112, "step": 22258 }, { "epoch": 44.518, "grad_norm": 3.6875901222229004, "learning_rate": 2e-05, "loss": 0.06111805, "step": 22259 }, { "epoch": 44.52, "grad_norm": 1.354076862335205, "learning_rate": 2e-05, "loss": 0.04839518, "step": 22260 }, { "epoch": 44.522, "grad_norm": 1.4465880393981934, "learning_rate": 2e-05, "loss": 0.04688533, "step": 22261 }, { "epoch": 44.524, "grad_norm": 1.2020069360733032, "learning_rate": 2e-05, "loss": 0.04431077, "step": 22262 }, { "epoch": 44.526, "grad_norm": 2.7824759483337402, "learning_rate": 2e-05, "loss": 0.04523676, "step": 22263 }, { "epoch": 44.528, "grad_norm": 0.8695899248123169, "learning_rate": 2e-05, "loss": 0.03087896, "step": 22264 }, { "epoch": 44.53, "grad_norm": 0.983089029788971, "learning_rate": 2e-05, "loss": 0.04153121, "step": 22265 }, { "epoch": 44.532, "grad_norm": 1.5301804542541504, "learning_rate": 2e-05, "loss": 0.06744932, "step": 22266 }, { "epoch": 44.534, "grad_norm": 1.208066463470459, "learning_rate": 2e-05, "loss": 0.05154322, "step": 22267 }, { "epoch": 44.536, "grad_norm": 1.0892307758331299, "learning_rate": 2e-05, "loss": 0.03704853, "step": 22268 }, { "epoch": 44.538, "grad_norm": 1.0036585330963135, "learning_rate": 2e-05, "loss": 0.0423006, "step": 22269 }, { "epoch": 44.54, "grad_norm": 0.9021454453468323, "learning_rate": 2e-05, "loss": 0.02821821, "step": 22270 }, { "epoch": 44.542, "grad_norm": 1.2011229991912842, "learning_rate": 2e-05, "loss": 0.05255362, "step": 22271 }, { "epoch": 44.544, "grad_norm": 1.519547462463379, "learning_rate": 2e-05, "loss": 0.04982952, "step": 22272 }, { "epoch": 44.546, "grad_norm": 1.1644221544265747, "learning_rate": 2e-05, "loss": 0.04374713, "step": 22273 }, { "epoch": 44.548, "grad_norm": 1.0513334274291992, "learning_rate": 2e-05, "loss": 0.04468258, "step": 22274 }, { "epoch": 44.55, "grad_norm": 1.2599111795425415, "learning_rate": 2e-05, "loss": 0.06521084, "step": 22275 }, { "epoch": 44.552, "grad_norm": 1.6103324890136719, "learning_rate": 2e-05, "loss": 0.04291496, "step": 22276 }, { "epoch": 44.554, "grad_norm": 1.1199339628219604, "learning_rate": 2e-05, "loss": 0.03871044, "step": 22277 }, { "epoch": 44.556, "grad_norm": 1.2212353944778442, "learning_rate": 2e-05, "loss": 0.04675107, "step": 22278 }, { "epoch": 44.558, "grad_norm": 1.5701035261154175, "learning_rate": 2e-05, "loss": 0.05078992, "step": 22279 }, { "epoch": 44.56, "grad_norm": 1.0534440279006958, "learning_rate": 2e-05, "loss": 0.0456759, "step": 22280 }, { "epoch": 44.562, "grad_norm": 0.9268868565559387, "learning_rate": 2e-05, "loss": 0.03790492, "step": 22281 }, { "epoch": 44.564, "grad_norm": 1.5612784624099731, "learning_rate": 2e-05, "loss": 0.05247653, "step": 22282 }, { "epoch": 44.566, "grad_norm": 0.9932795763015747, "learning_rate": 2e-05, "loss": 0.03737234, "step": 22283 }, { "epoch": 44.568, "grad_norm": 1.1670511960983276, "learning_rate": 2e-05, "loss": 0.0481028, "step": 22284 }, { "epoch": 44.57, "grad_norm": 1.0124061107635498, "learning_rate": 2e-05, "loss": 0.03426702, "step": 22285 }, { "epoch": 44.572, "grad_norm": 1.0174399614334106, "learning_rate": 2e-05, "loss": 0.04280933, "step": 22286 }, { "epoch": 44.574, "grad_norm": 0.8174017667770386, "learning_rate": 2e-05, "loss": 0.03322933, "step": 22287 }, { "epoch": 44.576, "grad_norm": 1.3033250570297241, "learning_rate": 2e-05, "loss": 0.04733118, "step": 22288 }, { "epoch": 44.578, "grad_norm": 1.4559167623519897, "learning_rate": 2e-05, "loss": 0.046699, "step": 22289 }, { "epoch": 44.58, "grad_norm": 1.0813908576965332, "learning_rate": 2e-05, "loss": 0.04066889, "step": 22290 }, { "epoch": 44.582, "grad_norm": 0.8276329040527344, "learning_rate": 2e-05, "loss": 0.02443325, "step": 22291 }, { "epoch": 44.584, "grad_norm": 1.2571067810058594, "learning_rate": 2e-05, "loss": 0.03324332, "step": 22292 }, { "epoch": 44.586, "grad_norm": 1.1909873485565186, "learning_rate": 2e-05, "loss": 0.06143606, "step": 22293 }, { "epoch": 44.588, "grad_norm": 1.1068476438522339, "learning_rate": 2e-05, "loss": 0.04848798, "step": 22294 }, { "epoch": 44.59, "grad_norm": 0.899484395980835, "learning_rate": 2e-05, "loss": 0.02549462, "step": 22295 }, { "epoch": 44.592, "grad_norm": 1.0280879735946655, "learning_rate": 2e-05, "loss": 0.03539465, "step": 22296 }, { "epoch": 44.594, "grad_norm": 1.095839500427246, "learning_rate": 2e-05, "loss": 0.04799998, "step": 22297 }, { "epoch": 44.596, "grad_norm": 2.029635190963745, "learning_rate": 2e-05, "loss": 0.05961087, "step": 22298 }, { "epoch": 44.598, "grad_norm": 1.326757788658142, "learning_rate": 2e-05, "loss": 0.04216152, "step": 22299 }, { "epoch": 44.6, "grad_norm": 1.689661979675293, "learning_rate": 2e-05, "loss": 0.056648, "step": 22300 }, { "epoch": 44.602, "grad_norm": 0.9674505591392517, "learning_rate": 2e-05, "loss": 0.03306528, "step": 22301 }, { "epoch": 44.604, "grad_norm": 0.9252945780754089, "learning_rate": 2e-05, "loss": 0.03612683, "step": 22302 }, { "epoch": 44.606, "grad_norm": 1.9345459938049316, "learning_rate": 2e-05, "loss": 0.04596725, "step": 22303 }, { "epoch": 44.608, "grad_norm": 1.107701301574707, "learning_rate": 2e-05, "loss": 0.04526986, "step": 22304 }, { "epoch": 44.61, "grad_norm": 0.9936708211898804, "learning_rate": 2e-05, "loss": 0.04293675, "step": 22305 }, { "epoch": 44.612, "grad_norm": 1.0487710237503052, "learning_rate": 2e-05, "loss": 0.03803833, "step": 22306 }, { "epoch": 44.614, "grad_norm": 1.0231132507324219, "learning_rate": 2e-05, "loss": 0.04327705, "step": 22307 }, { "epoch": 44.616, "grad_norm": 0.9560285210609436, "learning_rate": 2e-05, "loss": 0.03382394, "step": 22308 }, { "epoch": 44.618, "grad_norm": 1.060267448425293, "learning_rate": 2e-05, "loss": 0.05201086, "step": 22309 }, { "epoch": 44.62, "grad_norm": 1.0428204536437988, "learning_rate": 2e-05, "loss": 0.04709801, "step": 22310 }, { "epoch": 44.622, "grad_norm": 1.2413737773895264, "learning_rate": 2e-05, "loss": 0.06882562, "step": 22311 }, { "epoch": 44.624, "grad_norm": 1.0623873472213745, "learning_rate": 2e-05, "loss": 0.03993951, "step": 22312 }, { "epoch": 44.626, "grad_norm": 1.2858834266662598, "learning_rate": 2e-05, "loss": 0.05642603, "step": 22313 }, { "epoch": 44.628, "grad_norm": 0.995250940322876, "learning_rate": 2e-05, "loss": 0.0377385, "step": 22314 }, { "epoch": 44.63, "grad_norm": 0.9684804677963257, "learning_rate": 2e-05, "loss": 0.03657473, "step": 22315 }, { "epoch": 44.632, "grad_norm": 1.1011205911636353, "learning_rate": 2e-05, "loss": 0.03600908, "step": 22316 }, { "epoch": 44.634, "grad_norm": 1.0236982107162476, "learning_rate": 2e-05, "loss": 0.04915296, "step": 22317 }, { "epoch": 44.636, "grad_norm": 0.9508784413337708, "learning_rate": 2e-05, "loss": 0.03333165, "step": 22318 }, { "epoch": 44.638, "grad_norm": 2.581618309020996, "learning_rate": 2e-05, "loss": 0.04577326, "step": 22319 }, { "epoch": 44.64, "grad_norm": 0.865103006362915, "learning_rate": 2e-05, "loss": 0.03204204, "step": 22320 }, { "epoch": 44.642, "grad_norm": 1.7910643815994263, "learning_rate": 2e-05, "loss": 0.05404741, "step": 22321 }, { "epoch": 44.644, "grad_norm": 1.5276849269866943, "learning_rate": 2e-05, "loss": 0.04781954, "step": 22322 }, { "epoch": 44.646, "grad_norm": 0.9655041098594666, "learning_rate": 2e-05, "loss": 0.02730069, "step": 22323 }, { "epoch": 44.648, "grad_norm": 1.3372278213500977, "learning_rate": 2e-05, "loss": 0.05218061, "step": 22324 }, { "epoch": 44.65, "grad_norm": 1.0904228687286377, "learning_rate": 2e-05, "loss": 0.03820439, "step": 22325 }, { "epoch": 44.652, "grad_norm": 1.405514121055603, "learning_rate": 2e-05, "loss": 0.0401639, "step": 22326 }, { "epoch": 44.654, "grad_norm": 2.7324392795562744, "learning_rate": 2e-05, "loss": 0.0484706, "step": 22327 }, { "epoch": 44.656, "grad_norm": 1.661473274230957, "learning_rate": 2e-05, "loss": 0.05770289, "step": 22328 }, { "epoch": 44.658, "grad_norm": 1.1218535900115967, "learning_rate": 2e-05, "loss": 0.05359188, "step": 22329 }, { "epoch": 44.66, "grad_norm": 1.7433900833129883, "learning_rate": 2e-05, "loss": 0.04724948, "step": 22330 }, { "epoch": 44.662, "grad_norm": 1.246673345565796, "learning_rate": 2e-05, "loss": 0.0573798, "step": 22331 }, { "epoch": 44.664, "grad_norm": 1.2429487705230713, "learning_rate": 2e-05, "loss": 0.040094, "step": 22332 }, { "epoch": 44.666, "grad_norm": 0.9677478075027466, "learning_rate": 2e-05, "loss": 0.04057613, "step": 22333 }, { "epoch": 44.668, "grad_norm": 0.9513575434684753, "learning_rate": 2e-05, "loss": 0.03855705, "step": 22334 }, { "epoch": 44.67, "grad_norm": 1.0048766136169434, "learning_rate": 2e-05, "loss": 0.03509572, "step": 22335 }, { "epoch": 44.672, "grad_norm": 0.9157565236091614, "learning_rate": 2e-05, "loss": 0.03610381, "step": 22336 }, { "epoch": 44.674, "grad_norm": 0.975386381149292, "learning_rate": 2e-05, "loss": 0.0354522, "step": 22337 }, { "epoch": 44.676, "grad_norm": 1.130433440208435, "learning_rate": 2e-05, "loss": 0.05053788, "step": 22338 }, { "epoch": 44.678, "grad_norm": 0.9550377130508423, "learning_rate": 2e-05, "loss": 0.03478336, "step": 22339 }, { "epoch": 44.68, "grad_norm": 1.0262507200241089, "learning_rate": 2e-05, "loss": 0.03937672, "step": 22340 }, { "epoch": 44.682, "grad_norm": 1.0736156702041626, "learning_rate": 2e-05, "loss": 0.03108214, "step": 22341 }, { "epoch": 44.684, "grad_norm": 1.5756748914718628, "learning_rate": 2e-05, "loss": 0.03586569, "step": 22342 }, { "epoch": 44.686, "grad_norm": 0.9697388410568237, "learning_rate": 2e-05, "loss": 0.03758947, "step": 22343 }, { "epoch": 44.688, "grad_norm": 1.1279771327972412, "learning_rate": 2e-05, "loss": 0.05336948, "step": 22344 }, { "epoch": 44.69, "grad_norm": 1.3549916744232178, "learning_rate": 2e-05, "loss": 0.05845712, "step": 22345 }, { "epoch": 44.692, "grad_norm": 1.2117455005645752, "learning_rate": 2e-05, "loss": 0.03689345, "step": 22346 }, { "epoch": 44.694, "grad_norm": 1.0066114664077759, "learning_rate": 2e-05, "loss": 0.03682059, "step": 22347 }, { "epoch": 44.696, "grad_norm": 1.1049946546554565, "learning_rate": 2e-05, "loss": 0.03354327, "step": 22348 }, { "epoch": 44.698, "grad_norm": 1.0396767854690552, "learning_rate": 2e-05, "loss": 0.04869078, "step": 22349 }, { "epoch": 44.7, "grad_norm": 0.9242196679115295, "learning_rate": 2e-05, "loss": 0.03528931, "step": 22350 }, { "epoch": 44.702, "grad_norm": 1.1391725540161133, "learning_rate": 2e-05, "loss": 0.03417369, "step": 22351 }, { "epoch": 44.704, "grad_norm": 0.8730292320251465, "learning_rate": 2e-05, "loss": 0.0294034, "step": 22352 }, { "epoch": 44.706, "grad_norm": 1.0678536891937256, "learning_rate": 2e-05, "loss": 0.04340443, "step": 22353 }, { "epoch": 44.708, "grad_norm": 1.4122169017791748, "learning_rate": 2e-05, "loss": 0.03654358, "step": 22354 }, { "epoch": 44.71, "grad_norm": 1.0455156564712524, "learning_rate": 2e-05, "loss": 0.05023683, "step": 22355 }, { "epoch": 44.712, "grad_norm": 2.5826010704040527, "learning_rate": 2e-05, "loss": 0.05081033, "step": 22356 }, { "epoch": 44.714, "grad_norm": 0.9565540552139282, "learning_rate": 2e-05, "loss": 0.0369923, "step": 22357 }, { "epoch": 44.716, "grad_norm": 0.9309918880462646, "learning_rate": 2e-05, "loss": 0.03079583, "step": 22358 }, { "epoch": 44.718, "grad_norm": 1.9977333545684814, "learning_rate": 2e-05, "loss": 0.04587205, "step": 22359 }, { "epoch": 44.72, "grad_norm": 1.2527209520339966, "learning_rate": 2e-05, "loss": 0.05882639, "step": 22360 }, { "epoch": 44.722, "grad_norm": 0.9152764678001404, "learning_rate": 2e-05, "loss": 0.03343434, "step": 22361 }, { "epoch": 44.724, "grad_norm": 1.2569345235824585, "learning_rate": 2e-05, "loss": 0.04354488, "step": 22362 }, { "epoch": 44.726, "grad_norm": 0.962058424949646, "learning_rate": 2e-05, "loss": 0.03416631, "step": 22363 }, { "epoch": 44.728, "grad_norm": 0.9565489292144775, "learning_rate": 2e-05, "loss": 0.03315727, "step": 22364 }, { "epoch": 44.73, "grad_norm": 2.5995280742645264, "learning_rate": 2e-05, "loss": 0.05633999, "step": 22365 }, { "epoch": 44.732, "grad_norm": 1.0589574575424194, "learning_rate": 2e-05, "loss": 0.04468869, "step": 22366 }, { "epoch": 44.734, "grad_norm": 1.0586923360824585, "learning_rate": 2e-05, "loss": 0.04303212, "step": 22367 }, { "epoch": 44.736, "grad_norm": 1.2594135999679565, "learning_rate": 2e-05, "loss": 0.05090232, "step": 22368 }, { "epoch": 44.738, "grad_norm": 1.101635217666626, "learning_rate": 2e-05, "loss": 0.04898684, "step": 22369 }, { "epoch": 44.74, "grad_norm": 0.9810888171195984, "learning_rate": 2e-05, "loss": 0.03836356, "step": 22370 }, { "epoch": 44.742, "grad_norm": 1.3980979919433594, "learning_rate": 2e-05, "loss": 0.05007745, "step": 22371 }, { "epoch": 44.744, "grad_norm": 1.0829483270645142, "learning_rate": 2e-05, "loss": 0.0448014, "step": 22372 }, { "epoch": 44.746, "grad_norm": 1.7258037328720093, "learning_rate": 2e-05, "loss": 0.04841124, "step": 22373 }, { "epoch": 44.748, "grad_norm": 3.515979051589966, "learning_rate": 2e-05, "loss": 0.05872615, "step": 22374 }, { "epoch": 44.75, "grad_norm": 1.1168161630630493, "learning_rate": 2e-05, "loss": 0.0367819, "step": 22375 }, { "epoch": 44.752, "grad_norm": 1.4257042407989502, "learning_rate": 2e-05, "loss": 0.056124, "step": 22376 }, { "epoch": 44.754, "grad_norm": 0.9796735048294067, "learning_rate": 2e-05, "loss": 0.03417499, "step": 22377 }, { "epoch": 44.756, "grad_norm": 3.7734193801879883, "learning_rate": 2e-05, "loss": 0.05913823, "step": 22378 }, { "epoch": 44.758, "grad_norm": 0.8969755172729492, "learning_rate": 2e-05, "loss": 0.03003871, "step": 22379 }, { "epoch": 44.76, "grad_norm": 1.400591492652893, "learning_rate": 2e-05, "loss": 0.03813247, "step": 22380 }, { "epoch": 44.762, "grad_norm": 0.9112294912338257, "learning_rate": 2e-05, "loss": 0.02693655, "step": 22381 }, { "epoch": 44.764, "grad_norm": 1.0678571462631226, "learning_rate": 2e-05, "loss": 0.01864264, "step": 22382 }, { "epoch": 44.766, "grad_norm": 0.984465479850769, "learning_rate": 2e-05, "loss": 0.02944342, "step": 22383 }, { "epoch": 44.768, "grad_norm": 1.4391398429870605, "learning_rate": 2e-05, "loss": 0.05334909, "step": 22384 }, { "epoch": 44.77, "grad_norm": 1.0842714309692383, "learning_rate": 2e-05, "loss": 0.03291496, "step": 22385 }, { "epoch": 44.772, "grad_norm": 0.9750580787658691, "learning_rate": 2e-05, "loss": 0.03178076, "step": 22386 }, { "epoch": 44.774, "grad_norm": 1.6174362897872925, "learning_rate": 2e-05, "loss": 0.03959208, "step": 22387 }, { "epoch": 44.776, "grad_norm": 1.139848232269287, "learning_rate": 2e-05, "loss": 0.03738019, "step": 22388 }, { "epoch": 44.778, "grad_norm": 1.7880958318710327, "learning_rate": 2e-05, "loss": 0.05709739, "step": 22389 }, { "epoch": 44.78, "grad_norm": 0.9525642991065979, "learning_rate": 2e-05, "loss": 0.03777033, "step": 22390 }, { "epoch": 44.782, "grad_norm": 1.1282070875167847, "learning_rate": 2e-05, "loss": 0.0503444, "step": 22391 }, { "epoch": 44.784, "grad_norm": 1.1139459609985352, "learning_rate": 2e-05, "loss": 0.0403349, "step": 22392 }, { "epoch": 44.786, "grad_norm": 1.112560749053955, "learning_rate": 2e-05, "loss": 0.04720819, "step": 22393 }, { "epoch": 44.788, "grad_norm": 1.0122205018997192, "learning_rate": 2e-05, "loss": 0.03305869, "step": 22394 }, { "epoch": 44.79, "grad_norm": 2.323425054550171, "learning_rate": 2e-05, "loss": 0.05507622, "step": 22395 }, { "epoch": 44.792, "grad_norm": 1.0494868755340576, "learning_rate": 2e-05, "loss": 0.04509784, "step": 22396 }, { "epoch": 44.794, "grad_norm": 2.270958423614502, "learning_rate": 2e-05, "loss": 0.03690244, "step": 22397 }, { "epoch": 44.796, "grad_norm": 1.0795804262161255, "learning_rate": 2e-05, "loss": 0.05281503, "step": 22398 }, { "epoch": 44.798, "grad_norm": 1.2804744243621826, "learning_rate": 2e-05, "loss": 0.03850177, "step": 22399 }, { "epoch": 44.8, "grad_norm": 1.1747266054153442, "learning_rate": 2e-05, "loss": 0.04706708, "step": 22400 }, { "epoch": 44.802, "grad_norm": 1.9693273305892944, "learning_rate": 2e-05, "loss": 0.06883971, "step": 22401 }, { "epoch": 44.804, "grad_norm": 0.8730043172836304, "learning_rate": 2e-05, "loss": 0.02741276, "step": 22402 }, { "epoch": 44.806, "grad_norm": 0.9706993103027344, "learning_rate": 2e-05, "loss": 0.03180061, "step": 22403 }, { "epoch": 44.808, "grad_norm": 1.0679796934127808, "learning_rate": 2e-05, "loss": 0.03563013, "step": 22404 }, { "epoch": 44.81, "grad_norm": 0.9856441020965576, "learning_rate": 2e-05, "loss": 0.04094358, "step": 22405 }, { "epoch": 44.812, "grad_norm": 1.4911634922027588, "learning_rate": 2e-05, "loss": 0.03347642, "step": 22406 }, { "epoch": 44.814, "grad_norm": 1.1564414501190186, "learning_rate": 2e-05, "loss": 0.03856932, "step": 22407 }, { "epoch": 44.816, "grad_norm": 0.9815131425857544, "learning_rate": 2e-05, "loss": 0.03464168, "step": 22408 }, { "epoch": 44.818, "grad_norm": 0.8881816267967224, "learning_rate": 2e-05, "loss": 0.02569991, "step": 22409 }, { "epoch": 44.82, "grad_norm": 1.2676564455032349, "learning_rate": 2e-05, "loss": 0.05067092, "step": 22410 }, { "epoch": 44.822, "grad_norm": 1.1654129028320312, "learning_rate": 2e-05, "loss": 0.04802899, "step": 22411 }, { "epoch": 44.824, "grad_norm": 0.9606305360794067, "learning_rate": 2e-05, "loss": 0.03352612, "step": 22412 }, { "epoch": 44.826, "grad_norm": 1.068911075592041, "learning_rate": 2e-05, "loss": 0.04454533, "step": 22413 }, { "epoch": 44.828, "grad_norm": 0.9555947184562683, "learning_rate": 2e-05, "loss": 0.03919222, "step": 22414 }, { "epoch": 44.83, "grad_norm": 0.8904321193695068, "learning_rate": 2e-05, "loss": 0.02778956, "step": 22415 }, { "epoch": 44.832, "grad_norm": 1.1577892303466797, "learning_rate": 2e-05, "loss": 0.04118215, "step": 22416 }, { "epoch": 44.834, "grad_norm": 1.2163898944854736, "learning_rate": 2e-05, "loss": 0.03865066, "step": 22417 }, { "epoch": 44.836, "grad_norm": 1.1335779428482056, "learning_rate": 2e-05, "loss": 0.04228361, "step": 22418 }, { "epoch": 44.838, "grad_norm": 1.1965153217315674, "learning_rate": 2e-05, "loss": 0.04404308, "step": 22419 }, { "epoch": 44.84, "grad_norm": 1.1963372230529785, "learning_rate": 2e-05, "loss": 0.03975984, "step": 22420 }, { "epoch": 44.842, "grad_norm": 1.108413577079773, "learning_rate": 2e-05, "loss": 0.05062433, "step": 22421 }, { "epoch": 44.844, "grad_norm": 1.1551628112792969, "learning_rate": 2e-05, "loss": 0.04348875, "step": 22422 }, { "epoch": 44.846, "grad_norm": 0.9485704898834229, "learning_rate": 2e-05, "loss": 0.0289953, "step": 22423 }, { "epoch": 44.848, "grad_norm": 1.0071600675582886, "learning_rate": 2e-05, "loss": 0.03718989, "step": 22424 }, { "epoch": 44.85, "grad_norm": 1.1458314657211304, "learning_rate": 2e-05, "loss": 0.03177065, "step": 22425 }, { "epoch": 44.852, "grad_norm": 1.778181791305542, "learning_rate": 2e-05, "loss": 0.05281895, "step": 22426 }, { "epoch": 44.854, "grad_norm": 1.296013593673706, "learning_rate": 2e-05, "loss": 0.04177289, "step": 22427 }, { "epoch": 44.856, "grad_norm": 1.199934720993042, "learning_rate": 2e-05, "loss": 0.03374371, "step": 22428 }, { "epoch": 44.858, "grad_norm": 0.9594781398773193, "learning_rate": 2e-05, "loss": 0.03046091, "step": 22429 }, { "epoch": 44.86, "grad_norm": 1.276280164718628, "learning_rate": 2e-05, "loss": 0.06274056, "step": 22430 }, { "epoch": 44.862, "grad_norm": 1.0076258182525635, "learning_rate": 2e-05, "loss": 0.0389448, "step": 22431 }, { "epoch": 44.864, "grad_norm": 1.3280401229858398, "learning_rate": 2e-05, "loss": 0.04889119, "step": 22432 }, { "epoch": 44.866, "grad_norm": 1.1819334030151367, "learning_rate": 2e-05, "loss": 0.04417578, "step": 22433 }, { "epoch": 44.868, "grad_norm": 1.2220628261566162, "learning_rate": 2e-05, "loss": 0.05297129, "step": 22434 }, { "epoch": 44.87, "grad_norm": 1.032162070274353, "learning_rate": 2e-05, "loss": 0.04856939, "step": 22435 }, { "epoch": 44.872, "grad_norm": 1.0263824462890625, "learning_rate": 2e-05, "loss": 0.02882082, "step": 22436 }, { "epoch": 44.874, "grad_norm": 1.4182676076889038, "learning_rate": 2e-05, "loss": 0.05373186, "step": 22437 }, { "epoch": 44.876, "grad_norm": 1.0512267351150513, "learning_rate": 2e-05, "loss": 0.04678575, "step": 22438 }, { "epoch": 44.878, "grad_norm": 2.560997247695923, "learning_rate": 2e-05, "loss": 0.03201003, "step": 22439 }, { "epoch": 44.88, "grad_norm": 1.5839498043060303, "learning_rate": 2e-05, "loss": 0.05814524, "step": 22440 }, { "epoch": 44.882, "grad_norm": 1.0000758171081543, "learning_rate": 2e-05, "loss": 0.03928522, "step": 22441 }, { "epoch": 44.884, "grad_norm": 1.0724201202392578, "learning_rate": 2e-05, "loss": 0.04862811, "step": 22442 }, { "epoch": 44.886, "grad_norm": 0.8467284440994263, "learning_rate": 2e-05, "loss": 0.02656439, "step": 22443 }, { "epoch": 44.888, "grad_norm": 1.129129409790039, "learning_rate": 2e-05, "loss": 0.0334824, "step": 22444 }, { "epoch": 44.89, "grad_norm": 0.8156976699829102, "learning_rate": 2e-05, "loss": 0.02706487, "step": 22445 }, { "epoch": 44.892, "grad_norm": 1.7276281118392944, "learning_rate": 2e-05, "loss": 0.04316651, "step": 22446 }, { "epoch": 44.894, "grad_norm": 1.261051058769226, "learning_rate": 2e-05, "loss": 0.04908384, "step": 22447 }, { "epoch": 44.896, "grad_norm": 1.1480718851089478, "learning_rate": 2e-05, "loss": 0.03944552, "step": 22448 }, { "epoch": 44.898, "grad_norm": 1.590351939201355, "learning_rate": 2e-05, "loss": 0.04764176, "step": 22449 }, { "epoch": 44.9, "grad_norm": 1.1975303888320923, "learning_rate": 2e-05, "loss": 0.04468657, "step": 22450 }, { "epoch": 44.902, "grad_norm": 1.2141342163085938, "learning_rate": 2e-05, "loss": 0.03355991, "step": 22451 }, { "epoch": 44.904, "grad_norm": 1.0241870880126953, "learning_rate": 2e-05, "loss": 0.03846227, "step": 22452 }, { "epoch": 44.906, "grad_norm": 1.4084351062774658, "learning_rate": 2e-05, "loss": 0.05193393, "step": 22453 }, { "epoch": 44.908, "grad_norm": 1.1747103929519653, "learning_rate": 2e-05, "loss": 0.05403937, "step": 22454 }, { "epoch": 44.91, "grad_norm": 1.2485325336456299, "learning_rate": 2e-05, "loss": 0.05346002, "step": 22455 }, { "epoch": 44.912, "grad_norm": 0.9821465015411377, "learning_rate": 2e-05, "loss": 0.0362882, "step": 22456 }, { "epoch": 44.914, "grad_norm": 1.2896426916122437, "learning_rate": 2e-05, "loss": 0.04576202, "step": 22457 }, { "epoch": 44.916, "grad_norm": 0.9823945164680481, "learning_rate": 2e-05, "loss": 0.04252811, "step": 22458 }, { "epoch": 44.918, "grad_norm": 1.187057375907898, "learning_rate": 2e-05, "loss": 0.0504555, "step": 22459 }, { "epoch": 44.92, "grad_norm": 1.3347331285476685, "learning_rate": 2e-05, "loss": 0.04351444, "step": 22460 }, { "epoch": 44.922, "grad_norm": 1.4206515550613403, "learning_rate": 2e-05, "loss": 0.02593698, "step": 22461 }, { "epoch": 44.924, "grad_norm": 0.9729888439178467, "learning_rate": 2e-05, "loss": 0.05103148, "step": 22462 }, { "epoch": 44.926, "grad_norm": 1.5794564485549927, "learning_rate": 2e-05, "loss": 0.05536435, "step": 22463 }, { "epoch": 44.928, "grad_norm": 1.0293936729431152, "learning_rate": 2e-05, "loss": 0.03708414, "step": 22464 }, { "epoch": 44.93, "grad_norm": 4.012156009674072, "learning_rate": 2e-05, "loss": 0.06015166, "step": 22465 }, { "epoch": 44.932, "grad_norm": 1.142695665359497, "learning_rate": 2e-05, "loss": 0.05368748, "step": 22466 }, { "epoch": 44.934, "grad_norm": 1.0002529621124268, "learning_rate": 2e-05, "loss": 0.03308527, "step": 22467 }, { "epoch": 44.936, "grad_norm": 2.2301363945007324, "learning_rate": 2e-05, "loss": 0.04753839, "step": 22468 }, { "epoch": 44.938, "grad_norm": 1.0900908708572388, "learning_rate": 2e-05, "loss": 0.03648573, "step": 22469 }, { "epoch": 44.94, "grad_norm": 1.2102247476577759, "learning_rate": 2e-05, "loss": 0.05175774, "step": 22470 }, { "epoch": 44.942, "grad_norm": 1.1298000812530518, "learning_rate": 2e-05, "loss": 0.04414134, "step": 22471 }, { "epoch": 44.944, "grad_norm": 1.5387704372406006, "learning_rate": 2e-05, "loss": 0.05122928, "step": 22472 }, { "epoch": 44.946, "grad_norm": 1.352080225944519, "learning_rate": 2e-05, "loss": 0.04572622, "step": 22473 }, { "epoch": 44.948, "grad_norm": 0.9292472004890442, "learning_rate": 2e-05, "loss": 0.0358121, "step": 22474 }, { "epoch": 44.95, "grad_norm": 1.2184200286865234, "learning_rate": 2e-05, "loss": 0.05341076, "step": 22475 }, { "epoch": 44.952, "grad_norm": 1.1083459854125977, "learning_rate": 2e-05, "loss": 0.03161187, "step": 22476 }, { "epoch": 44.954, "grad_norm": 1.0209262371063232, "learning_rate": 2e-05, "loss": 0.04262392, "step": 22477 }, { "epoch": 44.956, "grad_norm": 1.9002419710159302, "learning_rate": 2e-05, "loss": 0.03795274, "step": 22478 }, { "epoch": 44.958, "grad_norm": 0.9931967854499817, "learning_rate": 2e-05, "loss": 0.04694855, "step": 22479 }, { "epoch": 44.96, "grad_norm": 1.1245759725570679, "learning_rate": 2e-05, "loss": 0.04808666, "step": 22480 }, { "epoch": 44.962, "grad_norm": 1.1964689493179321, "learning_rate": 2e-05, "loss": 0.0462534, "step": 22481 }, { "epoch": 44.964, "grad_norm": 1.1455614566802979, "learning_rate": 2e-05, "loss": 0.04598229, "step": 22482 }, { "epoch": 44.966, "grad_norm": 1.021041989326477, "learning_rate": 2e-05, "loss": 0.03521863, "step": 22483 }, { "epoch": 44.968, "grad_norm": 1.055078148841858, "learning_rate": 2e-05, "loss": 0.03188962, "step": 22484 }, { "epoch": 44.97, "grad_norm": 1.0768845081329346, "learning_rate": 2e-05, "loss": 0.05136817, "step": 22485 }, { "epoch": 44.972, "grad_norm": 2.1760170459747314, "learning_rate": 2e-05, "loss": 0.03863563, "step": 22486 }, { "epoch": 44.974, "grad_norm": 0.8704553246498108, "learning_rate": 2e-05, "loss": 0.02656063, "step": 22487 }, { "epoch": 44.976, "grad_norm": 1.0095819234848022, "learning_rate": 2e-05, "loss": 0.03248763, "step": 22488 }, { "epoch": 44.978, "grad_norm": 0.8135930895805359, "learning_rate": 2e-05, "loss": 0.02239985, "step": 22489 }, { "epoch": 44.98, "grad_norm": 1.160813808441162, "learning_rate": 2e-05, "loss": 0.03367274, "step": 22490 }, { "epoch": 44.982, "grad_norm": 0.8896259665489197, "learning_rate": 2e-05, "loss": 0.02435785, "step": 22491 }, { "epoch": 44.984, "grad_norm": 1.8309701681137085, "learning_rate": 2e-05, "loss": 0.05061591, "step": 22492 }, { "epoch": 44.986, "grad_norm": 1.2446043491363525, "learning_rate": 2e-05, "loss": 0.03834666, "step": 22493 }, { "epoch": 44.988, "grad_norm": 1.1362097263336182, "learning_rate": 2e-05, "loss": 0.04740632, "step": 22494 }, { "epoch": 44.99, "grad_norm": 1.0524965524673462, "learning_rate": 2e-05, "loss": 0.040223, "step": 22495 }, { "epoch": 44.992, "grad_norm": 0.9642549753189087, "learning_rate": 2e-05, "loss": 0.02992929, "step": 22496 }, { "epoch": 44.994, "grad_norm": 0.913936197757721, "learning_rate": 2e-05, "loss": 0.03224386, "step": 22497 }, { "epoch": 44.996, "grad_norm": 1.2856546640396118, "learning_rate": 2e-05, "loss": 0.04659765, "step": 22498 }, { "epoch": 44.998, "grad_norm": 1.5435144901275635, "learning_rate": 2e-05, "loss": 0.05536109, "step": 22499 }, { "epoch": 45.0, "grad_norm": 0.9943864345550537, "learning_rate": 2e-05, "loss": 0.03740847, "step": 22500 }, { "epoch": 45.0, "eval_performance": { "AngleClassification_1": 1.0, "AngleClassification_2": 0.998, "AngleClassification_3": 0.9880239520958084, "Equal_1": 0.994, "Equal_2": 0.9760479041916168, "Equal_3": 0.9840319361277445, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.998003992015968, "Parallel_1": 0.9899799599198397, "Parallel_2": 0.9979959919839679, "Parallel_3": 0.992, "Perpendicular_1": 0.996, "Perpendicular_2": 0.998, "Perpendicular_3": 0.8957915831663327, "PointLiesOnCircle_1": 1.0, "PointLiesOnCircle_2": 0.9956666666666667, "PointLiesOnCircle_3": 0.992, "PointLiesOnLine_1": 0.9919839679358717, "PointLiesOnLine_2": 0.9979959919839679, "PointLiesOnLine_3": 0.9800399201596807 }, "eval_runtime": 225.022, "eval_samples_per_second": 46.662, "eval_steps_per_second": 0.933, "step": 22500 }, { "epoch": 45.002, "grad_norm": 1.3969717025756836, "learning_rate": 2e-05, "loss": 0.06817479, "step": 22501 }, { "epoch": 45.004, "grad_norm": 0.8960119485855103, "learning_rate": 2e-05, "loss": 0.03126392, "step": 22502 }, { "epoch": 45.006, "grad_norm": 0.9075344204902649, "learning_rate": 2e-05, "loss": 0.03196636, "step": 22503 }, { "epoch": 45.008, "grad_norm": 1.4471746683120728, "learning_rate": 2e-05, "loss": 0.04853083, "step": 22504 }, { "epoch": 45.01, "grad_norm": 1.1390432119369507, "learning_rate": 2e-05, "loss": 0.03473972, "step": 22505 }, { "epoch": 45.012, "grad_norm": 2.580009937286377, "learning_rate": 2e-05, "loss": 0.03911752, "step": 22506 }, { "epoch": 45.014, "grad_norm": 1.12912118434906, "learning_rate": 2e-05, "loss": 0.04563247, "step": 22507 }, { "epoch": 45.016, "grad_norm": 1.2612600326538086, "learning_rate": 2e-05, "loss": 0.05117512, "step": 22508 }, { "epoch": 45.018, "grad_norm": 1.2428263425827026, "learning_rate": 2e-05, "loss": 0.04447511, "step": 22509 }, { "epoch": 45.02, "grad_norm": 1.2652740478515625, "learning_rate": 2e-05, "loss": 0.06364898, "step": 22510 }, { "epoch": 45.022, "grad_norm": 1.683158040046692, "learning_rate": 2e-05, "loss": 0.03643538, "step": 22511 }, { "epoch": 45.024, "grad_norm": 1.0766136646270752, "learning_rate": 2e-05, "loss": 0.04969937, "step": 22512 }, { "epoch": 45.026, "grad_norm": 0.9929112792015076, "learning_rate": 2e-05, "loss": 0.04057132, "step": 22513 }, { "epoch": 45.028, "grad_norm": 0.9966371059417725, "learning_rate": 2e-05, "loss": 0.03701176, "step": 22514 }, { "epoch": 45.03, "grad_norm": 1.1576004028320312, "learning_rate": 2e-05, "loss": 0.05128994, "step": 22515 }, { "epoch": 45.032, "grad_norm": 1.084555983543396, "learning_rate": 2e-05, "loss": 0.04959207, "step": 22516 }, { "epoch": 45.034, "grad_norm": 1.3754557371139526, "learning_rate": 2e-05, "loss": 0.05709638, "step": 22517 }, { "epoch": 45.036, "grad_norm": 1.0302624702453613, "learning_rate": 2e-05, "loss": 0.03493063, "step": 22518 }, { "epoch": 45.038, "grad_norm": 0.9910299777984619, "learning_rate": 2e-05, "loss": 0.03679488, "step": 22519 }, { "epoch": 45.04, "grad_norm": 2.000528573989868, "learning_rate": 2e-05, "loss": 0.0474072, "step": 22520 }, { "epoch": 45.042, "grad_norm": 1.0020743608474731, "learning_rate": 2e-05, "loss": 0.04147084, "step": 22521 }, { "epoch": 45.044, "grad_norm": 0.9819155335426331, "learning_rate": 2e-05, "loss": 0.0447064, "step": 22522 }, { "epoch": 45.046, "grad_norm": 1.2244595289230347, "learning_rate": 2e-05, "loss": 0.04908337, "step": 22523 }, { "epoch": 45.048, "grad_norm": 0.9420642256736755, "learning_rate": 2e-05, "loss": 0.03659626, "step": 22524 }, { "epoch": 45.05, "grad_norm": 1.4613120555877686, "learning_rate": 2e-05, "loss": 0.06458926, "step": 22525 }, { "epoch": 45.052, "grad_norm": 1.024276614189148, "learning_rate": 2e-05, "loss": 0.03932497, "step": 22526 }, { "epoch": 45.054, "grad_norm": 1.5050458908081055, "learning_rate": 2e-05, "loss": 0.05850245, "step": 22527 }, { "epoch": 45.056, "grad_norm": 1.070955514907837, "learning_rate": 2e-05, "loss": 0.03962543, "step": 22528 }, { "epoch": 45.058, "grad_norm": 1.0784193277359009, "learning_rate": 2e-05, "loss": 0.04383793, "step": 22529 }, { "epoch": 45.06, "grad_norm": 2.5958659648895264, "learning_rate": 2e-05, "loss": 0.05419977, "step": 22530 }, { "epoch": 45.062, "grad_norm": 1.199507713317871, "learning_rate": 2e-05, "loss": 0.04987177, "step": 22531 }, { "epoch": 45.064, "grad_norm": 1.723249912261963, "learning_rate": 2e-05, "loss": 0.04738272, "step": 22532 }, { "epoch": 45.066, "grad_norm": 1.023488998413086, "learning_rate": 2e-05, "loss": 0.04166118, "step": 22533 }, { "epoch": 45.068, "grad_norm": 1.512513279914856, "learning_rate": 2e-05, "loss": 0.03430131, "step": 22534 }, { "epoch": 45.07, "grad_norm": 1.0386521816253662, "learning_rate": 2e-05, "loss": 0.04658692, "step": 22535 }, { "epoch": 45.072, "grad_norm": 1.1373211145401, "learning_rate": 2e-05, "loss": 0.05163994, "step": 22536 }, { "epoch": 45.074, "grad_norm": 1.3114489316940308, "learning_rate": 2e-05, "loss": 0.03155287, "step": 22537 }, { "epoch": 45.076, "grad_norm": 1.6281450986862183, "learning_rate": 2e-05, "loss": 0.03621594, "step": 22538 }, { "epoch": 45.078, "grad_norm": 1.0148260593414307, "learning_rate": 2e-05, "loss": 0.03990772, "step": 22539 }, { "epoch": 45.08, "grad_norm": 0.987768292427063, "learning_rate": 2e-05, "loss": 0.03295402, "step": 22540 }, { "epoch": 45.082, "grad_norm": 0.870937705039978, "learning_rate": 2e-05, "loss": 0.02919325, "step": 22541 }, { "epoch": 45.084, "grad_norm": 1.264768123626709, "learning_rate": 2e-05, "loss": 0.05428372, "step": 22542 }, { "epoch": 45.086, "grad_norm": 1.0479131937026978, "learning_rate": 2e-05, "loss": 0.05013015, "step": 22543 }, { "epoch": 45.088, "grad_norm": 0.9399276375770569, "learning_rate": 2e-05, "loss": 0.03904264, "step": 22544 }, { "epoch": 45.09, "grad_norm": 0.953085720539093, "learning_rate": 2e-05, "loss": 0.03690369, "step": 22545 }, { "epoch": 45.092, "grad_norm": 1.0929187536239624, "learning_rate": 2e-05, "loss": 0.04805703, "step": 22546 }, { "epoch": 45.094, "grad_norm": 1.1775532960891724, "learning_rate": 2e-05, "loss": 0.03257169, "step": 22547 }, { "epoch": 45.096, "grad_norm": 1.3686949014663696, "learning_rate": 2e-05, "loss": 0.04788046, "step": 22548 }, { "epoch": 45.098, "grad_norm": 1.2574232816696167, "learning_rate": 2e-05, "loss": 0.04393671, "step": 22549 }, { "epoch": 45.1, "grad_norm": 1.0173851251602173, "learning_rate": 2e-05, "loss": 0.04385318, "step": 22550 }, { "epoch": 45.102, "grad_norm": 1.1309326887130737, "learning_rate": 2e-05, "loss": 0.02719438, "step": 22551 }, { "epoch": 45.104, "grad_norm": 1.853333830833435, "learning_rate": 2e-05, "loss": 0.05166949, "step": 22552 }, { "epoch": 45.106, "grad_norm": 1.0070621967315674, "learning_rate": 2e-05, "loss": 0.03831605, "step": 22553 }, { "epoch": 45.108, "grad_norm": 1.4704504013061523, "learning_rate": 2e-05, "loss": 0.05314073, "step": 22554 }, { "epoch": 45.11, "grad_norm": 1.0346410274505615, "learning_rate": 2e-05, "loss": 0.04530941, "step": 22555 }, { "epoch": 45.112, "grad_norm": 1.087776780128479, "learning_rate": 2e-05, "loss": 0.03647161, "step": 22556 }, { "epoch": 45.114, "grad_norm": 0.8761861324310303, "learning_rate": 2e-05, "loss": 0.03212115, "step": 22557 }, { "epoch": 45.116, "grad_norm": 0.972069501876831, "learning_rate": 2e-05, "loss": 0.04109576, "step": 22558 }, { "epoch": 45.118, "grad_norm": 1.0920774936676025, "learning_rate": 2e-05, "loss": 0.04330002, "step": 22559 }, { "epoch": 45.12, "grad_norm": 1.1553515195846558, "learning_rate": 2e-05, "loss": 0.04564039, "step": 22560 }, { "epoch": 45.122, "grad_norm": 1.8114463090896606, "learning_rate": 2e-05, "loss": 0.04312771, "step": 22561 }, { "epoch": 45.124, "grad_norm": 0.9037060141563416, "learning_rate": 2e-05, "loss": 0.03658786, "step": 22562 }, { "epoch": 45.126, "grad_norm": 1.7182328701019287, "learning_rate": 2e-05, "loss": 0.05565553, "step": 22563 }, { "epoch": 45.128, "grad_norm": 0.8934664130210876, "learning_rate": 2e-05, "loss": 0.03438888, "step": 22564 }, { "epoch": 45.13, "grad_norm": 0.9855318665504456, "learning_rate": 2e-05, "loss": 0.04035582, "step": 22565 }, { "epoch": 45.132, "grad_norm": 0.9053523540496826, "learning_rate": 2e-05, "loss": 0.03289046, "step": 22566 }, { "epoch": 45.134, "grad_norm": 1.134564757347107, "learning_rate": 2e-05, "loss": 0.0546699, "step": 22567 }, { "epoch": 45.136, "grad_norm": 1.726763129234314, "learning_rate": 2e-05, "loss": 0.04333213, "step": 22568 }, { "epoch": 45.138, "grad_norm": 1.6346923112869263, "learning_rate": 2e-05, "loss": 0.04667462, "step": 22569 }, { "epoch": 45.14, "grad_norm": 2.1227314472198486, "learning_rate": 2e-05, "loss": 0.04433148, "step": 22570 }, { "epoch": 45.142, "grad_norm": 1.1375401020050049, "learning_rate": 2e-05, "loss": 0.05206484, "step": 22571 }, { "epoch": 45.144, "grad_norm": 1.4685722589492798, "learning_rate": 2e-05, "loss": 0.02864407, "step": 22572 }, { "epoch": 45.146, "grad_norm": 1.1892337799072266, "learning_rate": 2e-05, "loss": 0.05431047, "step": 22573 }, { "epoch": 45.148, "grad_norm": 1.5828921794891357, "learning_rate": 2e-05, "loss": 0.04674748, "step": 22574 }, { "epoch": 45.15, "grad_norm": 1.1374571323394775, "learning_rate": 2e-05, "loss": 0.04989852, "step": 22575 }, { "epoch": 45.152, "grad_norm": 1.0103979110717773, "learning_rate": 2e-05, "loss": 0.03697056, "step": 22576 }, { "epoch": 45.154, "grad_norm": 1.0518049001693726, "learning_rate": 2e-05, "loss": 0.04566965, "step": 22577 }, { "epoch": 45.156, "grad_norm": 1.245856523513794, "learning_rate": 2e-05, "loss": 0.05060218, "step": 22578 }, { "epoch": 45.158, "grad_norm": 1.1464107036590576, "learning_rate": 2e-05, "loss": 0.04743565, "step": 22579 }, { "epoch": 45.16, "grad_norm": 2.3893489837646484, "learning_rate": 2e-05, "loss": 0.03963726, "step": 22580 }, { "epoch": 45.162, "grad_norm": 1.7832940816879272, "learning_rate": 2e-05, "loss": 0.04793505, "step": 22581 }, { "epoch": 45.164, "grad_norm": 1.2135182619094849, "learning_rate": 2e-05, "loss": 0.0547461, "step": 22582 }, { "epoch": 45.166, "grad_norm": 1.2692515850067139, "learning_rate": 2e-05, "loss": 0.04469053, "step": 22583 }, { "epoch": 45.168, "grad_norm": 0.9289724230766296, "learning_rate": 2e-05, "loss": 0.03659691, "step": 22584 }, { "epoch": 45.17, "grad_norm": 1.0520449876785278, "learning_rate": 2e-05, "loss": 0.04413296, "step": 22585 }, { "epoch": 45.172, "grad_norm": 1.5497771501541138, "learning_rate": 2e-05, "loss": 0.04320464, "step": 22586 }, { "epoch": 45.174, "grad_norm": 1.374786138534546, "learning_rate": 2e-05, "loss": 0.06070436, "step": 22587 }, { "epoch": 45.176, "grad_norm": 1.1327978372573853, "learning_rate": 2e-05, "loss": 0.03812924, "step": 22588 }, { "epoch": 45.178, "grad_norm": 1.0761662721633911, "learning_rate": 2e-05, "loss": 0.0392059, "step": 22589 }, { "epoch": 45.18, "grad_norm": 0.956294596195221, "learning_rate": 2e-05, "loss": 0.03747541, "step": 22590 }, { "epoch": 45.182, "grad_norm": 4.199977874755859, "learning_rate": 2e-05, "loss": 0.03816468, "step": 22591 }, { "epoch": 45.184, "grad_norm": 2.235480308532715, "learning_rate": 2e-05, "loss": 0.03837253, "step": 22592 }, { "epoch": 45.186, "grad_norm": 1.6322890520095825, "learning_rate": 2e-05, "loss": 0.04221357, "step": 22593 }, { "epoch": 45.188, "grad_norm": 1.3366886377334595, "learning_rate": 2e-05, "loss": 0.05573425, "step": 22594 }, { "epoch": 45.19, "grad_norm": 0.9577432870864868, "learning_rate": 2e-05, "loss": 0.03308515, "step": 22595 }, { "epoch": 45.192, "grad_norm": 1.0539096593856812, "learning_rate": 2e-05, "loss": 0.04248886, "step": 22596 }, { "epoch": 45.194, "grad_norm": 1.1969517469406128, "learning_rate": 2e-05, "loss": 0.05103723, "step": 22597 }, { "epoch": 45.196, "grad_norm": 1.7519115209579468, "learning_rate": 2e-05, "loss": 0.03787765, "step": 22598 }, { "epoch": 45.198, "grad_norm": 1.1365214586257935, "learning_rate": 2e-05, "loss": 0.04729158, "step": 22599 }, { "epoch": 45.2, "grad_norm": 0.9086182117462158, "learning_rate": 2e-05, "loss": 0.02625214, "step": 22600 }, { "epoch": 45.202, "grad_norm": 0.9892885088920593, "learning_rate": 2e-05, "loss": 0.04961496, "step": 22601 }, { "epoch": 45.204, "grad_norm": 0.9479159712791443, "learning_rate": 2e-05, "loss": 0.03868186, "step": 22602 }, { "epoch": 45.206, "grad_norm": 1.1169151067733765, "learning_rate": 2e-05, "loss": 0.0357076, "step": 22603 }, { "epoch": 45.208, "grad_norm": 0.8113283514976501, "learning_rate": 2e-05, "loss": 0.03237044, "step": 22604 }, { "epoch": 45.21, "grad_norm": 1.5293796062469482, "learning_rate": 2e-05, "loss": 0.05383504, "step": 22605 }, { "epoch": 45.212, "grad_norm": 0.8658296465873718, "learning_rate": 2e-05, "loss": 0.03717101, "step": 22606 }, { "epoch": 45.214, "grad_norm": 1.1086194515228271, "learning_rate": 2e-05, "loss": 0.03789288, "step": 22607 }, { "epoch": 45.216, "grad_norm": 1.4397318363189697, "learning_rate": 2e-05, "loss": 0.04886861, "step": 22608 }, { "epoch": 45.218, "grad_norm": 0.9238728284835815, "learning_rate": 2e-05, "loss": 0.03179143, "step": 22609 }, { "epoch": 45.22, "grad_norm": 1.1815979480743408, "learning_rate": 2e-05, "loss": 0.05355689, "step": 22610 }, { "epoch": 45.222, "grad_norm": 1.1790995597839355, "learning_rate": 2e-05, "loss": 0.0421137, "step": 22611 }, { "epoch": 45.224, "grad_norm": 1.0949044227600098, "learning_rate": 2e-05, "loss": 0.04550068, "step": 22612 }, { "epoch": 45.226, "grad_norm": 1.1146636009216309, "learning_rate": 2e-05, "loss": 0.05019861, "step": 22613 }, { "epoch": 45.228, "grad_norm": 1.0667710304260254, "learning_rate": 2e-05, "loss": 0.03783801, "step": 22614 }, { "epoch": 45.23, "grad_norm": 1.05813467502594, "learning_rate": 2e-05, "loss": 0.05697027, "step": 22615 }, { "epoch": 45.232, "grad_norm": 1.258849024772644, "learning_rate": 2e-05, "loss": 0.03156347, "step": 22616 }, { "epoch": 45.234, "grad_norm": 1.2029353380203247, "learning_rate": 2e-05, "loss": 0.04755739, "step": 22617 }, { "epoch": 45.236, "grad_norm": 0.8909667134284973, "learning_rate": 2e-05, "loss": 0.03271152, "step": 22618 }, { "epoch": 45.238, "grad_norm": 1.0951409339904785, "learning_rate": 2e-05, "loss": 0.05758268, "step": 22619 }, { "epoch": 45.24, "grad_norm": 1.5308946371078491, "learning_rate": 2e-05, "loss": 0.05036226, "step": 22620 }, { "epoch": 45.242, "grad_norm": 1.1315953731536865, "learning_rate": 2e-05, "loss": 0.04879746, "step": 22621 }, { "epoch": 45.244, "grad_norm": 1.2328094244003296, "learning_rate": 2e-05, "loss": 0.04744268, "step": 22622 }, { "epoch": 45.246, "grad_norm": 1.3572648763656616, "learning_rate": 2e-05, "loss": 0.04770868, "step": 22623 }, { "epoch": 45.248, "grad_norm": 1.0951495170593262, "learning_rate": 2e-05, "loss": 0.04146712, "step": 22624 }, { "epoch": 45.25, "grad_norm": 1.0948742628097534, "learning_rate": 2e-05, "loss": 0.05581894, "step": 22625 }, { "epoch": 45.252, "grad_norm": 1.2621768712997437, "learning_rate": 2e-05, "loss": 0.03256617, "step": 22626 }, { "epoch": 45.254, "grad_norm": 1.6411503553390503, "learning_rate": 2e-05, "loss": 0.05436235, "step": 22627 }, { "epoch": 45.256, "grad_norm": 0.9571181535720825, "learning_rate": 2e-05, "loss": 0.02940347, "step": 22628 }, { "epoch": 45.258, "grad_norm": 1.3076335191726685, "learning_rate": 2e-05, "loss": 0.05692288, "step": 22629 }, { "epoch": 45.26, "grad_norm": 0.9797654747962952, "learning_rate": 2e-05, "loss": 0.03967388, "step": 22630 }, { "epoch": 45.262, "grad_norm": 1.8855012655258179, "learning_rate": 2e-05, "loss": 0.04717269, "step": 22631 }, { "epoch": 45.264, "grad_norm": 1.106624960899353, "learning_rate": 2e-05, "loss": 0.03724663, "step": 22632 }, { "epoch": 45.266, "grad_norm": 1.0780366659164429, "learning_rate": 2e-05, "loss": 0.03106453, "step": 22633 }, { "epoch": 45.268, "grad_norm": 0.9857956171035767, "learning_rate": 2e-05, "loss": 0.0363286, "step": 22634 }, { "epoch": 45.27, "grad_norm": 1.2943743467330933, "learning_rate": 2e-05, "loss": 0.02954138, "step": 22635 }, { "epoch": 45.272, "grad_norm": 0.9821824431419373, "learning_rate": 2e-05, "loss": 0.0456384, "step": 22636 }, { "epoch": 45.274, "grad_norm": 0.9704524874687195, "learning_rate": 2e-05, "loss": 0.0415858, "step": 22637 }, { "epoch": 45.276, "grad_norm": 1.0735297203063965, "learning_rate": 2e-05, "loss": 0.0451444, "step": 22638 }, { "epoch": 45.278, "grad_norm": 0.9718818664550781, "learning_rate": 2e-05, "loss": 0.03819172, "step": 22639 }, { "epoch": 45.28, "grad_norm": 3.547299385070801, "learning_rate": 2e-05, "loss": 0.04704066, "step": 22640 }, { "epoch": 45.282, "grad_norm": 0.9835482835769653, "learning_rate": 2e-05, "loss": 0.0417568, "step": 22641 }, { "epoch": 45.284, "grad_norm": 0.9171995520591736, "learning_rate": 2e-05, "loss": 0.0414474, "step": 22642 }, { "epoch": 45.286, "grad_norm": 1.108377456665039, "learning_rate": 2e-05, "loss": 0.04013625, "step": 22643 }, { "epoch": 45.288, "grad_norm": 2.6813571453094482, "learning_rate": 2e-05, "loss": 0.0615818, "step": 22644 }, { "epoch": 45.29, "grad_norm": 1.5888181924819946, "learning_rate": 2e-05, "loss": 0.05036303, "step": 22645 }, { "epoch": 45.292, "grad_norm": 1.10150146484375, "learning_rate": 2e-05, "loss": 0.04077693, "step": 22646 }, { "epoch": 45.294, "grad_norm": 1.0552695989608765, "learning_rate": 2e-05, "loss": 0.04050602, "step": 22647 }, { "epoch": 45.296, "grad_norm": 0.870725154876709, "learning_rate": 2e-05, "loss": 0.02974698, "step": 22648 }, { "epoch": 45.298, "grad_norm": 1.0313730239868164, "learning_rate": 2e-05, "loss": 0.04764192, "step": 22649 }, { "epoch": 45.3, "grad_norm": 1.2592331171035767, "learning_rate": 2e-05, "loss": 0.03480305, "step": 22650 }, { "epoch": 45.302, "grad_norm": 0.8976898193359375, "learning_rate": 2e-05, "loss": 0.03877586, "step": 22651 }, { "epoch": 45.304, "grad_norm": 0.8818565607070923, "learning_rate": 2e-05, "loss": 0.03589277, "step": 22652 }, { "epoch": 45.306, "grad_norm": 1.2200487852096558, "learning_rate": 2e-05, "loss": 0.04458153, "step": 22653 }, { "epoch": 45.308, "grad_norm": 1.5366629362106323, "learning_rate": 2e-05, "loss": 0.04485605, "step": 22654 }, { "epoch": 45.31, "grad_norm": 0.9309005737304688, "learning_rate": 2e-05, "loss": 0.0368764, "step": 22655 }, { "epoch": 45.312, "grad_norm": 1.032126545906067, "learning_rate": 2e-05, "loss": 0.03982931, "step": 22656 }, { "epoch": 45.314, "grad_norm": 0.8174816966056824, "learning_rate": 2e-05, "loss": 0.02997202, "step": 22657 }, { "epoch": 45.316, "grad_norm": 1.18435800075531, "learning_rate": 2e-05, "loss": 0.04317258, "step": 22658 }, { "epoch": 45.318, "grad_norm": 1.1493366956710815, "learning_rate": 2e-05, "loss": 0.03779929, "step": 22659 }, { "epoch": 45.32, "grad_norm": 1.4151698350906372, "learning_rate": 2e-05, "loss": 0.05547353, "step": 22660 }, { "epoch": 45.322, "grad_norm": 1.0504828691482544, "learning_rate": 2e-05, "loss": 0.0477428, "step": 22661 }, { "epoch": 45.324, "grad_norm": 1.823428750038147, "learning_rate": 2e-05, "loss": 0.03226326, "step": 22662 }, { "epoch": 45.326, "grad_norm": 0.8989893198013306, "learning_rate": 2e-05, "loss": 0.03514536, "step": 22663 }, { "epoch": 45.328, "grad_norm": 1.4097903966903687, "learning_rate": 2e-05, "loss": 0.05434159, "step": 22664 }, { "epoch": 45.33, "grad_norm": 1.5705863237380981, "learning_rate": 2e-05, "loss": 0.04037646, "step": 22665 }, { "epoch": 45.332, "grad_norm": 0.9952764511108398, "learning_rate": 2e-05, "loss": 0.04163586, "step": 22666 }, { "epoch": 45.334, "grad_norm": 2.0294463634490967, "learning_rate": 2e-05, "loss": 0.04768112, "step": 22667 }, { "epoch": 45.336, "grad_norm": 1.1313111782073975, "learning_rate": 2e-05, "loss": 0.05268801, "step": 22668 }, { "epoch": 45.338, "grad_norm": 1.1542003154754639, "learning_rate": 2e-05, "loss": 0.04983789, "step": 22669 }, { "epoch": 45.34, "grad_norm": 1.3491170406341553, "learning_rate": 2e-05, "loss": 0.0404748, "step": 22670 }, { "epoch": 45.342, "grad_norm": 1.212705373764038, "learning_rate": 2e-05, "loss": 0.04511958, "step": 22671 }, { "epoch": 45.344, "grad_norm": 1.1908196210861206, "learning_rate": 2e-05, "loss": 0.03308444, "step": 22672 }, { "epoch": 45.346, "grad_norm": 4.28449010848999, "learning_rate": 2e-05, "loss": 0.04276413, "step": 22673 }, { "epoch": 45.348, "grad_norm": 0.9184468388557434, "learning_rate": 2e-05, "loss": 0.03018254, "step": 22674 }, { "epoch": 45.35, "grad_norm": 1.1921261548995972, "learning_rate": 2e-05, "loss": 0.04371266, "step": 22675 }, { "epoch": 45.352, "grad_norm": 1.1007202863693237, "learning_rate": 2e-05, "loss": 0.05074195, "step": 22676 }, { "epoch": 45.354, "grad_norm": 0.9843090772628784, "learning_rate": 2e-05, "loss": 0.04036358, "step": 22677 }, { "epoch": 45.356, "grad_norm": 1.344691276550293, "learning_rate": 2e-05, "loss": 0.04783353, "step": 22678 }, { "epoch": 45.358, "grad_norm": 2.444915771484375, "learning_rate": 2e-05, "loss": 0.04788929, "step": 22679 }, { "epoch": 45.36, "grad_norm": 1.4027483463287354, "learning_rate": 2e-05, "loss": 0.04991694, "step": 22680 }, { "epoch": 45.362, "grad_norm": 1.0518147945404053, "learning_rate": 2e-05, "loss": 0.04481725, "step": 22681 }, { "epoch": 45.364, "grad_norm": 1.2735545635223389, "learning_rate": 2e-05, "loss": 0.04290124, "step": 22682 }, { "epoch": 45.366, "grad_norm": 1.3433082103729248, "learning_rate": 2e-05, "loss": 0.05651001, "step": 22683 }, { "epoch": 45.368, "grad_norm": 1.2136088609695435, "learning_rate": 2e-05, "loss": 0.05479234, "step": 22684 }, { "epoch": 45.37, "grad_norm": 1.002616286277771, "learning_rate": 2e-05, "loss": 0.05382376, "step": 22685 }, { "epoch": 45.372, "grad_norm": 1.2218647003173828, "learning_rate": 2e-05, "loss": 0.04835265, "step": 22686 }, { "epoch": 45.374, "grad_norm": 0.9984238147735596, "learning_rate": 2e-05, "loss": 0.04757559, "step": 22687 }, { "epoch": 45.376, "grad_norm": 1.3952957391738892, "learning_rate": 2e-05, "loss": 0.03917549, "step": 22688 }, { "epoch": 45.378, "grad_norm": 1.020330786705017, "learning_rate": 2e-05, "loss": 0.0388756, "step": 22689 }, { "epoch": 45.38, "grad_norm": 1.2622413635253906, "learning_rate": 2e-05, "loss": 0.06351278, "step": 22690 }, { "epoch": 45.382, "grad_norm": 1.1160707473754883, "learning_rate": 2e-05, "loss": 0.03981574, "step": 22691 }, { "epoch": 45.384, "grad_norm": 0.9516853094100952, "learning_rate": 2e-05, "loss": 0.03871073, "step": 22692 }, { "epoch": 45.386, "grad_norm": 1.4745827913284302, "learning_rate": 2e-05, "loss": 0.02624869, "step": 22693 }, { "epoch": 45.388, "grad_norm": 1.3506964445114136, "learning_rate": 2e-05, "loss": 0.06072439, "step": 22694 }, { "epoch": 45.39, "grad_norm": 1.1125704050064087, "learning_rate": 2e-05, "loss": 0.05068371, "step": 22695 }, { "epoch": 45.392, "grad_norm": 1.3637044429779053, "learning_rate": 2e-05, "loss": 0.06045147, "step": 22696 }, { "epoch": 45.394, "grad_norm": 1.1329246759414673, "learning_rate": 2e-05, "loss": 0.05339756, "step": 22697 }, { "epoch": 45.396, "grad_norm": 0.9130178093910217, "learning_rate": 2e-05, "loss": 0.03290018, "step": 22698 }, { "epoch": 45.398, "grad_norm": 1.184698224067688, "learning_rate": 2e-05, "loss": 0.04154731, "step": 22699 }, { "epoch": 45.4, "grad_norm": 1.2803211212158203, "learning_rate": 2e-05, "loss": 0.0556927, "step": 22700 }, { "epoch": 45.402, "grad_norm": 1.0708740949630737, "learning_rate": 2e-05, "loss": 0.04219895, "step": 22701 }, { "epoch": 45.404, "grad_norm": 1.111457109451294, "learning_rate": 2e-05, "loss": 0.04769673, "step": 22702 }, { "epoch": 45.406, "grad_norm": 1.0332413911819458, "learning_rate": 2e-05, "loss": 0.05009396, "step": 22703 }, { "epoch": 45.408, "grad_norm": 6.520485877990723, "learning_rate": 2e-05, "loss": 0.0374029, "step": 22704 }, { "epoch": 45.41, "grad_norm": 1.0618611574172974, "learning_rate": 2e-05, "loss": 0.04241582, "step": 22705 }, { "epoch": 45.412, "grad_norm": 1.2006244659423828, "learning_rate": 2e-05, "loss": 0.04492627, "step": 22706 }, { "epoch": 45.414, "grad_norm": 0.8487184047698975, "learning_rate": 2e-05, "loss": 0.0323393, "step": 22707 }, { "epoch": 45.416, "grad_norm": 1.6922245025634766, "learning_rate": 2e-05, "loss": 0.06060058, "step": 22708 }, { "epoch": 45.418, "grad_norm": 2.7331457138061523, "learning_rate": 2e-05, "loss": 0.04143894, "step": 22709 }, { "epoch": 45.42, "grad_norm": 4.94014835357666, "learning_rate": 2e-05, "loss": 0.05090659, "step": 22710 }, { "epoch": 45.422, "grad_norm": 1.2680546045303345, "learning_rate": 2e-05, "loss": 0.0542921, "step": 22711 }, { "epoch": 45.424, "grad_norm": 1.5890108346939087, "learning_rate": 2e-05, "loss": 0.04281344, "step": 22712 }, { "epoch": 45.426, "grad_norm": 1.072758674621582, "learning_rate": 2e-05, "loss": 0.03711671, "step": 22713 }, { "epoch": 45.428, "grad_norm": 1.2075330018997192, "learning_rate": 2e-05, "loss": 0.03751999, "step": 22714 }, { "epoch": 45.43, "grad_norm": 0.9870922565460205, "learning_rate": 2e-05, "loss": 0.04167493, "step": 22715 }, { "epoch": 45.432, "grad_norm": 1.2968511581420898, "learning_rate": 2e-05, "loss": 0.03765602, "step": 22716 }, { "epoch": 45.434, "grad_norm": 1.1292532682418823, "learning_rate": 2e-05, "loss": 0.02997157, "step": 22717 }, { "epoch": 45.436, "grad_norm": 1.0345921516418457, "learning_rate": 2e-05, "loss": 0.03048265, "step": 22718 }, { "epoch": 45.438, "grad_norm": 1.4411016702651978, "learning_rate": 2e-05, "loss": 0.04629766, "step": 22719 }, { "epoch": 45.44, "grad_norm": 1.0237294435501099, "learning_rate": 2e-05, "loss": 0.04571994, "step": 22720 }, { "epoch": 45.442, "grad_norm": 1.7032222747802734, "learning_rate": 2e-05, "loss": 0.05016217, "step": 22721 }, { "epoch": 45.444, "grad_norm": 1.183050274848938, "learning_rate": 2e-05, "loss": 0.04526314, "step": 22722 }, { "epoch": 45.446, "grad_norm": 1.6441885232925415, "learning_rate": 2e-05, "loss": 0.0430004, "step": 22723 }, { "epoch": 45.448, "grad_norm": 1.7326163053512573, "learning_rate": 2e-05, "loss": 0.05265944, "step": 22724 }, { "epoch": 45.45, "grad_norm": 1.2740235328674316, "learning_rate": 2e-05, "loss": 0.05235073, "step": 22725 }, { "epoch": 45.452, "grad_norm": 1.7515263557434082, "learning_rate": 2e-05, "loss": 0.06340651, "step": 22726 }, { "epoch": 45.454, "grad_norm": 0.9470655918121338, "learning_rate": 2e-05, "loss": 0.04113812, "step": 22727 }, { "epoch": 45.456, "grad_norm": 1.6942975521087646, "learning_rate": 2e-05, "loss": 0.04586442, "step": 22728 }, { "epoch": 45.458, "grad_norm": 2.324281930923462, "learning_rate": 2e-05, "loss": 0.0400774, "step": 22729 }, { "epoch": 45.46, "grad_norm": 0.9977216720581055, "learning_rate": 2e-05, "loss": 0.03369318, "step": 22730 }, { "epoch": 45.462, "grad_norm": 1.1995673179626465, "learning_rate": 2e-05, "loss": 0.03263012, "step": 22731 }, { "epoch": 45.464, "grad_norm": 0.9570655226707458, "learning_rate": 2e-05, "loss": 0.04091702, "step": 22732 }, { "epoch": 45.466, "grad_norm": 1.1437430381774902, "learning_rate": 2e-05, "loss": 0.05885084, "step": 22733 }, { "epoch": 45.468, "grad_norm": 0.8572333455085754, "learning_rate": 2e-05, "loss": 0.03177877, "step": 22734 }, { "epoch": 45.47, "grad_norm": 1.145864725112915, "learning_rate": 2e-05, "loss": 0.0358723, "step": 22735 }, { "epoch": 45.472, "grad_norm": 1.0808372497558594, "learning_rate": 2e-05, "loss": 0.04354683, "step": 22736 }, { "epoch": 45.474, "grad_norm": 1.0540697574615479, "learning_rate": 2e-05, "loss": 0.02645936, "step": 22737 }, { "epoch": 45.476, "grad_norm": 1.0225580930709839, "learning_rate": 2e-05, "loss": 0.04234812, "step": 22738 }, { "epoch": 45.478, "grad_norm": 1.2443112134933472, "learning_rate": 2e-05, "loss": 0.05094778, "step": 22739 }, { "epoch": 45.48, "grad_norm": 0.8900364637374878, "learning_rate": 2e-05, "loss": 0.02521351, "step": 22740 }, { "epoch": 45.482, "grad_norm": 1.0209593772888184, "learning_rate": 2e-05, "loss": 0.0532154, "step": 22741 }, { "epoch": 45.484, "grad_norm": 1.0292103290557861, "learning_rate": 2e-05, "loss": 0.05081788, "step": 22742 }, { "epoch": 45.486, "grad_norm": 1.6414892673492432, "learning_rate": 2e-05, "loss": 0.05739576, "step": 22743 }, { "epoch": 45.488, "grad_norm": 1.1527272462844849, "learning_rate": 2e-05, "loss": 0.04760287, "step": 22744 }, { "epoch": 45.49, "grad_norm": 1.122249960899353, "learning_rate": 2e-05, "loss": 0.04415414, "step": 22745 }, { "epoch": 45.492, "grad_norm": 1.6215230226516724, "learning_rate": 2e-05, "loss": 0.04642128, "step": 22746 }, { "epoch": 45.494, "grad_norm": 1.2234916687011719, "learning_rate": 2e-05, "loss": 0.03209268, "step": 22747 }, { "epoch": 45.496, "grad_norm": 1.69521963596344, "learning_rate": 2e-05, "loss": 0.05727867, "step": 22748 }, { "epoch": 45.498, "grad_norm": 1.0897610187530518, "learning_rate": 2e-05, "loss": 0.04372771, "step": 22749 }, { "epoch": 45.5, "grad_norm": 1.0344184637069702, "learning_rate": 2e-05, "loss": 0.05361933, "step": 22750 }, { "epoch": 45.502, "grad_norm": 1.0740419626235962, "learning_rate": 2e-05, "loss": 0.05186666, "step": 22751 }, { "epoch": 45.504, "grad_norm": 1.1925361156463623, "learning_rate": 2e-05, "loss": 0.03813919, "step": 22752 }, { "epoch": 45.506, "grad_norm": 1.1967079639434814, "learning_rate": 2e-05, "loss": 0.04697201, "step": 22753 }, { "epoch": 45.508, "grad_norm": 1.3757407665252686, "learning_rate": 2e-05, "loss": 0.04626047, "step": 22754 }, { "epoch": 45.51, "grad_norm": 1.3299400806427002, "learning_rate": 2e-05, "loss": 0.05921501, "step": 22755 }, { "epoch": 45.512, "grad_norm": 2.3574445247650146, "learning_rate": 2e-05, "loss": 0.06191718, "step": 22756 }, { "epoch": 45.514, "grad_norm": 1.0568877458572388, "learning_rate": 2e-05, "loss": 0.0492296, "step": 22757 }, { "epoch": 45.516, "grad_norm": 0.9746472239494324, "learning_rate": 2e-05, "loss": 0.03089088, "step": 22758 }, { "epoch": 45.518, "grad_norm": 1.0493680238723755, "learning_rate": 2e-05, "loss": 0.03683866, "step": 22759 }, { "epoch": 45.52, "grad_norm": 0.9401288032531738, "learning_rate": 2e-05, "loss": 0.02954689, "step": 22760 }, { "epoch": 45.522, "grad_norm": 1.0596168041229248, "learning_rate": 2e-05, "loss": 0.04454588, "step": 22761 }, { "epoch": 45.524, "grad_norm": 1.077014684677124, "learning_rate": 2e-05, "loss": 0.04307317, "step": 22762 }, { "epoch": 45.526, "grad_norm": 0.8536494970321655, "learning_rate": 2e-05, "loss": 0.0324789, "step": 22763 }, { "epoch": 45.528, "grad_norm": 1.3965752124786377, "learning_rate": 2e-05, "loss": 0.03975829, "step": 22764 }, { "epoch": 45.53, "grad_norm": 1.083166480064392, "learning_rate": 2e-05, "loss": 0.04537066, "step": 22765 }, { "epoch": 45.532, "grad_norm": 2.0132784843444824, "learning_rate": 2e-05, "loss": 0.0450456, "step": 22766 }, { "epoch": 45.534, "grad_norm": 0.8198070526123047, "learning_rate": 2e-05, "loss": 0.03127465, "step": 22767 }, { "epoch": 45.536, "grad_norm": 1.1586196422576904, "learning_rate": 2e-05, "loss": 0.04691871, "step": 22768 }, { "epoch": 45.538, "grad_norm": 0.7624765634536743, "learning_rate": 2e-05, "loss": 0.02710294, "step": 22769 }, { "epoch": 45.54, "grad_norm": 1.3532878160476685, "learning_rate": 2e-05, "loss": 0.0431123, "step": 22770 }, { "epoch": 45.542, "grad_norm": 1.3241866827011108, "learning_rate": 2e-05, "loss": 0.03120969, "step": 22771 }, { "epoch": 45.544, "grad_norm": 0.980930507183075, "learning_rate": 2e-05, "loss": 0.04814873, "step": 22772 }, { "epoch": 45.546, "grad_norm": 1.012178659439087, "learning_rate": 2e-05, "loss": 0.03718514, "step": 22773 }, { "epoch": 45.548, "grad_norm": 1.740031361579895, "learning_rate": 2e-05, "loss": 0.04545609, "step": 22774 }, { "epoch": 45.55, "grad_norm": 1.2103362083435059, "learning_rate": 2e-05, "loss": 0.05362935, "step": 22775 }, { "epoch": 45.552, "grad_norm": 2.1168038845062256, "learning_rate": 2e-05, "loss": 0.05177964, "step": 22776 }, { "epoch": 45.554, "grad_norm": 0.9236567616462708, "learning_rate": 2e-05, "loss": 0.03204521, "step": 22777 }, { "epoch": 45.556, "grad_norm": 1.123531699180603, "learning_rate": 2e-05, "loss": 0.04508154, "step": 22778 }, { "epoch": 45.558, "grad_norm": 1.5705060958862305, "learning_rate": 2e-05, "loss": 0.05697759, "step": 22779 }, { "epoch": 45.56, "grad_norm": 2.5711798667907715, "learning_rate": 2e-05, "loss": 0.06582118, "step": 22780 }, { "epoch": 45.562, "grad_norm": 1.1930994987487793, "learning_rate": 2e-05, "loss": 0.03991815, "step": 22781 }, { "epoch": 45.564, "grad_norm": 1.2321208715438843, "learning_rate": 2e-05, "loss": 0.04735018, "step": 22782 }, { "epoch": 45.566, "grad_norm": 1.0998891592025757, "learning_rate": 2e-05, "loss": 0.0471554, "step": 22783 }, { "epoch": 45.568, "grad_norm": 0.8055955171585083, "learning_rate": 2e-05, "loss": 0.03103444, "step": 22784 }, { "epoch": 45.57, "grad_norm": 1.137664556503296, "learning_rate": 2e-05, "loss": 0.05315431, "step": 22785 }, { "epoch": 45.572, "grad_norm": 1.428505778312683, "learning_rate": 2e-05, "loss": 0.05824667, "step": 22786 }, { "epoch": 45.574, "grad_norm": 1.1720763444900513, "learning_rate": 2e-05, "loss": 0.05049345, "step": 22787 }, { "epoch": 45.576, "grad_norm": 2.2381720542907715, "learning_rate": 2e-05, "loss": 0.05937954, "step": 22788 }, { "epoch": 45.578, "grad_norm": 2.7922146320343018, "learning_rate": 2e-05, "loss": 0.05318843, "step": 22789 }, { "epoch": 45.58, "grad_norm": 1.1432366371154785, "learning_rate": 2e-05, "loss": 0.03855621, "step": 22790 }, { "epoch": 45.582, "grad_norm": 0.9563543796539307, "learning_rate": 2e-05, "loss": 0.03782255, "step": 22791 }, { "epoch": 45.584, "grad_norm": 2.5303802490234375, "learning_rate": 2e-05, "loss": 0.05539024, "step": 22792 }, { "epoch": 45.586, "grad_norm": 1.0670671463012695, "learning_rate": 2e-05, "loss": 0.03452586, "step": 22793 }, { "epoch": 45.588, "grad_norm": 1.0244661569595337, "learning_rate": 2e-05, "loss": 0.03537963, "step": 22794 }, { "epoch": 45.59, "grad_norm": 1.7589315176010132, "learning_rate": 2e-05, "loss": 0.05638959, "step": 22795 }, { "epoch": 45.592, "grad_norm": 1.091156244277954, "learning_rate": 2e-05, "loss": 0.04027123, "step": 22796 }, { "epoch": 45.594, "grad_norm": 1.361618161201477, "learning_rate": 2e-05, "loss": 0.04939012, "step": 22797 }, { "epoch": 45.596, "grad_norm": 1.3494913578033447, "learning_rate": 2e-05, "loss": 0.04348815, "step": 22798 }, { "epoch": 45.598, "grad_norm": 1.1284618377685547, "learning_rate": 2e-05, "loss": 0.04234558, "step": 22799 }, { "epoch": 45.6, "grad_norm": 1.104109764099121, "learning_rate": 2e-05, "loss": 0.04131437, "step": 22800 }, { "epoch": 45.602, "grad_norm": 1.118873119354248, "learning_rate": 2e-05, "loss": 0.04525807, "step": 22801 }, { "epoch": 45.604, "grad_norm": 1.4957573413848877, "learning_rate": 2e-05, "loss": 0.04322227, "step": 22802 }, { "epoch": 45.606, "grad_norm": 2.740567207336426, "learning_rate": 2e-05, "loss": 0.06499271, "step": 22803 }, { "epoch": 45.608, "grad_norm": 1.2358646392822266, "learning_rate": 2e-05, "loss": 0.04856521, "step": 22804 }, { "epoch": 45.61, "grad_norm": 1.1504874229431152, "learning_rate": 2e-05, "loss": 0.04914548, "step": 22805 }, { "epoch": 45.612, "grad_norm": 1.0091999769210815, "learning_rate": 2e-05, "loss": 0.04622948, "step": 22806 }, { "epoch": 45.614, "grad_norm": 1.684523344039917, "learning_rate": 2e-05, "loss": 0.03886724, "step": 22807 }, { "epoch": 45.616, "grad_norm": 0.9210161566734314, "learning_rate": 2e-05, "loss": 0.03574962, "step": 22808 }, { "epoch": 45.618, "grad_norm": 2.7812511920928955, "learning_rate": 2e-05, "loss": 0.05236585, "step": 22809 }, { "epoch": 45.62, "grad_norm": 0.9422674179077148, "learning_rate": 2e-05, "loss": 0.03036742, "step": 22810 }, { "epoch": 45.622, "grad_norm": 0.9877131581306458, "learning_rate": 2e-05, "loss": 0.03449025, "step": 22811 }, { "epoch": 45.624, "grad_norm": 1.9917272329330444, "learning_rate": 2e-05, "loss": 0.06884912, "step": 22812 }, { "epoch": 45.626, "grad_norm": 1.1866600513458252, "learning_rate": 2e-05, "loss": 0.03858717, "step": 22813 }, { "epoch": 45.628, "grad_norm": 0.9313453435897827, "learning_rate": 2e-05, "loss": 0.04033993, "step": 22814 }, { "epoch": 45.63, "grad_norm": 1.1456177234649658, "learning_rate": 2e-05, "loss": 0.03991004, "step": 22815 }, { "epoch": 45.632, "grad_norm": 1.3446094989776611, "learning_rate": 2e-05, "loss": 0.05135424, "step": 22816 }, { "epoch": 45.634, "grad_norm": 1.4655600786209106, "learning_rate": 2e-05, "loss": 0.04335689, "step": 22817 }, { "epoch": 45.636, "grad_norm": 0.9724588394165039, "learning_rate": 2e-05, "loss": 0.03118525, "step": 22818 }, { "epoch": 45.638, "grad_norm": 1.461982250213623, "learning_rate": 2e-05, "loss": 0.03366026, "step": 22819 }, { "epoch": 45.64, "grad_norm": 1.1791331768035889, "learning_rate": 2e-05, "loss": 0.02651989, "step": 22820 }, { "epoch": 45.642, "grad_norm": 0.9885498881340027, "learning_rate": 2e-05, "loss": 0.03888188, "step": 22821 }, { "epoch": 45.644, "grad_norm": 1.387880802154541, "learning_rate": 2e-05, "loss": 0.03492934, "step": 22822 }, { "epoch": 45.646, "grad_norm": 0.972472071647644, "learning_rate": 2e-05, "loss": 0.0382218, "step": 22823 }, { "epoch": 45.648, "grad_norm": 1.298417568206787, "learning_rate": 2e-05, "loss": 0.04526832, "step": 22824 }, { "epoch": 45.65, "grad_norm": 1.1075867414474487, "learning_rate": 2e-05, "loss": 0.04629479, "step": 22825 }, { "epoch": 45.652, "grad_norm": 0.9512344002723694, "learning_rate": 2e-05, "loss": 0.03577991, "step": 22826 }, { "epoch": 45.654, "grad_norm": 1.1160368919372559, "learning_rate": 2e-05, "loss": 0.04539494, "step": 22827 }, { "epoch": 45.656, "grad_norm": 1.0705761909484863, "learning_rate": 2e-05, "loss": 0.04036682, "step": 22828 }, { "epoch": 45.658, "grad_norm": 1.0978666543960571, "learning_rate": 2e-05, "loss": 0.04726621, "step": 22829 }, { "epoch": 45.66, "grad_norm": 0.8812873363494873, "learning_rate": 2e-05, "loss": 0.03402353, "step": 22830 }, { "epoch": 45.662, "grad_norm": 1.305201530456543, "learning_rate": 2e-05, "loss": 0.04314324, "step": 22831 }, { "epoch": 45.664, "grad_norm": 1.1452564001083374, "learning_rate": 2e-05, "loss": 0.05007552, "step": 22832 }, { "epoch": 45.666, "grad_norm": 1.1126997470855713, "learning_rate": 2e-05, "loss": 0.05092571, "step": 22833 }, { "epoch": 45.668, "grad_norm": 0.9946684241294861, "learning_rate": 2e-05, "loss": 0.04005679, "step": 22834 }, { "epoch": 45.67, "grad_norm": 1.0103343725204468, "learning_rate": 2e-05, "loss": 0.03741839, "step": 22835 }, { "epoch": 45.672, "grad_norm": 1.171213984489441, "learning_rate": 2e-05, "loss": 0.04636289, "step": 22836 }, { "epoch": 45.674, "grad_norm": 1.4816864728927612, "learning_rate": 2e-05, "loss": 0.06608205, "step": 22837 }, { "epoch": 45.676, "grad_norm": 0.9572558999061584, "learning_rate": 2e-05, "loss": 0.03231528, "step": 22838 }, { "epoch": 45.678, "grad_norm": 1.018825650215149, "learning_rate": 2e-05, "loss": 0.03872021, "step": 22839 }, { "epoch": 45.68, "grad_norm": 2.049959897994995, "learning_rate": 2e-05, "loss": 0.04652671, "step": 22840 }, { "epoch": 45.682, "grad_norm": 1.1602877378463745, "learning_rate": 2e-05, "loss": 0.04048842, "step": 22841 }, { "epoch": 45.684, "grad_norm": 1.112059473991394, "learning_rate": 2e-05, "loss": 0.04651512, "step": 22842 }, { "epoch": 45.686, "grad_norm": 1.4088462591171265, "learning_rate": 2e-05, "loss": 0.04688901, "step": 22843 }, { "epoch": 45.688, "grad_norm": 1.2107744216918945, "learning_rate": 2e-05, "loss": 0.03887016, "step": 22844 }, { "epoch": 45.69, "grad_norm": 1.3035609722137451, "learning_rate": 2e-05, "loss": 0.0463603, "step": 22845 }, { "epoch": 45.692, "grad_norm": 1.4932314157485962, "learning_rate": 2e-05, "loss": 0.03909862, "step": 22846 }, { "epoch": 45.694, "grad_norm": 0.8740349411964417, "learning_rate": 2e-05, "loss": 0.02654898, "step": 22847 }, { "epoch": 45.696, "grad_norm": 1.08855402469635, "learning_rate": 2e-05, "loss": 0.03953352, "step": 22848 }, { "epoch": 45.698, "grad_norm": 1.5773930549621582, "learning_rate": 2e-05, "loss": 0.0479054, "step": 22849 }, { "epoch": 45.7, "grad_norm": 1.2244648933410645, "learning_rate": 2e-05, "loss": 0.04167821, "step": 22850 }, { "epoch": 45.702, "grad_norm": 1.9656699895858765, "learning_rate": 2e-05, "loss": 0.04000026, "step": 22851 }, { "epoch": 45.704, "grad_norm": 0.9548643231391907, "learning_rate": 2e-05, "loss": 0.04616936, "step": 22852 }, { "epoch": 45.706, "grad_norm": 0.871763288974762, "learning_rate": 2e-05, "loss": 0.03091203, "step": 22853 }, { "epoch": 45.708, "grad_norm": 1.0010076761245728, "learning_rate": 2e-05, "loss": 0.03298238, "step": 22854 }, { "epoch": 45.71, "grad_norm": 0.9753047823905945, "learning_rate": 2e-05, "loss": 0.03809507, "step": 22855 }, { "epoch": 45.712, "grad_norm": 1.1269019842147827, "learning_rate": 2e-05, "loss": 0.05176232, "step": 22856 }, { "epoch": 45.714, "grad_norm": 1.3892861604690552, "learning_rate": 2e-05, "loss": 0.04807642, "step": 22857 }, { "epoch": 45.716, "grad_norm": 1.0833181142807007, "learning_rate": 2e-05, "loss": 0.04307299, "step": 22858 }, { "epoch": 45.718, "grad_norm": 1.1591814756393433, "learning_rate": 2e-05, "loss": 0.04200939, "step": 22859 }, { "epoch": 45.72, "grad_norm": 1.0798828601837158, "learning_rate": 2e-05, "loss": 0.03986163, "step": 22860 }, { "epoch": 45.722, "grad_norm": 1.1151132583618164, "learning_rate": 2e-05, "loss": 0.03489438, "step": 22861 }, { "epoch": 45.724, "grad_norm": 1.1221650838851929, "learning_rate": 2e-05, "loss": 0.04655517, "step": 22862 }, { "epoch": 45.726, "grad_norm": 1.2297589778900146, "learning_rate": 2e-05, "loss": 0.0457954, "step": 22863 }, { "epoch": 45.728, "grad_norm": 1.289986491203308, "learning_rate": 2e-05, "loss": 0.05135708, "step": 22864 }, { "epoch": 45.73, "grad_norm": 1.176823616027832, "learning_rate": 2e-05, "loss": 0.04665578, "step": 22865 }, { "epoch": 45.732, "grad_norm": 0.9727978706359863, "learning_rate": 2e-05, "loss": 0.03969512, "step": 22866 }, { "epoch": 45.734, "grad_norm": 0.9292997121810913, "learning_rate": 2e-05, "loss": 0.03250346, "step": 22867 }, { "epoch": 45.736, "grad_norm": 1.1784592866897583, "learning_rate": 2e-05, "loss": 0.0401311, "step": 22868 }, { "epoch": 45.738, "grad_norm": 1.2398289442062378, "learning_rate": 2e-05, "loss": 0.06123427, "step": 22869 }, { "epoch": 45.74, "grad_norm": 1.4263213872909546, "learning_rate": 2e-05, "loss": 0.03866532, "step": 22870 }, { "epoch": 45.742, "grad_norm": 1.0542892217636108, "learning_rate": 2e-05, "loss": 0.04666059, "step": 22871 }, { "epoch": 45.744, "grad_norm": 3.976712703704834, "learning_rate": 2e-05, "loss": 0.05393363, "step": 22872 }, { "epoch": 45.746, "grad_norm": 1.1893327236175537, "learning_rate": 2e-05, "loss": 0.04344828, "step": 22873 }, { "epoch": 45.748, "grad_norm": 1.0429264307022095, "learning_rate": 2e-05, "loss": 0.04576417, "step": 22874 }, { "epoch": 45.75, "grad_norm": 0.9618239998817444, "learning_rate": 2e-05, "loss": 0.02891281, "step": 22875 }, { "epoch": 45.752, "grad_norm": 1.2758760452270508, "learning_rate": 2e-05, "loss": 0.05423602, "step": 22876 }, { "epoch": 45.754, "grad_norm": 1.0951226949691772, "learning_rate": 2e-05, "loss": 0.04736341, "step": 22877 }, { "epoch": 45.756, "grad_norm": 1.2242333889007568, "learning_rate": 2e-05, "loss": 0.04454009, "step": 22878 }, { "epoch": 45.758, "grad_norm": 1.2175134420394897, "learning_rate": 2e-05, "loss": 0.05233236, "step": 22879 }, { "epoch": 45.76, "grad_norm": 1.0067569017410278, "learning_rate": 2e-05, "loss": 0.03734559, "step": 22880 }, { "epoch": 45.762, "grad_norm": 1.0544008016586304, "learning_rate": 2e-05, "loss": 0.04446972, "step": 22881 }, { "epoch": 45.764, "grad_norm": 1.9381471872329712, "learning_rate": 2e-05, "loss": 0.04462221, "step": 22882 }, { "epoch": 45.766, "grad_norm": 1.5526267290115356, "learning_rate": 2e-05, "loss": 0.05249919, "step": 22883 }, { "epoch": 45.768, "grad_norm": 1.0446468591690063, "learning_rate": 2e-05, "loss": 0.05133056, "step": 22884 }, { "epoch": 45.77, "grad_norm": 1.68937087059021, "learning_rate": 2e-05, "loss": 0.03557207, "step": 22885 }, { "epoch": 45.772, "grad_norm": 1.0683984756469727, "learning_rate": 2e-05, "loss": 0.04517757, "step": 22886 }, { "epoch": 45.774, "grad_norm": 1.746897578239441, "learning_rate": 2e-05, "loss": 0.05658411, "step": 22887 }, { "epoch": 45.776, "grad_norm": 0.9132818579673767, "learning_rate": 2e-05, "loss": 0.03768554, "step": 22888 }, { "epoch": 45.778, "grad_norm": 0.9918961524963379, "learning_rate": 2e-05, "loss": 0.03709259, "step": 22889 }, { "epoch": 45.78, "grad_norm": 0.935555100440979, "learning_rate": 2e-05, "loss": 0.04078614, "step": 22890 }, { "epoch": 45.782, "grad_norm": 1.0053125619888306, "learning_rate": 2e-05, "loss": 0.04169351, "step": 22891 }, { "epoch": 45.784, "grad_norm": 1.0130285024642944, "learning_rate": 2e-05, "loss": 0.03680064, "step": 22892 }, { "epoch": 45.786, "grad_norm": 1.867035984992981, "learning_rate": 2e-05, "loss": 0.05970931, "step": 22893 }, { "epoch": 45.788, "grad_norm": 1.3895435333251953, "learning_rate": 2e-05, "loss": 0.04175571, "step": 22894 }, { "epoch": 45.79, "grad_norm": 1.235295057296753, "learning_rate": 2e-05, "loss": 0.0524193, "step": 22895 }, { "epoch": 45.792, "grad_norm": 0.9691627025604248, "learning_rate": 2e-05, "loss": 0.02199671, "step": 22896 }, { "epoch": 45.794, "grad_norm": 0.8234715461730957, "learning_rate": 2e-05, "loss": 0.0224343, "step": 22897 }, { "epoch": 45.796, "grad_norm": 1.321062684059143, "learning_rate": 2e-05, "loss": 0.0486863, "step": 22898 }, { "epoch": 45.798, "grad_norm": 1.2613604068756104, "learning_rate": 2e-05, "loss": 0.04978583, "step": 22899 }, { "epoch": 45.8, "grad_norm": 1.3854029178619385, "learning_rate": 2e-05, "loss": 0.06145602, "step": 22900 }, { "epoch": 45.802, "grad_norm": 1.938504695892334, "learning_rate": 2e-05, "loss": 0.06690025, "step": 22901 }, { "epoch": 45.804, "grad_norm": 1.242529273033142, "learning_rate": 2e-05, "loss": 0.05242737, "step": 22902 }, { "epoch": 45.806, "grad_norm": 1.0569992065429688, "learning_rate": 2e-05, "loss": 0.04005806, "step": 22903 }, { "epoch": 45.808, "grad_norm": 1.1304805278778076, "learning_rate": 2e-05, "loss": 0.05338077, "step": 22904 }, { "epoch": 45.81, "grad_norm": 5.261485576629639, "learning_rate": 2e-05, "loss": 0.03530961, "step": 22905 }, { "epoch": 45.812, "grad_norm": 0.9543238878250122, "learning_rate": 2e-05, "loss": 0.03193602, "step": 22906 }, { "epoch": 45.814, "grad_norm": 1.8528729677200317, "learning_rate": 2e-05, "loss": 0.04215121, "step": 22907 }, { "epoch": 45.816, "grad_norm": 1.0285489559173584, "learning_rate": 2e-05, "loss": 0.04820376, "step": 22908 }, { "epoch": 45.818, "grad_norm": 0.9057236909866333, "learning_rate": 2e-05, "loss": 0.03559089, "step": 22909 }, { "epoch": 45.82, "grad_norm": 1.0207762718200684, "learning_rate": 2e-05, "loss": 0.04220248, "step": 22910 }, { "epoch": 45.822, "grad_norm": 0.9384340643882751, "learning_rate": 2e-05, "loss": 0.04070425, "step": 22911 }, { "epoch": 45.824, "grad_norm": 1.1020053625106812, "learning_rate": 2e-05, "loss": 0.03858484, "step": 22912 }, { "epoch": 45.826, "grad_norm": 1.1059986352920532, "learning_rate": 2e-05, "loss": 0.05134423, "step": 22913 }, { "epoch": 45.828, "grad_norm": 0.8538850545883179, "learning_rate": 2e-05, "loss": 0.02401451, "step": 22914 }, { "epoch": 45.83, "grad_norm": 1.0629873275756836, "learning_rate": 2e-05, "loss": 0.04580153, "step": 22915 }, { "epoch": 45.832, "grad_norm": 0.961974024772644, "learning_rate": 2e-05, "loss": 0.03148204, "step": 22916 }, { "epoch": 45.834, "grad_norm": 1.1628038883209229, "learning_rate": 2e-05, "loss": 0.03698536, "step": 22917 }, { "epoch": 45.836, "grad_norm": 1.843483805656433, "learning_rate": 2e-05, "loss": 0.0587501, "step": 22918 }, { "epoch": 45.838, "grad_norm": 1.750087857246399, "learning_rate": 2e-05, "loss": 0.0401295, "step": 22919 }, { "epoch": 45.84, "grad_norm": 1.3009958267211914, "learning_rate": 2e-05, "loss": 0.04588776, "step": 22920 }, { "epoch": 45.842, "grad_norm": 0.964752733707428, "learning_rate": 2e-05, "loss": 0.02900057, "step": 22921 }, { "epoch": 45.844, "grad_norm": 1.823072075843811, "learning_rate": 2e-05, "loss": 0.05737882, "step": 22922 }, { "epoch": 45.846, "grad_norm": 1.0906617641448975, "learning_rate": 2e-05, "loss": 0.05159894, "step": 22923 }, { "epoch": 45.848, "grad_norm": 0.984144926071167, "learning_rate": 2e-05, "loss": 0.04352382, "step": 22924 }, { "epoch": 45.85, "grad_norm": 1.0206811428070068, "learning_rate": 2e-05, "loss": 0.03888148, "step": 22925 }, { "epoch": 45.852, "grad_norm": 1.1020495891571045, "learning_rate": 2e-05, "loss": 0.04888733, "step": 22926 }, { "epoch": 45.854, "grad_norm": 0.943946123123169, "learning_rate": 2e-05, "loss": 0.03423984, "step": 22927 }, { "epoch": 45.856, "grad_norm": 1.279475450515747, "learning_rate": 2e-05, "loss": 0.04742841, "step": 22928 }, { "epoch": 45.858, "grad_norm": 1.0205110311508179, "learning_rate": 2e-05, "loss": 0.03363776, "step": 22929 }, { "epoch": 45.86, "grad_norm": 0.952771008014679, "learning_rate": 2e-05, "loss": 0.03752216, "step": 22930 }, { "epoch": 45.862, "grad_norm": 0.8683675527572632, "learning_rate": 2e-05, "loss": 0.03787312, "step": 22931 }, { "epoch": 45.864, "grad_norm": 0.9761491417884827, "learning_rate": 2e-05, "loss": 0.03768709, "step": 22932 }, { "epoch": 45.866, "grad_norm": 1.0012956857681274, "learning_rate": 2e-05, "loss": 0.03822475, "step": 22933 }, { "epoch": 45.868, "grad_norm": 1.5585095882415771, "learning_rate": 2e-05, "loss": 0.05710295, "step": 22934 }, { "epoch": 45.87, "grad_norm": 1.076458215713501, "learning_rate": 2e-05, "loss": 0.04197651, "step": 22935 }, { "epoch": 45.872, "grad_norm": 1.3669697046279907, "learning_rate": 2e-05, "loss": 0.04502393, "step": 22936 }, { "epoch": 45.874, "grad_norm": 0.9954581260681152, "learning_rate": 2e-05, "loss": 0.0445012, "step": 22937 }, { "epoch": 45.876, "grad_norm": 0.9255000948905945, "learning_rate": 2e-05, "loss": 0.03059526, "step": 22938 }, { "epoch": 45.878, "grad_norm": 0.8148857951164246, "learning_rate": 2e-05, "loss": 0.02271536, "step": 22939 }, { "epoch": 45.88, "grad_norm": 1.1037157773971558, "learning_rate": 2e-05, "loss": 0.05006099, "step": 22940 }, { "epoch": 45.882, "grad_norm": 1.7392206192016602, "learning_rate": 2e-05, "loss": 0.05934298, "step": 22941 }, { "epoch": 45.884, "grad_norm": 1.1167516708374023, "learning_rate": 2e-05, "loss": 0.05150472, "step": 22942 }, { "epoch": 45.886, "grad_norm": 1.1795276403427124, "learning_rate": 2e-05, "loss": 0.0336176, "step": 22943 }, { "epoch": 45.888, "grad_norm": 0.8127323985099792, "learning_rate": 2e-05, "loss": 0.0240802, "step": 22944 }, { "epoch": 45.89, "grad_norm": 1.0285916328430176, "learning_rate": 2e-05, "loss": 0.04078464, "step": 22945 }, { "epoch": 45.892, "grad_norm": 0.8910039067268372, "learning_rate": 2e-05, "loss": 0.02416823, "step": 22946 }, { "epoch": 45.894, "grad_norm": 0.9570349454879761, "learning_rate": 2e-05, "loss": 0.03280583, "step": 22947 }, { "epoch": 45.896, "grad_norm": 1.7092523574829102, "learning_rate": 2e-05, "loss": 0.05914164, "step": 22948 }, { "epoch": 45.898, "grad_norm": 1.166751503944397, "learning_rate": 2e-05, "loss": 0.03660767, "step": 22949 }, { "epoch": 45.9, "grad_norm": 1.32887864112854, "learning_rate": 2e-05, "loss": 0.04854822, "step": 22950 }, { "epoch": 45.902, "grad_norm": 1.245317816734314, "learning_rate": 2e-05, "loss": 0.03073648, "step": 22951 }, { "epoch": 45.904, "grad_norm": 0.8496929407119751, "learning_rate": 2e-05, "loss": 0.02464218, "step": 22952 }, { "epoch": 45.906, "grad_norm": 0.9057668447494507, "learning_rate": 2e-05, "loss": 0.03114329, "step": 22953 }, { "epoch": 45.908, "grad_norm": 1.1205207109451294, "learning_rate": 2e-05, "loss": 0.03626901, "step": 22954 }, { "epoch": 45.91, "grad_norm": 1.0607713460922241, "learning_rate": 2e-05, "loss": 0.04431652, "step": 22955 }, { "epoch": 45.912, "grad_norm": 0.935172438621521, "learning_rate": 2e-05, "loss": 0.03240005, "step": 22956 }, { "epoch": 45.914, "grad_norm": 1.3375014066696167, "learning_rate": 2e-05, "loss": 0.04643151, "step": 22957 }, { "epoch": 45.916, "grad_norm": 1.0213937759399414, "learning_rate": 2e-05, "loss": 0.0386704, "step": 22958 }, { "epoch": 45.918, "grad_norm": 1.0581977367401123, "learning_rate": 2e-05, "loss": 0.04323316, "step": 22959 }, { "epoch": 45.92, "grad_norm": 1.1347464323043823, "learning_rate": 2e-05, "loss": 0.04544326, "step": 22960 }, { "epoch": 45.922, "grad_norm": 1.0418998003005981, "learning_rate": 2e-05, "loss": 0.0339935, "step": 22961 }, { "epoch": 45.924, "grad_norm": 1.0169612169265747, "learning_rate": 2e-05, "loss": 0.03452504, "step": 22962 }, { "epoch": 45.926, "grad_norm": 1.2221120595932007, "learning_rate": 2e-05, "loss": 0.04586016, "step": 22963 }, { "epoch": 45.928, "grad_norm": 1.2962942123413086, "learning_rate": 2e-05, "loss": 0.03911393, "step": 22964 }, { "epoch": 45.93, "grad_norm": 1.124649167060852, "learning_rate": 2e-05, "loss": 0.0409162, "step": 22965 }, { "epoch": 45.932, "grad_norm": 1.2271252870559692, "learning_rate": 2e-05, "loss": 0.04452129, "step": 22966 }, { "epoch": 45.934, "grad_norm": 1.1133909225463867, "learning_rate": 2e-05, "loss": 0.0638171, "step": 22967 }, { "epoch": 45.936, "grad_norm": 1.205997347831726, "learning_rate": 2e-05, "loss": 0.04188669, "step": 22968 }, { "epoch": 45.938, "grad_norm": 1.0356420278549194, "learning_rate": 2e-05, "loss": 0.0454236, "step": 22969 }, { "epoch": 45.94, "grad_norm": 1.0883393287658691, "learning_rate": 2e-05, "loss": 0.05287962, "step": 22970 }, { "epoch": 45.942, "grad_norm": 1.0511224269866943, "learning_rate": 2e-05, "loss": 0.04458201, "step": 22971 }, { "epoch": 45.944, "grad_norm": 1.1505107879638672, "learning_rate": 2e-05, "loss": 0.04705449, "step": 22972 }, { "epoch": 45.946, "grad_norm": 1.0341613292694092, "learning_rate": 2e-05, "loss": 0.04306821, "step": 22973 }, { "epoch": 45.948, "grad_norm": 1.5687587261199951, "learning_rate": 2e-05, "loss": 0.06012786, "step": 22974 }, { "epoch": 45.95, "grad_norm": 1.0023505687713623, "learning_rate": 2e-05, "loss": 0.04756876, "step": 22975 }, { "epoch": 45.952, "grad_norm": 1.7120577096939087, "learning_rate": 2e-05, "loss": 0.04021297, "step": 22976 }, { "epoch": 45.954, "grad_norm": 1.2223602533340454, "learning_rate": 2e-05, "loss": 0.05187104, "step": 22977 }, { "epoch": 45.956, "grad_norm": 1.2133601903915405, "learning_rate": 2e-05, "loss": 0.0455308, "step": 22978 }, { "epoch": 45.958, "grad_norm": 0.8647286295890808, "learning_rate": 2e-05, "loss": 0.02877588, "step": 22979 }, { "epoch": 45.96, "grad_norm": 2.4779179096221924, "learning_rate": 2e-05, "loss": 0.05048584, "step": 22980 }, { "epoch": 45.962, "grad_norm": 1.208946704864502, "learning_rate": 2e-05, "loss": 0.04282832, "step": 22981 }, { "epoch": 45.964, "grad_norm": 1.050665020942688, "learning_rate": 2e-05, "loss": 0.03963824, "step": 22982 }, { "epoch": 45.966, "grad_norm": 1.4532694816589355, "learning_rate": 2e-05, "loss": 0.06232072, "step": 22983 }, { "epoch": 45.968, "grad_norm": 0.8150445222854614, "learning_rate": 2e-05, "loss": 0.02603839, "step": 22984 }, { "epoch": 45.97, "grad_norm": 0.877778947353363, "learning_rate": 2e-05, "loss": 0.02853515, "step": 22985 }, { "epoch": 45.972, "grad_norm": 0.9975916743278503, "learning_rate": 2e-05, "loss": 0.0395987, "step": 22986 }, { "epoch": 45.974, "grad_norm": 1.7292685508728027, "learning_rate": 2e-05, "loss": 0.0553902, "step": 22987 }, { "epoch": 45.976, "grad_norm": 2.2031028270721436, "learning_rate": 2e-05, "loss": 0.05389892, "step": 22988 }, { "epoch": 45.978, "grad_norm": 1.244927167892456, "learning_rate": 2e-05, "loss": 0.05231181, "step": 22989 }, { "epoch": 45.98, "grad_norm": 1.484117031097412, "learning_rate": 2e-05, "loss": 0.0488895, "step": 22990 }, { "epoch": 45.982, "grad_norm": 1.904813528060913, "learning_rate": 2e-05, "loss": 0.04904924, "step": 22991 }, { "epoch": 45.984, "grad_norm": 0.9134712815284729, "learning_rate": 2e-05, "loss": 0.02854902, "step": 22992 }, { "epoch": 45.986, "grad_norm": 1.0044866800308228, "learning_rate": 2e-05, "loss": 0.03731186, "step": 22993 }, { "epoch": 45.988, "grad_norm": 0.9467823505401611, "learning_rate": 2e-05, "loss": 0.0363578, "step": 22994 }, { "epoch": 45.99, "grad_norm": 1.31486976146698, "learning_rate": 2e-05, "loss": 0.03351432, "step": 22995 }, { "epoch": 45.992, "grad_norm": 0.8413812518119812, "learning_rate": 2e-05, "loss": 0.03210289, "step": 22996 }, { "epoch": 45.994, "grad_norm": 2.120770215988159, "learning_rate": 2e-05, "loss": 0.04027916, "step": 22997 }, { "epoch": 45.996, "grad_norm": 2.214883327484131, "learning_rate": 2e-05, "loss": 0.0631251, "step": 22998 }, { "epoch": 45.998, "grad_norm": 0.9259530305862427, "learning_rate": 2e-05, "loss": 0.03363225, "step": 22999 }, { "epoch": 46.0, "grad_norm": 1.3655165433883667, "learning_rate": 2e-05, "loss": 0.04936378, "step": 23000 }, { "epoch": 46.0, "eval_performance": { "AngleClassification_1": 1.0, "AngleClassification_2": 0.998, "AngleClassification_3": 0.9880239520958084, "Equal_1": 0.998, "Equal_2": 0.9840319361277445, "Equal_3": 0.9840319361277445, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 1.0, "Parallel_1": 0.9899799599198397, "Parallel_2": 0.9959919839679359, "Parallel_3": 0.994, "Perpendicular_1": 1.0, "Perpendicular_2": 0.99, "Perpendicular_3": 0.8897795591182365, "PointLiesOnCircle_1": 0.9959919839679359, "PointLiesOnCircle_2": 1.0, "PointLiesOnCircle_3": 0.994, "PointLiesOnLine_1": 0.9959919839679359, "PointLiesOnLine_2": 1.0, "PointLiesOnLine_3": 0.9760479041916168 }, "eval_runtime": 226.6926, "eval_samples_per_second": 46.318, "eval_steps_per_second": 0.926, "step": 23000 }, { "epoch": 46.002, "grad_norm": 1.3411600589752197, "learning_rate": 2e-05, "loss": 0.06362125, "step": 23001 }, { "epoch": 46.004, "grad_norm": 1.1186604499816895, "learning_rate": 2e-05, "loss": 0.05206663, "step": 23002 }, { "epoch": 46.006, "grad_norm": 1.0327969789505005, "learning_rate": 2e-05, "loss": 0.04788616, "step": 23003 }, { "epoch": 46.008, "grad_norm": 1.2077038288116455, "learning_rate": 2e-05, "loss": 0.0389897, "step": 23004 }, { "epoch": 46.01, "grad_norm": 1.3198155164718628, "learning_rate": 2e-05, "loss": 0.05596844, "step": 23005 }, { "epoch": 46.012, "grad_norm": 1.8129225969314575, "learning_rate": 2e-05, "loss": 0.05204503, "step": 23006 }, { "epoch": 46.014, "grad_norm": 1.2729936838150024, "learning_rate": 2e-05, "loss": 0.03887081, "step": 23007 }, { "epoch": 46.016, "grad_norm": 0.9993786811828613, "learning_rate": 2e-05, "loss": 0.04473443, "step": 23008 }, { "epoch": 46.018, "grad_norm": 1.3990881443023682, "learning_rate": 2e-05, "loss": 0.05093615, "step": 23009 }, { "epoch": 46.02, "grad_norm": 0.9721465110778809, "learning_rate": 2e-05, "loss": 0.03677879, "step": 23010 }, { "epoch": 46.022, "grad_norm": 1.1848127841949463, "learning_rate": 2e-05, "loss": 0.04001631, "step": 23011 }, { "epoch": 46.024, "grad_norm": 1.1356348991394043, "learning_rate": 2e-05, "loss": 0.0425076, "step": 23012 }, { "epoch": 46.026, "grad_norm": 1.88307523727417, "learning_rate": 2e-05, "loss": 0.05549126, "step": 23013 }, { "epoch": 46.028, "grad_norm": 1.173237681388855, "learning_rate": 2e-05, "loss": 0.04677022, "step": 23014 }, { "epoch": 46.03, "grad_norm": 0.977333128452301, "learning_rate": 2e-05, "loss": 0.03571994, "step": 23015 }, { "epoch": 46.032, "grad_norm": 1.2675398588180542, "learning_rate": 2e-05, "loss": 0.03351235, "step": 23016 }, { "epoch": 46.034, "grad_norm": 0.9801085591316223, "learning_rate": 2e-05, "loss": 0.02896846, "step": 23017 }, { "epoch": 46.036, "grad_norm": 1.3268541097640991, "learning_rate": 2e-05, "loss": 0.04963908, "step": 23018 }, { "epoch": 46.038, "grad_norm": 1.0372778177261353, "learning_rate": 2e-05, "loss": 0.04053154, "step": 23019 }, { "epoch": 46.04, "grad_norm": 0.8788153529167175, "learning_rate": 2e-05, "loss": 0.03195224, "step": 23020 }, { "epoch": 46.042, "grad_norm": 0.823895275592804, "learning_rate": 2e-05, "loss": 0.02677827, "step": 23021 }, { "epoch": 46.044, "grad_norm": 1.124933123588562, "learning_rate": 2e-05, "loss": 0.05068642, "step": 23022 }, { "epoch": 46.046, "grad_norm": 2.35408616065979, "learning_rate": 2e-05, "loss": 0.03417914, "step": 23023 }, { "epoch": 46.048, "grad_norm": 1.326403021812439, "learning_rate": 2e-05, "loss": 0.05060087, "step": 23024 }, { "epoch": 46.05, "grad_norm": 2.8667726516723633, "learning_rate": 2e-05, "loss": 0.04179538, "step": 23025 }, { "epoch": 46.052, "grad_norm": 1.3784321546554565, "learning_rate": 2e-05, "loss": 0.04331154, "step": 23026 }, { "epoch": 46.054, "grad_norm": 1.2534751892089844, "learning_rate": 2e-05, "loss": 0.06876025, "step": 23027 }, { "epoch": 46.056, "grad_norm": 1.017913579940796, "learning_rate": 2e-05, "loss": 0.03826562, "step": 23028 }, { "epoch": 46.058, "grad_norm": 1.2315905094146729, "learning_rate": 2e-05, "loss": 0.03713519, "step": 23029 }, { "epoch": 46.06, "grad_norm": 1.5021116733551025, "learning_rate": 2e-05, "loss": 0.0436249, "step": 23030 }, { "epoch": 46.062, "grad_norm": 1.1392513513565063, "learning_rate": 2e-05, "loss": 0.0369242, "step": 23031 }, { "epoch": 46.064, "grad_norm": 1.0997964143753052, "learning_rate": 2e-05, "loss": 0.05487487, "step": 23032 }, { "epoch": 46.066, "grad_norm": 1.3098500967025757, "learning_rate": 2e-05, "loss": 0.04130578, "step": 23033 }, { "epoch": 46.068, "grad_norm": 1.5100171566009521, "learning_rate": 2e-05, "loss": 0.0550752, "step": 23034 }, { "epoch": 46.07, "grad_norm": 1.0542734861373901, "learning_rate": 2e-05, "loss": 0.04020436, "step": 23035 }, { "epoch": 46.072, "grad_norm": 1.0686390399932861, "learning_rate": 2e-05, "loss": 0.05052323, "step": 23036 }, { "epoch": 46.074, "grad_norm": 0.9055821299552917, "learning_rate": 2e-05, "loss": 0.03061108, "step": 23037 }, { "epoch": 46.076, "grad_norm": 1.3100045919418335, "learning_rate": 2e-05, "loss": 0.06069642, "step": 23038 }, { "epoch": 46.078, "grad_norm": 1.1237154006958008, "learning_rate": 2e-05, "loss": 0.050055, "step": 23039 }, { "epoch": 46.08, "grad_norm": 2.2940642833709717, "learning_rate": 2e-05, "loss": 0.0554807, "step": 23040 }, { "epoch": 46.082, "grad_norm": 1.3819749355316162, "learning_rate": 2e-05, "loss": 0.05044323, "step": 23041 }, { "epoch": 46.084, "grad_norm": 1.1651939153671265, "learning_rate": 2e-05, "loss": 0.0385211, "step": 23042 }, { "epoch": 46.086, "grad_norm": 1.8606843948364258, "learning_rate": 2e-05, "loss": 0.0595234, "step": 23043 }, { "epoch": 46.088, "grad_norm": 0.7811092138290405, "learning_rate": 2e-05, "loss": 0.0264758, "step": 23044 }, { "epoch": 46.09, "grad_norm": 2.5626580715179443, "learning_rate": 2e-05, "loss": 0.03954588, "step": 23045 }, { "epoch": 46.092, "grad_norm": 0.9564827680587769, "learning_rate": 2e-05, "loss": 0.04492767, "step": 23046 }, { "epoch": 46.094, "grad_norm": 1.3334673643112183, "learning_rate": 2e-05, "loss": 0.05920311, "step": 23047 }, { "epoch": 46.096, "grad_norm": 1.1041178703308105, "learning_rate": 2e-05, "loss": 0.03735137, "step": 23048 }, { "epoch": 46.098, "grad_norm": 1.2948436737060547, "learning_rate": 2e-05, "loss": 0.04420089, "step": 23049 }, { "epoch": 46.1, "grad_norm": 0.9874499440193176, "learning_rate": 2e-05, "loss": 0.03790975, "step": 23050 }, { "epoch": 46.102, "grad_norm": 0.995373547077179, "learning_rate": 2e-05, "loss": 0.03889161, "step": 23051 }, { "epoch": 46.104, "grad_norm": 0.9990857243537903, "learning_rate": 2e-05, "loss": 0.05412283, "step": 23052 }, { "epoch": 46.106, "grad_norm": 1.2271995544433594, "learning_rate": 2e-05, "loss": 0.05424345, "step": 23053 }, { "epoch": 46.108, "grad_norm": 1.2135752439498901, "learning_rate": 2e-05, "loss": 0.05443169, "step": 23054 }, { "epoch": 46.11, "grad_norm": 1.0776118040084839, "learning_rate": 2e-05, "loss": 0.04149918, "step": 23055 }, { "epoch": 46.112, "grad_norm": 1.795495629310608, "learning_rate": 2e-05, "loss": 0.05155989, "step": 23056 }, { "epoch": 46.114, "grad_norm": 0.8689935803413391, "learning_rate": 2e-05, "loss": 0.03793199, "step": 23057 }, { "epoch": 46.116, "grad_norm": 1.0192699432373047, "learning_rate": 2e-05, "loss": 0.04543822, "step": 23058 }, { "epoch": 46.118, "grad_norm": 1.0082510709762573, "learning_rate": 2e-05, "loss": 0.03871465, "step": 23059 }, { "epoch": 46.12, "grad_norm": 0.9681976437568665, "learning_rate": 2e-05, "loss": 0.03222036, "step": 23060 }, { "epoch": 46.122, "grad_norm": 1.0463489294052124, "learning_rate": 2e-05, "loss": 0.04720536, "step": 23061 }, { "epoch": 46.124, "grad_norm": 0.8213353157043457, "learning_rate": 2e-05, "loss": 0.02444438, "step": 23062 }, { "epoch": 46.126, "grad_norm": 1.083770513534546, "learning_rate": 2e-05, "loss": 0.03636256, "step": 23063 }, { "epoch": 46.128, "grad_norm": 1.0933585166931152, "learning_rate": 2e-05, "loss": 0.03505598, "step": 23064 }, { "epoch": 46.13, "grad_norm": 1.116616129875183, "learning_rate": 2e-05, "loss": 0.04948156, "step": 23065 }, { "epoch": 46.132, "grad_norm": 1.1201558113098145, "learning_rate": 2e-05, "loss": 0.03833582, "step": 23066 }, { "epoch": 46.134, "grad_norm": 1.004522681236267, "learning_rate": 2e-05, "loss": 0.03476013, "step": 23067 }, { "epoch": 46.136, "grad_norm": 1.4105441570281982, "learning_rate": 2e-05, "loss": 0.05704508, "step": 23068 }, { "epoch": 46.138, "grad_norm": 0.8976925015449524, "learning_rate": 2e-05, "loss": 0.02931217, "step": 23069 }, { "epoch": 46.14, "grad_norm": 1.0339791774749756, "learning_rate": 2e-05, "loss": 0.03369311, "step": 23070 }, { "epoch": 46.142, "grad_norm": 1.1785179376602173, "learning_rate": 2e-05, "loss": 0.05449614, "step": 23071 }, { "epoch": 46.144, "grad_norm": 1.1329402923583984, "learning_rate": 2e-05, "loss": 0.03269614, "step": 23072 }, { "epoch": 46.146, "grad_norm": 1.3512535095214844, "learning_rate": 2e-05, "loss": 0.03866613, "step": 23073 }, { "epoch": 46.148, "grad_norm": 1.152319312095642, "learning_rate": 2e-05, "loss": 0.04731422, "step": 23074 }, { "epoch": 46.15, "grad_norm": 1.7177672386169434, "learning_rate": 2e-05, "loss": 0.03752731, "step": 23075 }, { "epoch": 46.152, "grad_norm": 3.9961130619049072, "learning_rate": 2e-05, "loss": 0.04979374, "step": 23076 }, { "epoch": 46.154, "grad_norm": 1.1938670873641968, "learning_rate": 2e-05, "loss": 0.05413944, "step": 23077 }, { "epoch": 46.156, "grad_norm": 0.942374050617218, "learning_rate": 2e-05, "loss": 0.03267742, "step": 23078 }, { "epoch": 46.158, "grad_norm": 1.2797523736953735, "learning_rate": 2e-05, "loss": 0.0434095, "step": 23079 }, { "epoch": 46.16, "grad_norm": 1.3832670450210571, "learning_rate": 2e-05, "loss": 0.06774931, "step": 23080 }, { "epoch": 46.162, "grad_norm": 1.2374390363693237, "learning_rate": 2e-05, "loss": 0.04107068, "step": 23081 }, { "epoch": 46.164, "grad_norm": 1.001096487045288, "learning_rate": 2e-05, "loss": 0.03511919, "step": 23082 }, { "epoch": 46.166, "grad_norm": 1.7459917068481445, "learning_rate": 2e-05, "loss": 0.04571071, "step": 23083 }, { "epoch": 46.168, "grad_norm": 1.1298414468765259, "learning_rate": 2e-05, "loss": 0.05018446, "step": 23084 }, { "epoch": 46.17, "grad_norm": 1.841927170753479, "learning_rate": 2e-05, "loss": 0.04758389, "step": 23085 }, { "epoch": 46.172, "grad_norm": 0.9143821001052856, "learning_rate": 2e-05, "loss": 0.03706867, "step": 23086 }, { "epoch": 46.174, "grad_norm": 1.040521502494812, "learning_rate": 2e-05, "loss": 0.04647394, "step": 23087 }, { "epoch": 46.176, "grad_norm": 0.8499959111213684, "learning_rate": 2e-05, "loss": 0.02701177, "step": 23088 }, { "epoch": 46.178, "grad_norm": 2.281643867492676, "learning_rate": 2e-05, "loss": 0.04630378, "step": 23089 }, { "epoch": 46.18, "grad_norm": 1.0684516429901123, "learning_rate": 2e-05, "loss": 0.04115474, "step": 23090 }, { "epoch": 46.182, "grad_norm": 8.098402976989746, "learning_rate": 2e-05, "loss": 0.0657938, "step": 23091 }, { "epoch": 46.184, "grad_norm": 1.3023324012756348, "learning_rate": 2e-05, "loss": 0.06075235, "step": 23092 }, { "epoch": 46.186, "grad_norm": 1.6482465267181396, "learning_rate": 2e-05, "loss": 0.03885299, "step": 23093 }, { "epoch": 46.188, "grad_norm": 2.2150909900665283, "learning_rate": 2e-05, "loss": 0.04685253, "step": 23094 }, { "epoch": 46.19, "grad_norm": 1.4253731966018677, "learning_rate": 2e-05, "loss": 0.03169043, "step": 23095 }, { "epoch": 46.192, "grad_norm": 1.1500582695007324, "learning_rate": 2e-05, "loss": 0.04959618, "step": 23096 }, { "epoch": 46.194, "grad_norm": 1.7867132425308228, "learning_rate": 2e-05, "loss": 0.0311187, "step": 23097 }, { "epoch": 46.196, "grad_norm": 1.0293912887573242, "learning_rate": 2e-05, "loss": 0.04199155, "step": 23098 }, { "epoch": 46.198, "grad_norm": 1.1349602937698364, "learning_rate": 2e-05, "loss": 0.04046186, "step": 23099 }, { "epoch": 46.2, "grad_norm": 1.0206743478775024, "learning_rate": 2e-05, "loss": 0.03941879, "step": 23100 }, { "epoch": 46.202, "grad_norm": 1.2527531385421753, "learning_rate": 2e-05, "loss": 0.04891253, "step": 23101 }, { "epoch": 46.204, "grad_norm": 1.7768917083740234, "learning_rate": 2e-05, "loss": 0.04748125, "step": 23102 }, { "epoch": 46.206, "grad_norm": 1.1661195755004883, "learning_rate": 2e-05, "loss": 0.03509434, "step": 23103 }, { "epoch": 46.208, "grad_norm": 1.221444845199585, "learning_rate": 2e-05, "loss": 0.0468955, "step": 23104 }, { "epoch": 46.21, "grad_norm": 1.0466692447662354, "learning_rate": 2e-05, "loss": 0.04122563, "step": 23105 }, { "epoch": 46.212, "grad_norm": 1.4518938064575195, "learning_rate": 2e-05, "loss": 0.04789133, "step": 23106 }, { "epoch": 46.214, "grad_norm": 1.1273928880691528, "learning_rate": 2e-05, "loss": 0.04992475, "step": 23107 }, { "epoch": 46.216, "grad_norm": 1.077121376991272, "learning_rate": 2e-05, "loss": 0.0458326, "step": 23108 }, { "epoch": 46.218, "grad_norm": 1.1216061115264893, "learning_rate": 2e-05, "loss": 0.03857627, "step": 23109 }, { "epoch": 46.22, "grad_norm": 1.1717976331710815, "learning_rate": 2e-05, "loss": 0.03152472, "step": 23110 }, { "epoch": 46.222, "grad_norm": 1.4770869016647339, "learning_rate": 2e-05, "loss": 0.03796066, "step": 23111 }, { "epoch": 46.224, "grad_norm": 1.5723117589950562, "learning_rate": 2e-05, "loss": 0.05366229, "step": 23112 }, { "epoch": 46.226, "grad_norm": 1.0255879163742065, "learning_rate": 2e-05, "loss": 0.04378489, "step": 23113 }, { "epoch": 46.228, "grad_norm": 1.8259856700897217, "learning_rate": 2e-05, "loss": 0.05994576, "step": 23114 }, { "epoch": 46.23, "grad_norm": 0.9747894406318665, "learning_rate": 2e-05, "loss": 0.03734994, "step": 23115 }, { "epoch": 46.232, "grad_norm": 1.0502595901489258, "learning_rate": 2e-05, "loss": 0.04518704, "step": 23116 }, { "epoch": 46.234, "grad_norm": 1.010096549987793, "learning_rate": 2e-05, "loss": 0.0455133, "step": 23117 }, { "epoch": 46.236, "grad_norm": 1.231152057647705, "learning_rate": 2e-05, "loss": 0.0393187, "step": 23118 }, { "epoch": 46.238, "grad_norm": 1.2258777618408203, "learning_rate": 2e-05, "loss": 0.03397952, "step": 23119 }, { "epoch": 46.24, "grad_norm": 1.0938799381256104, "learning_rate": 2e-05, "loss": 0.04228405, "step": 23120 }, { "epoch": 46.242, "grad_norm": 1.639535903930664, "learning_rate": 2e-05, "loss": 0.07059435, "step": 23121 }, { "epoch": 46.244, "grad_norm": 1.216543436050415, "learning_rate": 2e-05, "loss": 0.04951358, "step": 23122 }, { "epoch": 46.246, "grad_norm": 0.9466731548309326, "learning_rate": 2e-05, "loss": 0.03183299, "step": 23123 }, { "epoch": 46.248, "grad_norm": 3.59386944770813, "learning_rate": 2e-05, "loss": 0.05288833, "step": 23124 }, { "epoch": 46.25, "grad_norm": 1.3006633520126343, "learning_rate": 2e-05, "loss": 0.04736608, "step": 23125 }, { "epoch": 46.252, "grad_norm": 1.0427287817001343, "learning_rate": 2e-05, "loss": 0.03730084, "step": 23126 }, { "epoch": 46.254, "grad_norm": 1.8576079607009888, "learning_rate": 2e-05, "loss": 0.04765089, "step": 23127 }, { "epoch": 46.256, "grad_norm": 1.1918314695358276, "learning_rate": 2e-05, "loss": 0.05522872, "step": 23128 }, { "epoch": 46.258, "grad_norm": 1.1226614713668823, "learning_rate": 2e-05, "loss": 0.04539641, "step": 23129 }, { "epoch": 46.26, "grad_norm": 1.0184094905853271, "learning_rate": 2e-05, "loss": 0.04293793, "step": 23130 }, { "epoch": 46.262, "grad_norm": 1.1844059228897095, "learning_rate": 2e-05, "loss": 0.04822522, "step": 23131 }, { "epoch": 46.264, "grad_norm": 0.9364579319953918, "learning_rate": 2e-05, "loss": 0.04798924, "step": 23132 }, { "epoch": 46.266, "grad_norm": 1.411163330078125, "learning_rate": 2e-05, "loss": 0.03959201, "step": 23133 }, { "epoch": 46.268, "grad_norm": 1.0259884595870972, "learning_rate": 2e-05, "loss": 0.03633114, "step": 23134 }, { "epoch": 46.27, "grad_norm": 1.0832723379135132, "learning_rate": 2e-05, "loss": 0.05034465, "step": 23135 }, { "epoch": 46.272, "grad_norm": 0.7679703831672668, "learning_rate": 2e-05, "loss": 0.02650321, "step": 23136 }, { "epoch": 46.274, "grad_norm": 1.0095412731170654, "learning_rate": 2e-05, "loss": 0.0415194, "step": 23137 }, { "epoch": 46.276, "grad_norm": 1.2131091356277466, "learning_rate": 2e-05, "loss": 0.05910186, "step": 23138 }, { "epoch": 46.278, "grad_norm": 1.048909306526184, "learning_rate": 2e-05, "loss": 0.03543817, "step": 23139 }, { "epoch": 46.28, "grad_norm": 1.1752052307128906, "learning_rate": 2e-05, "loss": 0.02536641, "step": 23140 }, { "epoch": 46.282, "grad_norm": 0.9702813029289246, "learning_rate": 2e-05, "loss": 0.04765216, "step": 23141 }, { "epoch": 46.284, "grad_norm": 1.0956237316131592, "learning_rate": 2e-05, "loss": 0.05443672, "step": 23142 }, { "epoch": 46.286, "grad_norm": 1.1319798231124878, "learning_rate": 2e-05, "loss": 0.05342951, "step": 23143 }, { "epoch": 46.288, "grad_norm": 2.1958091259002686, "learning_rate": 2e-05, "loss": 0.03490547, "step": 23144 }, { "epoch": 46.29, "grad_norm": 1.0097414255142212, "learning_rate": 2e-05, "loss": 0.03385946, "step": 23145 }, { "epoch": 46.292, "grad_norm": 1.076635718345642, "learning_rate": 2e-05, "loss": 0.03888563, "step": 23146 }, { "epoch": 46.294, "grad_norm": 0.9494529962539673, "learning_rate": 2e-05, "loss": 0.03757528, "step": 23147 }, { "epoch": 46.296, "grad_norm": 1.1100249290466309, "learning_rate": 2e-05, "loss": 0.03757155, "step": 23148 }, { "epoch": 46.298, "grad_norm": 1.0450470447540283, "learning_rate": 2e-05, "loss": 0.04235532, "step": 23149 }, { "epoch": 46.3, "grad_norm": 0.9347238540649414, "learning_rate": 2e-05, "loss": 0.04283817, "step": 23150 }, { "epoch": 46.302, "grad_norm": 1.0288108587265015, "learning_rate": 2e-05, "loss": 0.04069893, "step": 23151 }, { "epoch": 46.304, "grad_norm": 0.9015113115310669, "learning_rate": 2e-05, "loss": 0.02779032, "step": 23152 }, { "epoch": 46.306, "grad_norm": 0.9887634515762329, "learning_rate": 2e-05, "loss": 0.04353015, "step": 23153 }, { "epoch": 46.308, "grad_norm": 1.2101191282272339, "learning_rate": 2e-05, "loss": 0.03850481, "step": 23154 }, { "epoch": 46.31, "grad_norm": 1.157342553138733, "learning_rate": 2e-05, "loss": 0.04204225, "step": 23155 }, { "epoch": 46.312, "grad_norm": 0.9774017930030823, "learning_rate": 2e-05, "loss": 0.04283302, "step": 23156 }, { "epoch": 46.314, "grad_norm": 0.9889634251594543, "learning_rate": 2e-05, "loss": 0.04440223, "step": 23157 }, { "epoch": 46.316, "grad_norm": 1.2580952644348145, "learning_rate": 2e-05, "loss": 0.04796872, "step": 23158 }, { "epoch": 46.318, "grad_norm": 1.1707109212875366, "learning_rate": 2e-05, "loss": 0.04368784, "step": 23159 }, { "epoch": 46.32, "grad_norm": 1.3061223030090332, "learning_rate": 2e-05, "loss": 0.03672884, "step": 23160 }, { "epoch": 46.322, "grad_norm": 1.0868148803710938, "learning_rate": 2e-05, "loss": 0.04590251, "step": 23161 }, { "epoch": 46.324, "grad_norm": 1.1951301097869873, "learning_rate": 2e-05, "loss": 0.06367991, "step": 23162 }, { "epoch": 46.326, "grad_norm": 1.242278814315796, "learning_rate": 2e-05, "loss": 0.03271909, "step": 23163 }, { "epoch": 46.328, "grad_norm": 1.3260518312454224, "learning_rate": 2e-05, "loss": 0.04263738, "step": 23164 }, { "epoch": 46.33, "grad_norm": 1.110639214515686, "learning_rate": 2e-05, "loss": 0.03704098, "step": 23165 }, { "epoch": 46.332, "grad_norm": 1.2617560625076294, "learning_rate": 2e-05, "loss": 0.04218528, "step": 23166 }, { "epoch": 46.334, "grad_norm": 1.0754046440124512, "learning_rate": 2e-05, "loss": 0.04144373, "step": 23167 }, { "epoch": 46.336, "grad_norm": 1.0428435802459717, "learning_rate": 2e-05, "loss": 0.03930929, "step": 23168 }, { "epoch": 46.338, "grad_norm": 0.8598251938819885, "learning_rate": 2e-05, "loss": 0.03141431, "step": 23169 }, { "epoch": 46.34, "grad_norm": 0.9979385733604431, "learning_rate": 2e-05, "loss": 0.03066301, "step": 23170 }, { "epoch": 46.342, "grad_norm": 1.1016943454742432, "learning_rate": 2e-05, "loss": 0.04615101, "step": 23171 }, { "epoch": 46.344, "grad_norm": 1.0208326578140259, "learning_rate": 2e-05, "loss": 0.03933339, "step": 23172 }, { "epoch": 46.346, "grad_norm": 1.2449842691421509, "learning_rate": 2e-05, "loss": 0.0369473, "step": 23173 }, { "epoch": 46.348, "grad_norm": 1.333295464515686, "learning_rate": 2e-05, "loss": 0.04923746, "step": 23174 }, { "epoch": 46.35, "grad_norm": 1.2592405080795288, "learning_rate": 2e-05, "loss": 0.05858491, "step": 23175 }, { "epoch": 46.352, "grad_norm": 1.1182934045791626, "learning_rate": 2e-05, "loss": 0.05057877, "step": 23176 }, { "epoch": 46.354, "grad_norm": 1.1294434070587158, "learning_rate": 2e-05, "loss": 0.05497491, "step": 23177 }, { "epoch": 46.356, "grad_norm": 1.1283516883850098, "learning_rate": 2e-05, "loss": 0.03130652, "step": 23178 }, { "epoch": 46.358, "grad_norm": 0.9358503818511963, "learning_rate": 2e-05, "loss": 0.02673047, "step": 23179 }, { "epoch": 46.36, "grad_norm": 1.0426762104034424, "learning_rate": 2e-05, "loss": 0.04429452, "step": 23180 }, { "epoch": 46.362, "grad_norm": 1.1305820941925049, "learning_rate": 2e-05, "loss": 0.04509917, "step": 23181 }, { "epoch": 46.364, "grad_norm": 1.2027643918991089, "learning_rate": 2e-05, "loss": 0.05950527, "step": 23182 }, { "epoch": 46.366, "grad_norm": 1.3041430711746216, "learning_rate": 2e-05, "loss": 0.05860223, "step": 23183 }, { "epoch": 46.368, "grad_norm": 3.1757078170776367, "learning_rate": 2e-05, "loss": 0.05310137, "step": 23184 }, { "epoch": 46.37, "grad_norm": 1.5121148824691772, "learning_rate": 2e-05, "loss": 0.05002122, "step": 23185 }, { "epoch": 46.372, "grad_norm": 2.1533706188201904, "learning_rate": 2e-05, "loss": 0.06442088, "step": 23186 }, { "epoch": 46.374, "grad_norm": 6.473106861114502, "learning_rate": 2e-05, "loss": 0.0365488, "step": 23187 }, { "epoch": 46.376, "grad_norm": 1.1027642488479614, "learning_rate": 2e-05, "loss": 0.04938496, "step": 23188 }, { "epoch": 46.378, "grad_norm": 1.1982091665267944, "learning_rate": 2e-05, "loss": 0.05867774, "step": 23189 }, { "epoch": 46.38, "grad_norm": 1.0916969776153564, "learning_rate": 2e-05, "loss": 0.04816931, "step": 23190 }, { "epoch": 46.382, "grad_norm": 1.1230005025863647, "learning_rate": 2e-05, "loss": 0.03694833, "step": 23191 }, { "epoch": 46.384, "grad_norm": 1.0412832498550415, "learning_rate": 2e-05, "loss": 0.03650439, "step": 23192 }, { "epoch": 46.386, "grad_norm": 1.1216541528701782, "learning_rate": 2e-05, "loss": 0.04930431, "step": 23193 }, { "epoch": 46.388, "grad_norm": 1.952504277229309, "learning_rate": 2e-05, "loss": 0.03766615, "step": 23194 }, { "epoch": 46.39, "grad_norm": 1.2502418756484985, "learning_rate": 2e-05, "loss": 0.04829918, "step": 23195 }, { "epoch": 46.392, "grad_norm": 0.9572649598121643, "learning_rate": 2e-05, "loss": 0.04349612, "step": 23196 }, { "epoch": 46.394, "grad_norm": 1.0906199216842651, "learning_rate": 2e-05, "loss": 0.04060723, "step": 23197 }, { "epoch": 46.396, "grad_norm": 1.0450413227081299, "learning_rate": 2e-05, "loss": 0.04865713, "step": 23198 }, { "epoch": 46.398, "grad_norm": 1.3935647010803223, "learning_rate": 2e-05, "loss": 0.04055649, "step": 23199 }, { "epoch": 46.4, "grad_norm": 1.038891077041626, "learning_rate": 2e-05, "loss": 0.03608476, "step": 23200 }, { "epoch": 46.402, "grad_norm": 1.150534749031067, "learning_rate": 2e-05, "loss": 0.06265096, "step": 23201 }, { "epoch": 46.404, "grad_norm": 2.0182018280029297, "learning_rate": 2e-05, "loss": 0.04380609, "step": 23202 }, { "epoch": 46.406, "grad_norm": 1.0963680744171143, "learning_rate": 2e-05, "loss": 0.0363346, "step": 23203 }, { "epoch": 46.408, "grad_norm": 1.4175118207931519, "learning_rate": 2e-05, "loss": 0.04934303, "step": 23204 }, { "epoch": 46.41, "grad_norm": 1.4840285778045654, "learning_rate": 2e-05, "loss": 0.03351284, "step": 23205 }, { "epoch": 46.412, "grad_norm": 1.1490660905838013, "learning_rate": 2e-05, "loss": 0.04890034, "step": 23206 }, { "epoch": 46.414, "grad_norm": 1.6888747215270996, "learning_rate": 2e-05, "loss": 0.04819397, "step": 23207 }, { "epoch": 46.416, "grad_norm": 0.9434964060783386, "learning_rate": 2e-05, "loss": 0.03211683, "step": 23208 }, { "epoch": 46.418, "grad_norm": 1.0164737701416016, "learning_rate": 2e-05, "loss": 0.0403863, "step": 23209 }, { "epoch": 46.42, "grad_norm": 1.2772653102874756, "learning_rate": 2e-05, "loss": 0.02574966, "step": 23210 }, { "epoch": 46.422, "grad_norm": 0.8171355128288269, "learning_rate": 2e-05, "loss": 0.02824315, "step": 23211 }, { "epoch": 46.424, "grad_norm": 0.9972546696662903, "learning_rate": 2e-05, "loss": 0.04242121, "step": 23212 }, { "epoch": 46.426, "grad_norm": 1.4012272357940674, "learning_rate": 2e-05, "loss": 0.0244806, "step": 23213 }, { "epoch": 46.428, "grad_norm": 2.119521379470825, "learning_rate": 2e-05, "loss": 0.03294837, "step": 23214 }, { "epoch": 46.43, "grad_norm": 1.2812317609786987, "learning_rate": 2e-05, "loss": 0.05517238, "step": 23215 }, { "epoch": 46.432, "grad_norm": 0.9635146856307983, "learning_rate": 2e-05, "loss": 0.03169879, "step": 23216 }, { "epoch": 46.434, "grad_norm": 0.8792754411697388, "learning_rate": 2e-05, "loss": 0.03489258, "step": 23217 }, { "epoch": 46.436, "grad_norm": 1.111374855041504, "learning_rate": 2e-05, "loss": 0.0448352, "step": 23218 }, { "epoch": 46.438, "grad_norm": 1.0885506868362427, "learning_rate": 2e-05, "loss": 0.05479189, "step": 23219 }, { "epoch": 46.44, "grad_norm": 1.125364899635315, "learning_rate": 2e-05, "loss": 0.03700017, "step": 23220 }, { "epoch": 46.442, "grad_norm": 1.4658682346343994, "learning_rate": 2e-05, "loss": 0.05805753, "step": 23221 }, { "epoch": 46.444, "grad_norm": 0.81126868724823, "learning_rate": 2e-05, "loss": 0.0312567, "step": 23222 }, { "epoch": 46.446, "grad_norm": 1.2564938068389893, "learning_rate": 2e-05, "loss": 0.05650258, "step": 23223 }, { "epoch": 46.448, "grad_norm": 0.9850262403488159, "learning_rate": 2e-05, "loss": 0.03819663, "step": 23224 }, { "epoch": 46.45, "grad_norm": 1.1063578128814697, "learning_rate": 2e-05, "loss": 0.03865373, "step": 23225 }, { "epoch": 46.452, "grad_norm": 1.2721880674362183, "learning_rate": 2e-05, "loss": 0.04457515, "step": 23226 }, { "epoch": 46.454, "grad_norm": 1.0539815425872803, "learning_rate": 2e-05, "loss": 0.03491284, "step": 23227 }, { "epoch": 46.456, "grad_norm": 1.023163914680481, "learning_rate": 2e-05, "loss": 0.0340073, "step": 23228 }, { "epoch": 46.458, "grad_norm": 1.134781002998352, "learning_rate": 2e-05, "loss": 0.06038333, "step": 23229 }, { "epoch": 46.46, "grad_norm": 1.1179391145706177, "learning_rate": 2e-05, "loss": 0.04021306, "step": 23230 }, { "epoch": 46.462, "grad_norm": 1.1528855562210083, "learning_rate": 2e-05, "loss": 0.04735453, "step": 23231 }, { "epoch": 46.464, "grad_norm": 1.3129547834396362, "learning_rate": 2e-05, "loss": 0.05114962, "step": 23232 }, { "epoch": 46.466, "grad_norm": 1.27275550365448, "learning_rate": 2e-05, "loss": 0.0420683, "step": 23233 }, { "epoch": 46.468, "grad_norm": 1.5291706323623657, "learning_rate": 2e-05, "loss": 0.03138549, "step": 23234 }, { "epoch": 46.47, "grad_norm": 0.9122021794319153, "learning_rate": 2e-05, "loss": 0.02888708, "step": 23235 }, { "epoch": 46.472, "grad_norm": 1.5571200847625732, "learning_rate": 2e-05, "loss": 0.07933204, "step": 23236 }, { "epoch": 46.474, "grad_norm": 1.0114980936050415, "learning_rate": 2e-05, "loss": 0.03738384, "step": 23237 }, { "epoch": 46.476, "grad_norm": 1.1580678224563599, "learning_rate": 2e-05, "loss": 0.04629489, "step": 23238 }, { "epoch": 46.478, "grad_norm": 1.065160870552063, "learning_rate": 2e-05, "loss": 0.04427964, "step": 23239 }, { "epoch": 46.48, "grad_norm": 1.788196086883545, "learning_rate": 2e-05, "loss": 0.04720298, "step": 23240 }, { "epoch": 46.482, "grad_norm": 0.9789221286773682, "learning_rate": 2e-05, "loss": 0.04213333, "step": 23241 }, { "epoch": 46.484, "grad_norm": 1.0629030466079712, "learning_rate": 2e-05, "loss": 0.03892981, "step": 23242 }, { "epoch": 46.486, "grad_norm": 1.209897756576538, "learning_rate": 2e-05, "loss": 0.04201607, "step": 23243 }, { "epoch": 46.488, "grad_norm": 0.963427722454071, "learning_rate": 2e-05, "loss": 0.03765631, "step": 23244 }, { "epoch": 46.49, "grad_norm": 0.9009614586830139, "learning_rate": 2e-05, "loss": 0.03484703, "step": 23245 }, { "epoch": 46.492, "grad_norm": 0.930345356464386, "learning_rate": 2e-05, "loss": 0.03707998, "step": 23246 }, { "epoch": 46.494, "grad_norm": 1.4231634140014648, "learning_rate": 2e-05, "loss": 0.06464037, "step": 23247 }, { "epoch": 46.496, "grad_norm": 0.9478148818016052, "learning_rate": 2e-05, "loss": 0.03427771, "step": 23248 }, { "epoch": 46.498, "grad_norm": 3.2437808513641357, "learning_rate": 2e-05, "loss": 0.05237136, "step": 23249 }, { "epoch": 46.5, "grad_norm": 0.9787329435348511, "learning_rate": 2e-05, "loss": 0.03360103, "step": 23250 }, { "epoch": 46.502, "grad_norm": 0.9565979838371277, "learning_rate": 2e-05, "loss": 0.03792033, "step": 23251 }, { "epoch": 46.504, "grad_norm": 1.102731704711914, "learning_rate": 2e-05, "loss": 0.03952245, "step": 23252 }, { "epoch": 46.506, "grad_norm": 4.102285385131836, "learning_rate": 2e-05, "loss": 0.04865433, "step": 23253 }, { "epoch": 46.508, "grad_norm": 0.9329944849014282, "learning_rate": 2e-05, "loss": 0.03761422, "step": 23254 }, { "epoch": 46.51, "grad_norm": 1.331762433052063, "learning_rate": 2e-05, "loss": 0.05577781, "step": 23255 }, { "epoch": 46.512, "grad_norm": 0.9442405700683594, "learning_rate": 2e-05, "loss": 0.0310113, "step": 23256 }, { "epoch": 46.514, "grad_norm": 1.2367945909500122, "learning_rate": 2e-05, "loss": 0.0441806, "step": 23257 }, { "epoch": 46.516, "grad_norm": 1.0528897047042847, "learning_rate": 2e-05, "loss": 0.04318698, "step": 23258 }, { "epoch": 46.518, "grad_norm": 1.836319923400879, "learning_rate": 2e-05, "loss": 0.04725133, "step": 23259 }, { "epoch": 46.52, "grad_norm": 0.9609118700027466, "learning_rate": 2e-05, "loss": 0.03715866, "step": 23260 }, { "epoch": 46.522, "grad_norm": 0.9954544305801392, "learning_rate": 2e-05, "loss": 0.04023976, "step": 23261 }, { "epoch": 46.524, "grad_norm": 1.10040283203125, "learning_rate": 2e-05, "loss": 0.05213868, "step": 23262 }, { "epoch": 46.526, "grad_norm": 1.2348756790161133, "learning_rate": 2e-05, "loss": 0.04602022, "step": 23263 }, { "epoch": 46.528, "grad_norm": 1.0019196271896362, "learning_rate": 2e-05, "loss": 0.02935033, "step": 23264 }, { "epoch": 46.53, "grad_norm": 1.4098018407821655, "learning_rate": 2e-05, "loss": 0.0497877, "step": 23265 }, { "epoch": 46.532, "grad_norm": 1.0995022058486938, "learning_rate": 2e-05, "loss": 0.05081838, "step": 23266 }, { "epoch": 46.534, "grad_norm": 1.0687205791473389, "learning_rate": 2e-05, "loss": 0.04965843, "step": 23267 }, { "epoch": 46.536, "grad_norm": 1.3429203033447266, "learning_rate": 2e-05, "loss": 0.0459254, "step": 23268 }, { "epoch": 46.538, "grad_norm": 1.007907509803772, "learning_rate": 2e-05, "loss": 0.0390831, "step": 23269 }, { "epoch": 46.54, "grad_norm": 1.3568775653839111, "learning_rate": 2e-05, "loss": 0.06248835, "step": 23270 }, { "epoch": 46.542, "grad_norm": 1.1344226598739624, "learning_rate": 2e-05, "loss": 0.04208028, "step": 23271 }, { "epoch": 46.544, "grad_norm": 1.0721518993377686, "learning_rate": 2e-05, "loss": 0.03604388, "step": 23272 }, { "epoch": 46.546, "grad_norm": 1.1106624603271484, "learning_rate": 2e-05, "loss": 0.04831469, "step": 23273 }, { "epoch": 46.548, "grad_norm": 1.229658603668213, "learning_rate": 2e-05, "loss": 0.04561464, "step": 23274 }, { "epoch": 46.55, "grad_norm": 1.105913519859314, "learning_rate": 2e-05, "loss": 0.03615031, "step": 23275 }, { "epoch": 46.552, "grad_norm": 1.452368140220642, "learning_rate": 2e-05, "loss": 0.04106815, "step": 23276 }, { "epoch": 46.554, "grad_norm": 0.9597733020782471, "learning_rate": 2e-05, "loss": 0.03648468, "step": 23277 }, { "epoch": 46.556, "grad_norm": 2.0021674633026123, "learning_rate": 2e-05, "loss": 0.04483432, "step": 23278 }, { "epoch": 46.558, "grad_norm": 1.786195158958435, "learning_rate": 2e-05, "loss": 0.03000396, "step": 23279 }, { "epoch": 46.56, "grad_norm": 1.1532331705093384, "learning_rate": 2e-05, "loss": 0.05007486, "step": 23280 }, { "epoch": 46.562, "grad_norm": 1.4126094579696655, "learning_rate": 2e-05, "loss": 0.04259964, "step": 23281 }, { "epoch": 46.564, "grad_norm": 0.9883242249488831, "learning_rate": 2e-05, "loss": 0.04840852, "step": 23282 }, { "epoch": 46.566, "grad_norm": 1.0877867937088013, "learning_rate": 2e-05, "loss": 0.0479309, "step": 23283 }, { "epoch": 46.568, "grad_norm": 1.0399949550628662, "learning_rate": 2e-05, "loss": 0.0427972, "step": 23284 }, { "epoch": 46.57, "grad_norm": 1.3175089359283447, "learning_rate": 2e-05, "loss": 0.03760595, "step": 23285 }, { "epoch": 46.572, "grad_norm": 0.9953736662864685, "learning_rate": 2e-05, "loss": 0.04915586, "step": 23286 }, { "epoch": 46.574, "grad_norm": 1.0398504734039307, "learning_rate": 2e-05, "loss": 0.04454892, "step": 23287 }, { "epoch": 46.576, "grad_norm": 1.220216989517212, "learning_rate": 2e-05, "loss": 0.04175328, "step": 23288 }, { "epoch": 46.578, "grad_norm": 1.0529553890228271, "learning_rate": 2e-05, "loss": 0.03489523, "step": 23289 }, { "epoch": 46.58, "grad_norm": 1.2509406805038452, "learning_rate": 2e-05, "loss": 0.04870979, "step": 23290 }, { "epoch": 46.582, "grad_norm": 1.1102439165115356, "learning_rate": 2e-05, "loss": 0.04033348, "step": 23291 }, { "epoch": 46.584, "grad_norm": 1.4240092039108276, "learning_rate": 2e-05, "loss": 0.04159205, "step": 23292 }, { "epoch": 46.586, "grad_norm": 1.0110493898391724, "learning_rate": 2e-05, "loss": 0.0420402, "step": 23293 }, { "epoch": 46.588, "grad_norm": 3.1553103923797607, "learning_rate": 2e-05, "loss": 0.05928943, "step": 23294 }, { "epoch": 46.59, "grad_norm": 1.039392113685608, "learning_rate": 2e-05, "loss": 0.04712161, "step": 23295 }, { "epoch": 46.592, "grad_norm": 2.349179983139038, "learning_rate": 2e-05, "loss": 0.04519066, "step": 23296 }, { "epoch": 46.594, "grad_norm": 1.074487328529358, "learning_rate": 2e-05, "loss": 0.03515197, "step": 23297 }, { "epoch": 46.596, "grad_norm": 1.0758991241455078, "learning_rate": 2e-05, "loss": 0.03169068, "step": 23298 }, { "epoch": 46.598, "grad_norm": 0.9676783680915833, "learning_rate": 2e-05, "loss": 0.03293062, "step": 23299 }, { "epoch": 46.6, "grad_norm": 0.9267943501472473, "learning_rate": 2e-05, "loss": 0.03839142, "step": 23300 }, { "epoch": 46.602, "grad_norm": 0.9260231256484985, "learning_rate": 2e-05, "loss": 0.03471829, "step": 23301 }, { "epoch": 46.604, "grad_norm": 1.4940166473388672, "learning_rate": 2e-05, "loss": 0.0551348, "step": 23302 }, { "epoch": 46.606, "grad_norm": 0.9714502692222595, "learning_rate": 2e-05, "loss": 0.04158224, "step": 23303 }, { "epoch": 46.608, "grad_norm": 2.784233808517456, "learning_rate": 2e-05, "loss": 0.04192377, "step": 23304 }, { "epoch": 46.61, "grad_norm": 0.9113459587097168, "learning_rate": 2e-05, "loss": 0.02875915, "step": 23305 }, { "epoch": 46.612, "grad_norm": 1.2626380920410156, "learning_rate": 2e-05, "loss": 0.04703204, "step": 23306 }, { "epoch": 46.614, "grad_norm": 0.9688354134559631, "learning_rate": 2e-05, "loss": 0.03371166, "step": 23307 }, { "epoch": 46.616, "grad_norm": 2.0443971157073975, "learning_rate": 2e-05, "loss": 0.05565372, "step": 23308 }, { "epoch": 46.618, "grad_norm": 0.9629517793655396, "learning_rate": 2e-05, "loss": 0.03795207, "step": 23309 }, { "epoch": 46.62, "grad_norm": 2.6458683013916016, "learning_rate": 2e-05, "loss": 0.04849132, "step": 23310 }, { "epoch": 46.622, "grad_norm": 0.8832199573516846, "learning_rate": 2e-05, "loss": 0.03363426, "step": 23311 }, { "epoch": 46.624, "grad_norm": 1.1957794427871704, "learning_rate": 2e-05, "loss": 0.0341556, "step": 23312 }, { "epoch": 46.626, "grad_norm": 0.9851163029670715, "learning_rate": 2e-05, "loss": 0.03968139, "step": 23313 }, { "epoch": 46.628, "grad_norm": 1.182287335395813, "learning_rate": 2e-05, "loss": 0.04088139, "step": 23314 }, { "epoch": 46.63, "grad_norm": 1.118200421333313, "learning_rate": 2e-05, "loss": 0.04111746, "step": 23315 }, { "epoch": 46.632, "grad_norm": 4.04936408996582, "learning_rate": 2e-05, "loss": 0.03530582, "step": 23316 }, { "epoch": 46.634, "grad_norm": 1.379470705986023, "learning_rate": 2e-05, "loss": 0.05652231, "step": 23317 }, { "epoch": 46.636, "grad_norm": 1.2478636503219604, "learning_rate": 2e-05, "loss": 0.03696441, "step": 23318 }, { "epoch": 46.638, "grad_norm": 0.993472158908844, "learning_rate": 2e-05, "loss": 0.04053735, "step": 23319 }, { "epoch": 46.64, "grad_norm": 1.1251386404037476, "learning_rate": 2e-05, "loss": 0.04244269, "step": 23320 }, { "epoch": 46.642, "grad_norm": 1.0799177885055542, "learning_rate": 2e-05, "loss": 0.04426109, "step": 23321 }, { "epoch": 46.644, "grad_norm": 0.9832642078399658, "learning_rate": 2e-05, "loss": 0.02977496, "step": 23322 }, { "epoch": 46.646, "grad_norm": 0.9070351719856262, "learning_rate": 2e-05, "loss": 0.03519815, "step": 23323 }, { "epoch": 46.648, "grad_norm": 1.1914373636245728, "learning_rate": 2e-05, "loss": 0.04469961, "step": 23324 }, { "epoch": 46.65, "grad_norm": 1.110277771949768, "learning_rate": 2e-05, "loss": 0.05107085, "step": 23325 }, { "epoch": 46.652, "grad_norm": 1.2887954711914062, "learning_rate": 2e-05, "loss": 0.03869878, "step": 23326 }, { "epoch": 46.654, "grad_norm": 1.2037444114685059, "learning_rate": 2e-05, "loss": 0.04324391, "step": 23327 }, { "epoch": 46.656, "grad_norm": 1.3407886028289795, "learning_rate": 2e-05, "loss": 0.05299862, "step": 23328 }, { "epoch": 46.658, "grad_norm": 1.2127498388290405, "learning_rate": 2e-05, "loss": 0.04703096, "step": 23329 }, { "epoch": 46.66, "grad_norm": 1.1498994827270508, "learning_rate": 2e-05, "loss": 0.03947148, "step": 23330 }, { "epoch": 46.662, "grad_norm": 1.1454975605010986, "learning_rate": 2e-05, "loss": 0.03360817, "step": 23331 }, { "epoch": 46.664, "grad_norm": 1.0070302486419678, "learning_rate": 2e-05, "loss": 0.04571046, "step": 23332 }, { "epoch": 46.666, "grad_norm": 1.9762816429138184, "learning_rate": 2e-05, "loss": 0.04655224, "step": 23333 }, { "epoch": 46.668, "grad_norm": 1.5753240585327148, "learning_rate": 2e-05, "loss": 0.04521073, "step": 23334 }, { "epoch": 46.67, "grad_norm": 1.0748071670532227, "learning_rate": 2e-05, "loss": 0.04236377, "step": 23335 }, { "epoch": 46.672, "grad_norm": 1.2510230541229248, "learning_rate": 2e-05, "loss": 0.05184109, "step": 23336 }, { "epoch": 46.674, "grad_norm": 1.231233835220337, "learning_rate": 2e-05, "loss": 0.04542288, "step": 23337 }, { "epoch": 46.676, "grad_norm": 1.110720157623291, "learning_rate": 2e-05, "loss": 0.04179211, "step": 23338 }, { "epoch": 46.678, "grad_norm": 1.2005581855773926, "learning_rate": 2e-05, "loss": 0.03305534, "step": 23339 }, { "epoch": 46.68, "grad_norm": 1.430365800857544, "learning_rate": 2e-05, "loss": 0.05259969, "step": 23340 }, { "epoch": 46.682, "grad_norm": 1.3016529083251953, "learning_rate": 2e-05, "loss": 0.04999658, "step": 23341 }, { "epoch": 46.684, "grad_norm": 1.0478612184524536, "learning_rate": 2e-05, "loss": 0.03303352, "step": 23342 }, { "epoch": 46.686, "grad_norm": 1.3386330604553223, "learning_rate": 2e-05, "loss": 0.05317756, "step": 23343 }, { "epoch": 46.688, "grad_norm": 2.2787442207336426, "learning_rate": 2e-05, "loss": 0.05527909, "step": 23344 }, { "epoch": 46.69, "grad_norm": 0.9911088347434998, "learning_rate": 2e-05, "loss": 0.03851809, "step": 23345 }, { "epoch": 46.692, "grad_norm": 1.0447211265563965, "learning_rate": 2e-05, "loss": 0.05080811, "step": 23346 }, { "epoch": 46.694, "grad_norm": 2.4731099605560303, "learning_rate": 2e-05, "loss": 0.05663422, "step": 23347 }, { "epoch": 46.696, "grad_norm": 1.0687483549118042, "learning_rate": 2e-05, "loss": 0.03417762, "step": 23348 }, { "epoch": 46.698, "grad_norm": 1.227199673652649, "learning_rate": 2e-05, "loss": 0.03788904, "step": 23349 }, { "epoch": 46.7, "grad_norm": 1.2819288969039917, "learning_rate": 2e-05, "loss": 0.04048603, "step": 23350 }, { "epoch": 46.702, "grad_norm": 1.258254051208496, "learning_rate": 2e-05, "loss": 0.055741, "step": 23351 }, { "epoch": 46.704, "grad_norm": 1.2302697896957397, "learning_rate": 2e-05, "loss": 0.05696668, "step": 23352 }, { "epoch": 46.706, "grad_norm": 0.8961069583892822, "learning_rate": 2e-05, "loss": 0.01879746, "step": 23353 }, { "epoch": 46.708, "grad_norm": 1.171879529953003, "learning_rate": 2e-05, "loss": 0.04014631, "step": 23354 }, { "epoch": 46.71, "grad_norm": 1.755340337753296, "learning_rate": 2e-05, "loss": 0.03253373, "step": 23355 }, { "epoch": 46.712, "grad_norm": 1.1351804733276367, "learning_rate": 2e-05, "loss": 0.04328355, "step": 23356 }, { "epoch": 46.714, "grad_norm": 1.2111117839813232, "learning_rate": 2e-05, "loss": 0.06130699, "step": 23357 }, { "epoch": 46.716, "grad_norm": 1.171247124671936, "learning_rate": 2e-05, "loss": 0.05082038, "step": 23358 }, { "epoch": 46.718, "grad_norm": 1.3078460693359375, "learning_rate": 2e-05, "loss": 0.04412971, "step": 23359 }, { "epoch": 46.72, "grad_norm": 1.2860954999923706, "learning_rate": 2e-05, "loss": 0.06714284, "step": 23360 }, { "epoch": 46.722, "grad_norm": 1.3779325485229492, "learning_rate": 2e-05, "loss": 0.03861318, "step": 23361 }, { "epoch": 46.724, "grad_norm": 0.8829256892204285, "learning_rate": 2e-05, "loss": 0.02897969, "step": 23362 }, { "epoch": 46.726, "grad_norm": 6.1703972816467285, "learning_rate": 2e-05, "loss": 0.07785551, "step": 23363 }, { "epoch": 46.728, "grad_norm": 0.9674871563911438, "learning_rate": 2e-05, "loss": 0.04069287, "step": 23364 }, { "epoch": 46.73, "grad_norm": 2.6620755195617676, "learning_rate": 2e-05, "loss": 0.05367862, "step": 23365 }, { "epoch": 46.732, "grad_norm": 1.0895183086395264, "learning_rate": 2e-05, "loss": 0.03536549, "step": 23366 }, { "epoch": 46.734, "grad_norm": 1.093003749847412, "learning_rate": 2e-05, "loss": 0.03801016, "step": 23367 }, { "epoch": 46.736, "grad_norm": 1.834889531135559, "learning_rate": 2e-05, "loss": 0.06434356, "step": 23368 }, { "epoch": 46.738, "grad_norm": 0.9485624432563782, "learning_rate": 2e-05, "loss": 0.03658834, "step": 23369 }, { "epoch": 46.74, "grad_norm": 1.064595341682434, "learning_rate": 2e-05, "loss": 0.05353557, "step": 23370 }, { "epoch": 46.742, "grad_norm": 1.1055799722671509, "learning_rate": 2e-05, "loss": 0.0522572, "step": 23371 }, { "epoch": 46.744, "grad_norm": 1.0701597929000854, "learning_rate": 2e-05, "loss": 0.05175813, "step": 23372 }, { "epoch": 46.746, "grad_norm": 1.1633923053741455, "learning_rate": 2e-05, "loss": 0.04744084, "step": 23373 }, { "epoch": 46.748, "grad_norm": 0.9946088790893555, "learning_rate": 2e-05, "loss": 0.03310145, "step": 23374 }, { "epoch": 46.75, "grad_norm": 1.0466705560684204, "learning_rate": 2e-05, "loss": 0.04635129, "step": 23375 }, { "epoch": 46.752, "grad_norm": 0.9806078672409058, "learning_rate": 2e-05, "loss": 0.03260729, "step": 23376 }, { "epoch": 46.754, "grad_norm": 1.1320241689682007, "learning_rate": 2e-05, "loss": 0.04779102, "step": 23377 }, { "epoch": 46.756, "grad_norm": 0.7105060815811157, "learning_rate": 2e-05, "loss": 0.01983283, "step": 23378 }, { "epoch": 46.758, "grad_norm": 1.2922025918960571, "learning_rate": 2e-05, "loss": 0.05513033, "step": 23379 }, { "epoch": 46.76, "grad_norm": 1.0686273574829102, "learning_rate": 2e-05, "loss": 0.03961165, "step": 23380 }, { "epoch": 46.762, "grad_norm": 2.589683771133423, "learning_rate": 2e-05, "loss": 0.05010585, "step": 23381 }, { "epoch": 46.764, "grad_norm": 1.0334157943725586, "learning_rate": 2e-05, "loss": 0.04501469, "step": 23382 }, { "epoch": 46.766, "grad_norm": 1.7817224264144897, "learning_rate": 2e-05, "loss": 0.04858459, "step": 23383 }, { "epoch": 46.768, "grad_norm": 1.2763837575912476, "learning_rate": 2e-05, "loss": 0.05586781, "step": 23384 }, { "epoch": 46.77, "grad_norm": 2.2101364135742188, "learning_rate": 2e-05, "loss": 0.05837087, "step": 23385 }, { "epoch": 46.772, "grad_norm": 1.0066190958023071, "learning_rate": 2e-05, "loss": 0.03223064, "step": 23386 }, { "epoch": 46.774, "grad_norm": 1.1118357181549072, "learning_rate": 2e-05, "loss": 0.03732941, "step": 23387 }, { "epoch": 46.776, "grad_norm": 0.9892827272415161, "learning_rate": 2e-05, "loss": 0.0357661, "step": 23388 }, { "epoch": 46.778, "grad_norm": 1.47239351272583, "learning_rate": 2e-05, "loss": 0.0442114, "step": 23389 }, { "epoch": 46.78, "grad_norm": 1.3400282859802246, "learning_rate": 2e-05, "loss": 0.05252708, "step": 23390 }, { "epoch": 46.782, "grad_norm": 0.9811768531799316, "learning_rate": 2e-05, "loss": 0.03414439, "step": 23391 }, { "epoch": 46.784, "grad_norm": 1.5074758529663086, "learning_rate": 2e-05, "loss": 0.05456636, "step": 23392 }, { "epoch": 46.786, "grad_norm": 1.0421936511993408, "learning_rate": 2e-05, "loss": 0.04471853, "step": 23393 }, { "epoch": 46.788, "grad_norm": 1.049480676651001, "learning_rate": 2e-05, "loss": 0.0399674, "step": 23394 }, { "epoch": 46.79, "grad_norm": 1.3068121671676636, "learning_rate": 2e-05, "loss": 0.0450139, "step": 23395 }, { "epoch": 46.792, "grad_norm": 0.9889963865280151, "learning_rate": 2e-05, "loss": 0.03285018, "step": 23396 }, { "epoch": 46.794, "grad_norm": 1.1804039478302002, "learning_rate": 2e-05, "loss": 0.03092896, "step": 23397 }, { "epoch": 46.796, "grad_norm": 1.1131017208099365, "learning_rate": 2e-05, "loss": 0.0409885, "step": 23398 }, { "epoch": 46.798, "grad_norm": 1.7551406621932983, "learning_rate": 2e-05, "loss": 0.05623586, "step": 23399 }, { "epoch": 46.8, "grad_norm": 1.2987489700317383, "learning_rate": 2e-05, "loss": 0.04326867, "step": 23400 }, { "epoch": 46.802, "grad_norm": 0.9242444634437561, "learning_rate": 2e-05, "loss": 0.03309409, "step": 23401 }, { "epoch": 46.804, "grad_norm": 1.0238709449768066, "learning_rate": 2e-05, "loss": 0.04528648, "step": 23402 }, { "epoch": 46.806, "grad_norm": 1.2049877643585205, "learning_rate": 2e-05, "loss": 0.04348021, "step": 23403 }, { "epoch": 46.808, "grad_norm": 0.8730500340461731, "learning_rate": 2e-05, "loss": 0.02885314, "step": 23404 }, { "epoch": 46.81, "grad_norm": 2.0668938159942627, "learning_rate": 2e-05, "loss": 0.07564835, "step": 23405 }, { "epoch": 46.812, "grad_norm": 1.0278151035308838, "learning_rate": 2e-05, "loss": 0.04307159, "step": 23406 }, { "epoch": 46.814, "grad_norm": 1.6166255474090576, "learning_rate": 2e-05, "loss": 0.04567122, "step": 23407 }, { "epoch": 46.816, "grad_norm": 1.3521454334259033, "learning_rate": 2e-05, "loss": 0.05484287, "step": 23408 }, { "epoch": 46.818, "grad_norm": 1.2860374450683594, "learning_rate": 2e-05, "loss": 0.05745607, "step": 23409 }, { "epoch": 46.82, "grad_norm": 1.20570969581604, "learning_rate": 2e-05, "loss": 0.04650894, "step": 23410 }, { "epoch": 46.822, "grad_norm": 0.981250524520874, "learning_rate": 2e-05, "loss": 0.04414473, "step": 23411 }, { "epoch": 46.824, "grad_norm": 1.147842288017273, "learning_rate": 2e-05, "loss": 0.04157857, "step": 23412 }, { "epoch": 46.826, "grad_norm": 1.2135146856307983, "learning_rate": 2e-05, "loss": 0.04350588, "step": 23413 }, { "epoch": 46.828, "grad_norm": 1.3109846115112305, "learning_rate": 2e-05, "loss": 0.05259135, "step": 23414 }, { "epoch": 46.83, "grad_norm": 1.2338848114013672, "learning_rate": 2e-05, "loss": 0.03786198, "step": 23415 }, { "epoch": 46.832, "grad_norm": 1.0936768054962158, "learning_rate": 2e-05, "loss": 0.04643222, "step": 23416 }, { "epoch": 46.834, "grad_norm": 1.0407130718231201, "learning_rate": 2e-05, "loss": 0.0380646, "step": 23417 }, { "epoch": 46.836, "grad_norm": 1.1514661312103271, "learning_rate": 2e-05, "loss": 0.0421597, "step": 23418 }, { "epoch": 46.838, "grad_norm": 1.1070467233657837, "learning_rate": 2e-05, "loss": 0.0480284, "step": 23419 }, { "epoch": 46.84, "grad_norm": 1.201880931854248, "learning_rate": 2e-05, "loss": 0.05106621, "step": 23420 }, { "epoch": 46.842, "grad_norm": 1.3661859035491943, "learning_rate": 2e-05, "loss": 0.04395133, "step": 23421 }, { "epoch": 46.844, "grad_norm": 1.0115083456039429, "learning_rate": 2e-05, "loss": 0.03932782, "step": 23422 }, { "epoch": 46.846, "grad_norm": 1.2211161851882935, "learning_rate": 2e-05, "loss": 0.04679502, "step": 23423 }, { "epoch": 46.848, "grad_norm": 1.2210298776626587, "learning_rate": 2e-05, "loss": 0.05247185, "step": 23424 }, { "epoch": 46.85, "grad_norm": 1.0659769773483276, "learning_rate": 2e-05, "loss": 0.04389524, "step": 23425 }, { "epoch": 46.852, "grad_norm": 2.051483154296875, "learning_rate": 2e-05, "loss": 0.03795972, "step": 23426 }, { "epoch": 46.854, "grad_norm": 1.4533435106277466, "learning_rate": 2e-05, "loss": 0.04525005, "step": 23427 }, { "epoch": 46.856, "grad_norm": 1.5629005432128906, "learning_rate": 2e-05, "loss": 0.03902795, "step": 23428 }, { "epoch": 46.858, "grad_norm": 1.0804848670959473, "learning_rate": 2e-05, "loss": 0.0634065, "step": 23429 }, { "epoch": 46.86, "grad_norm": 1.070578694343567, "learning_rate": 2e-05, "loss": 0.03439271, "step": 23430 }, { "epoch": 46.862, "grad_norm": 1.0677157640457153, "learning_rate": 2e-05, "loss": 0.03756397, "step": 23431 }, { "epoch": 46.864, "grad_norm": 0.8192936182022095, "learning_rate": 2e-05, "loss": 0.02615477, "step": 23432 }, { "epoch": 46.866, "grad_norm": 1.075719952583313, "learning_rate": 2e-05, "loss": 0.04199068, "step": 23433 }, { "epoch": 46.868, "grad_norm": 1.275301218032837, "learning_rate": 2e-05, "loss": 0.05247433, "step": 23434 }, { "epoch": 46.87, "grad_norm": 1.218895435333252, "learning_rate": 2e-05, "loss": 0.03936178, "step": 23435 }, { "epoch": 46.872, "grad_norm": 1.0126792192459106, "learning_rate": 2e-05, "loss": 0.04379427, "step": 23436 }, { "epoch": 46.874, "grad_norm": 1.0719962120056152, "learning_rate": 2e-05, "loss": 0.04096608, "step": 23437 }, { "epoch": 46.876, "grad_norm": 0.9388536810874939, "learning_rate": 2e-05, "loss": 0.04559058, "step": 23438 }, { "epoch": 46.878, "grad_norm": 1.0722471475601196, "learning_rate": 2e-05, "loss": 0.03498413, "step": 23439 }, { "epoch": 46.88, "grad_norm": 1.0731477737426758, "learning_rate": 2e-05, "loss": 0.04017755, "step": 23440 }, { "epoch": 46.882, "grad_norm": 0.9033427834510803, "learning_rate": 2e-05, "loss": 0.03798786, "step": 23441 }, { "epoch": 46.884, "grad_norm": 1.0361868143081665, "learning_rate": 2e-05, "loss": 0.04350713, "step": 23442 }, { "epoch": 46.886, "grad_norm": 0.9431261420249939, "learning_rate": 2e-05, "loss": 0.0305834, "step": 23443 }, { "epoch": 46.888, "grad_norm": 1.1714198589324951, "learning_rate": 2e-05, "loss": 0.04634073, "step": 23444 }, { "epoch": 46.89, "grad_norm": 1.6199334859848022, "learning_rate": 2e-05, "loss": 0.04108413, "step": 23445 }, { "epoch": 46.892, "grad_norm": 1.5586405992507935, "learning_rate": 2e-05, "loss": 0.0259655, "step": 23446 }, { "epoch": 46.894, "grad_norm": 0.9801030158996582, "learning_rate": 2e-05, "loss": 0.04357532, "step": 23447 }, { "epoch": 46.896, "grad_norm": 1.2208236455917358, "learning_rate": 2e-05, "loss": 0.04441687, "step": 23448 }, { "epoch": 46.898, "grad_norm": 0.9452463984489441, "learning_rate": 2e-05, "loss": 0.04175538, "step": 23449 }, { "epoch": 46.9, "grad_norm": 1.0137697458267212, "learning_rate": 2e-05, "loss": 0.0466395, "step": 23450 }, { "epoch": 46.902, "grad_norm": 1.2201143503189087, "learning_rate": 2e-05, "loss": 0.05274017, "step": 23451 }, { "epoch": 46.904, "grad_norm": 1.3885806798934937, "learning_rate": 2e-05, "loss": 0.0605853, "step": 23452 }, { "epoch": 46.906, "grad_norm": 4.058651447296143, "learning_rate": 2e-05, "loss": 0.05843858, "step": 23453 }, { "epoch": 46.908, "grad_norm": 0.7276335954666138, "learning_rate": 2e-05, "loss": 0.02517085, "step": 23454 }, { "epoch": 46.91, "grad_norm": 1.7695754766464233, "learning_rate": 2e-05, "loss": 0.04758018, "step": 23455 }, { "epoch": 46.912, "grad_norm": 1.2489367723464966, "learning_rate": 2e-05, "loss": 0.05046308, "step": 23456 }, { "epoch": 46.914, "grad_norm": 1.119614601135254, "learning_rate": 2e-05, "loss": 0.04163967, "step": 23457 }, { "epoch": 46.916, "grad_norm": 0.9581904411315918, "learning_rate": 2e-05, "loss": 0.0428218, "step": 23458 }, { "epoch": 46.918, "grad_norm": 0.9647688269615173, "learning_rate": 2e-05, "loss": 0.03843924, "step": 23459 }, { "epoch": 46.92, "grad_norm": 0.952667236328125, "learning_rate": 2e-05, "loss": 0.04137881, "step": 23460 }, { "epoch": 46.922, "grad_norm": 1.644375205039978, "learning_rate": 2e-05, "loss": 0.05739506, "step": 23461 }, { "epoch": 46.924, "grad_norm": 1.0552964210510254, "learning_rate": 2e-05, "loss": 0.04328398, "step": 23462 }, { "epoch": 46.926, "grad_norm": 1.6630902290344238, "learning_rate": 2e-05, "loss": 0.05835466, "step": 23463 }, { "epoch": 46.928, "grad_norm": 0.9677048921585083, "learning_rate": 2e-05, "loss": 0.03489691, "step": 23464 }, { "epoch": 46.93, "grad_norm": 0.7833137512207031, "learning_rate": 2e-05, "loss": 0.03356097, "step": 23465 }, { "epoch": 46.932, "grad_norm": 1.2217217683792114, "learning_rate": 2e-05, "loss": 0.04482928, "step": 23466 }, { "epoch": 46.934, "grad_norm": 1.0929977893829346, "learning_rate": 2e-05, "loss": 0.03894136, "step": 23467 }, { "epoch": 46.936, "grad_norm": 1.2253386974334717, "learning_rate": 2e-05, "loss": 0.03752672, "step": 23468 }, { "epoch": 46.938, "grad_norm": 0.988569438457489, "learning_rate": 2e-05, "loss": 0.03998517, "step": 23469 }, { "epoch": 46.94, "grad_norm": 0.9294567108154297, "learning_rate": 2e-05, "loss": 0.03735808, "step": 23470 }, { "epoch": 46.942, "grad_norm": 0.9444395303726196, "learning_rate": 2e-05, "loss": 0.03604439, "step": 23471 }, { "epoch": 46.944, "grad_norm": 1.017524242401123, "learning_rate": 2e-05, "loss": 0.03795053, "step": 23472 }, { "epoch": 46.946, "grad_norm": 1.0971325635910034, "learning_rate": 2e-05, "loss": 0.04276225, "step": 23473 }, { "epoch": 46.948, "grad_norm": 1.3760910034179688, "learning_rate": 2e-05, "loss": 0.05344362, "step": 23474 }, { "epoch": 46.95, "grad_norm": 0.9680789113044739, "learning_rate": 2e-05, "loss": 0.03834612, "step": 23475 }, { "epoch": 46.952, "grad_norm": 1.0283191204071045, "learning_rate": 2e-05, "loss": 0.04401558, "step": 23476 }, { "epoch": 46.954, "grad_norm": 1.2421773672103882, "learning_rate": 2e-05, "loss": 0.03678168, "step": 23477 }, { "epoch": 46.956, "grad_norm": 1.8620381355285645, "learning_rate": 2e-05, "loss": 0.05966516, "step": 23478 }, { "epoch": 46.958, "grad_norm": 1.000375747680664, "learning_rate": 2e-05, "loss": 0.03885657, "step": 23479 }, { "epoch": 46.96, "grad_norm": 0.9980706572532654, "learning_rate": 2e-05, "loss": 0.03927665, "step": 23480 }, { "epoch": 46.962, "grad_norm": 1.071062684059143, "learning_rate": 2e-05, "loss": 0.05022153, "step": 23481 }, { "epoch": 46.964, "grad_norm": 1.0247992277145386, "learning_rate": 2e-05, "loss": 0.03759687, "step": 23482 }, { "epoch": 46.966, "grad_norm": 1.1279823780059814, "learning_rate": 2e-05, "loss": 0.04084118, "step": 23483 }, { "epoch": 46.968, "grad_norm": 1.159861445426941, "learning_rate": 2e-05, "loss": 0.03957793, "step": 23484 }, { "epoch": 46.97, "grad_norm": 1.2503230571746826, "learning_rate": 2e-05, "loss": 0.0498237, "step": 23485 }, { "epoch": 46.972, "grad_norm": 2.550555944442749, "learning_rate": 2e-05, "loss": 0.05201169, "step": 23486 }, { "epoch": 46.974, "grad_norm": 1.0189933776855469, "learning_rate": 2e-05, "loss": 0.03422182, "step": 23487 }, { "epoch": 46.976, "grad_norm": 1.1015764474868774, "learning_rate": 2e-05, "loss": 0.05597252, "step": 23488 }, { "epoch": 46.978, "grad_norm": 1.0649232864379883, "learning_rate": 2e-05, "loss": 0.03823482, "step": 23489 }, { "epoch": 46.98, "grad_norm": 1.2358747720718384, "learning_rate": 2e-05, "loss": 0.03571894, "step": 23490 }, { "epoch": 46.982, "grad_norm": 1.0739089250564575, "learning_rate": 2e-05, "loss": 0.0457719, "step": 23491 }, { "epoch": 46.984, "grad_norm": 0.9738242626190186, "learning_rate": 2e-05, "loss": 0.04676471, "step": 23492 }, { "epoch": 46.986, "grad_norm": 0.9358574748039246, "learning_rate": 2e-05, "loss": 0.02672759, "step": 23493 }, { "epoch": 46.988, "grad_norm": 1.2666215896606445, "learning_rate": 2e-05, "loss": 0.04723007, "step": 23494 }, { "epoch": 46.99, "grad_norm": 1.1249349117279053, "learning_rate": 2e-05, "loss": 0.04890946, "step": 23495 }, { "epoch": 46.992, "grad_norm": 1.0160354375839233, "learning_rate": 2e-05, "loss": 0.02733931, "step": 23496 }, { "epoch": 46.994, "grad_norm": 1.1112245321273804, "learning_rate": 2e-05, "loss": 0.04088189, "step": 23497 }, { "epoch": 46.996, "grad_norm": 0.9411455988883972, "learning_rate": 2e-05, "loss": 0.03731472, "step": 23498 }, { "epoch": 46.998, "grad_norm": 0.8207102417945862, "learning_rate": 2e-05, "loss": 0.02463753, "step": 23499 }, { "epoch": 47.0, "grad_norm": 1.2974495887756348, "learning_rate": 2e-05, "loss": 0.05239491, "step": 23500 }, { "epoch": 47.0, "eval_performance": { "AngleClassification_1": 1.0, "AngleClassification_2": 0.998, "AngleClassification_3": 0.9920159680638723, "Equal_1": 0.994, "Equal_2": 0.9760479041916168, "Equal_3": 0.9880239520958084, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.998003992015968, "Parallel_1": 0.9899799599198397, "Parallel_2": 0.9979959919839679, "Parallel_3": 0.992, "Perpendicular_1": 0.994, "Perpendicular_2": 0.998, "Perpendicular_3": 0.8917835671342685, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.996, "PointLiesOnCircle_3": 0.99, "PointLiesOnLine_1": 0.9919839679358717, "PointLiesOnLine_2": 0.9979959919839679, "PointLiesOnLine_3": 0.9800399201596807 }, "eval_runtime": 224.9457, "eval_samples_per_second": 46.678, "eval_steps_per_second": 0.934, "step": 23500 }, { "epoch": 47.002, "grad_norm": 1.0781254768371582, "learning_rate": 2e-05, "loss": 0.04405306, "step": 23501 }, { "epoch": 47.004, "grad_norm": 1.0324925184249878, "learning_rate": 2e-05, "loss": 0.03204145, "step": 23502 }, { "epoch": 47.006, "grad_norm": 0.8261357545852661, "learning_rate": 2e-05, "loss": 0.02656678, "step": 23503 }, { "epoch": 47.008, "grad_norm": 1.126706838607788, "learning_rate": 2e-05, "loss": 0.03624219, "step": 23504 }, { "epoch": 47.01, "grad_norm": 1.1089459657669067, "learning_rate": 2e-05, "loss": 0.04325495, "step": 23505 }, { "epoch": 47.012, "grad_norm": 0.9955711960792542, "learning_rate": 2e-05, "loss": 0.03128721, "step": 23506 }, { "epoch": 47.014, "grad_norm": 1.2665793895721436, "learning_rate": 2e-05, "loss": 0.05461794, "step": 23507 }, { "epoch": 47.016, "grad_norm": 1.2938201427459717, "learning_rate": 2e-05, "loss": 0.04263186, "step": 23508 }, { "epoch": 47.018, "grad_norm": 1.4284344911575317, "learning_rate": 2e-05, "loss": 0.03279409, "step": 23509 }, { "epoch": 47.02, "grad_norm": 1.1825913190841675, "learning_rate": 2e-05, "loss": 0.05792944, "step": 23510 }, { "epoch": 47.022, "grad_norm": 1.2886711359024048, "learning_rate": 2e-05, "loss": 0.044133, "step": 23511 }, { "epoch": 47.024, "grad_norm": 0.9183266162872314, "learning_rate": 2e-05, "loss": 0.03341915, "step": 23512 }, { "epoch": 47.026, "grad_norm": 1.1567893028259277, "learning_rate": 2e-05, "loss": 0.04644934, "step": 23513 }, { "epoch": 47.028, "grad_norm": 0.9019851088523865, "learning_rate": 2e-05, "loss": 0.02687375, "step": 23514 }, { "epoch": 47.03, "grad_norm": 0.9611225128173828, "learning_rate": 2e-05, "loss": 0.04624844, "step": 23515 }, { "epoch": 47.032, "grad_norm": 1.248719573020935, "learning_rate": 2e-05, "loss": 0.05373285, "step": 23516 }, { "epoch": 47.034, "grad_norm": 2.152841567993164, "learning_rate": 2e-05, "loss": 0.04623188, "step": 23517 }, { "epoch": 47.036, "grad_norm": 1.8600836992263794, "learning_rate": 2e-05, "loss": 0.0380694, "step": 23518 }, { "epoch": 47.038, "grad_norm": 1.2856528759002686, "learning_rate": 2e-05, "loss": 0.05561623, "step": 23519 }, { "epoch": 47.04, "grad_norm": 1.2357491254806519, "learning_rate": 2e-05, "loss": 0.05758506, "step": 23520 }, { "epoch": 47.042, "grad_norm": 1.1394767761230469, "learning_rate": 2e-05, "loss": 0.04425605, "step": 23521 }, { "epoch": 47.044, "grad_norm": 1.0952931642532349, "learning_rate": 2e-05, "loss": 0.04991034, "step": 23522 }, { "epoch": 47.046, "grad_norm": 0.9734818339347839, "learning_rate": 2e-05, "loss": 0.03328012, "step": 23523 }, { "epoch": 47.048, "grad_norm": 1.109634280204773, "learning_rate": 2e-05, "loss": 0.0376914, "step": 23524 }, { "epoch": 47.05, "grad_norm": 1.0621169805526733, "learning_rate": 2e-05, "loss": 0.04123484, "step": 23525 }, { "epoch": 47.052, "grad_norm": 1.26069974899292, "learning_rate": 2e-05, "loss": 0.04173406, "step": 23526 }, { "epoch": 47.054, "grad_norm": 1.231325387954712, "learning_rate": 2e-05, "loss": 0.05455248, "step": 23527 }, { "epoch": 47.056, "grad_norm": 1.131585717201233, "learning_rate": 2e-05, "loss": 0.05570379, "step": 23528 }, { "epoch": 47.058, "grad_norm": 0.9795395731925964, "learning_rate": 2e-05, "loss": 0.03424241, "step": 23529 }, { "epoch": 47.06, "grad_norm": 1.0329655408859253, "learning_rate": 2e-05, "loss": 0.03544684, "step": 23530 }, { "epoch": 47.062, "grad_norm": 1.1134518384933472, "learning_rate": 2e-05, "loss": 0.04340722, "step": 23531 }, { "epoch": 47.064, "grad_norm": 1.0628446340560913, "learning_rate": 2e-05, "loss": 0.03253817, "step": 23532 }, { "epoch": 47.066, "grad_norm": 1.7172572612762451, "learning_rate": 2e-05, "loss": 0.04656677, "step": 23533 }, { "epoch": 47.068, "grad_norm": 1.6046242713928223, "learning_rate": 2e-05, "loss": 0.03172398, "step": 23534 }, { "epoch": 47.07, "grad_norm": 1.06123948097229, "learning_rate": 2e-05, "loss": 0.04691318, "step": 23535 }, { "epoch": 47.072, "grad_norm": 0.9456564784049988, "learning_rate": 2e-05, "loss": 0.03791856, "step": 23536 }, { "epoch": 47.074, "grad_norm": 2.4248647689819336, "learning_rate": 2e-05, "loss": 0.03875469, "step": 23537 }, { "epoch": 47.076, "grad_norm": 1.0077589750289917, "learning_rate": 2e-05, "loss": 0.04863484, "step": 23538 }, { "epoch": 47.078, "grad_norm": 1.2945468425750732, "learning_rate": 2e-05, "loss": 0.04328693, "step": 23539 }, { "epoch": 47.08, "grad_norm": 1.0359041690826416, "learning_rate": 2e-05, "loss": 0.03994237, "step": 23540 }, { "epoch": 47.082, "grad_norm": 1.2336264848709106, "learning_rate": 2e-05, "loss": 0.03381227, "step": 23541 }, { "epoch": 47.084, "grad_norm": 1.3015300035476685, "learning_rate": 2e-05, "loss": 0.06318841, "step": 23542 }, { "epoch": 47.086, "grad_norm": 0.7790587544441223, "learning_rate": 2e-05, "loss": 0.02497177, "step": 23543 }, { "epoch": 47.088, "grad_norm": 1.2157649993896484, "learning_rate": 2e-05, "loss": 0.0419163, "step": 23544 }, { "epoch": 47.09, "grad_norm": 1.306869387626648, "learning_rate": 2e-05, "loss": 0.05824634, "step": 23545 }, { "epoch": 47.092, "grad_norm": 1.5953513383865356, "learning_rate": 2e-05, "loss": 0.0449495, "step": 23546 }, { "epoch": 47.094, "grad_norm": 0.846473753452301, "learning_rate": 2e-05, "loss": 0.02792708, "step": 23547 }, { "epoch": 47.096, "grad_norm": 0.9308416247367859, "learning_rate": 2e-05, "loss": 0.03195363, "step": 23548 }, { "epoch": 47.098, "grad_norm": 1.2852615118026733, "learning_rate": 2e-05, "loss": 0.04297009, "step": 23549 }, { "epoch": 47.1, "grad_norm": 1.8347728252410889, "learning_rate": 2e-05, "loss": 0.03445425, "step": 23550 }, { "epoch": 47.102, "grad_norm": 1.1911333799362183, "learning_rate": 2e-05, "loss": 0.04958761, "step": 23551 }, { "epoch": 47.104, "grad_norm": 1.0476586818695068, "learning_rate": 2e-05, "loss": 0.03151809, "step": 23552 }, { "epoch": 47.106, "grad_norm": 1.0703850984573364, "learning_rate": 2e-05, "loss": 0.05285926, "step": 23553 }, { "epoch": 47.108, "grad_norm": 1.1344631910324097, "learning_rate": 2e-05, "loss": 0.05219762, "step": 23554 }, { "epoch": 47.11, "grad_norm": 1.105231523513794, "learning_rate": 2e-05, "loss": 0.04228245, "step": 23555 }, { "epoch": 47.112, "grad_norm": 1.2150284051895142, "learning_rate": 2e-05, "loss": 0.04210718, "step": 23556 }, { "epoch": 47.114, "grad_norm": 1.133745551109314, "learning_rate": 2e-05, "loss": 0.05958553, "step": 23557 }, { "epoch": 47.116, "grad_norm": 1.008095622062683, "learning_rate": 2e-05, "loss": 0.04621772, "step": 23558 }, { "epoch": 47.118, "grad_norm": 1.185854196548462, "learning_rate": 2e-05, "loss": 0.04172992, "step": 23559 }, { "epoch": 47.12, "grad_norm": 1.1502045392990112, "learning_rate": 2e-05, "loss": 0.05718469, "step": 23560 }, { "epoch": 47.122, "grad_norm": 0.8861024975776672, "learning_rate": 2e-05, "loss": 0.02941658, "step": 23561 }, { "epoch": 47.124, "grad_norm": 1.054580569267273, "learning_rate": 2e-05, "loss": 0.0477374, "step": 23562 }, { "epoch": 47.126, "grad_norm": 0.8965249061584473, "learning_rate": 2e-05, "loss": 0.03786917, "step": 23563 }, { "epoch": 47.128, "grad_norm": 0.9903602004051208, "learning_rate": 2e-05, "loss": 0.03476927, "step": 23564 }, { "epoch": 47.13, "grad_norm": 0.9678831696510315, "learning_rate": 2e-05, "loss": 0.03639551, "step": 23565 }, { "epoch": 47.132, "grad_norm": 0.8923867344856262, "learning_rate": 2e-05, "loss": 0.03386071, "step": 23566 }, { "epoch": 47.134, "grad_norm": 1.0544780492782593, "learning_rate": 2e-05, "loss": 0.05235102, "step": 23567 }, { "epoch": 47.136, "grad_norm": 0.8595101833343506, "learning_rate": 2e-05, "loss": 0.02970533, "step": 23568 }, { "epoch": 47.138, "grad_norm": 0.9327841401100159, "learning_rate": 2e-05, "loss": 0.0373207, "step": 23569 }, { "epoch": 47.14, "grad_norm": 1.1017405986785889, "learning_rate": 2e-05, "loss": 0.04250918, "step": 23570 }, { "epoch": 47.142, "grad_norm": 1.0893855094909668, "learning_rate": 2e-05, "loss": 0.04513121, "step": 23571 }, { "epoch": 47.144, "grad_norm": 1.665575623512268, "learning_rate": 2e-05, "loss": 0.05010333, "step": 23572 }, { "epoch": 47.146, "grad_norm": 1.1385687589645386, "learning_rate": 2e-05, "loss": 0.04394225, "step": 23573 }, { "epoch": 47.148, "grad_norm": 1.4844821691513062, "learning_rate": 2e-05, "loss": 0.05615442, "step": 23574 }, { "epoch": 47.15, "grad_norm": 0.9226142764091492, "learning_rate": 2e-05, "loss": 0.02722756, "step": 23575 }, { "epoch": 47.152, "grad_norm": 0.9305194616317749, "learning_rate": 2e-05, "loss": 0.03090802, "step": 23576 }, { "epoch": 47.154, "grad_norm": 0.9162293076515198, "learning_rate": 2e-05, "loss": 0.0378013, "step": 23577 }, { "epoch": 47.156, "grad_norm": 1.0273809432983398, "learning_rate": 2e-05, "loss": 0.0412821, "step": 23578 }, { "epoch": 47.158, "grad_norm": 1.036539077758789, "learning_rate": 2e-05, "loss": 0.03353439, "step": 23579 }, { "epoch": 47.16, "grad_norm": 1.4034755229949951, "learning_rate": 2e-05, "loss": 0.04220682, "step": 23580 }, { "epoch": 47.162, "grad_norm": 0.9738513231277466, "learning_rate": 2e-05, "loss": 0.04337395, "step": 23581 }, { "epoch": 47.164, "grad_norm": 1.1822750568389893, "learning_rate": 2e-05, "loss": 0.04882853, "step": 23582 }, { "epoch": 47.166, "grad_norm": 1.1742006540298462, "learning_rate": 2e-05, "loss": 0.05063605, "step": 23583 }, { "epoch": 47.168, "grad_norm": 1.4821701049804688, "learning_rate": 2e-05, "loss": 0.03784803, "step": 23584 }, { "epoch": 47.17, "grad_norm": 1.2474571466445923, "learning_rate": 2e-05, "loss": 0.06040697, "step": 23585 }, { "epoch": 47.172, "grad_norm": 0.9976972937583923, "learning_rate": 2e-05, "loss": 0.04251818, "step": 23586 }, { "epoch": 47.174, "grad_norm": 1.699065923690796, "learning_rate": 2e-05, "loss": 0.04327475, "step": 23587 }, { "epoch": 47.176, "grad_norm": 1.358112096786499, "learning_rate": 2e-05, "loss": 0.05906098, "step": 23588 }, { "epoch": 47.178, "grad_norm": 1.7188794612884521, "learning_rate": 2e-05, "loss": 0.03638441, "step": 23589 }, { "epoch": 47.18, "grad_norm": 1.2896263599395752, "learning_rate": 2e-05, "loss": 0.04122547, "step": 23590 }, { "epoch": 47.182, "grad_norm": 1.0092145204544067, "learning_rate": 2e-05, "loss": 0.03651688, "step": 23591 }, { "epoch": 47.184, "grad_norm": 1.051220417022705, "learning_rate": 2e-05, "loss": 0.05658867, "step": 23592 }, { "epoch": 47.186, "grad_norm": 0.9565996527671814, "learning_rate": 2e-05, "loss": 0.04069651, "step": 23593 }, { "epoch": 47.188, "grad_norm": 0.8673154711723328, "learning_rate": 2e-05, "loss": 0.03294455, "step": 23594 }, { "epoch": 47.19, "grad_norm": 0.9085294604301453, "learning_rate": 2e-05, "loss": 0.0377614, "step": 23595 }, { "epoch": 47.192, "grad_norm": 1.1526522636413574, "learning_rate": 2e-05, "loss": 0.03889897, "step": 23596 }, { "epoch": 47.194, "grad_norm": 1.1717661619186401, "learning_rate": 2e-05, "loss": 0.04644834, "step": 23597 }, { "epoch": 47.196, "grad_norm": 0.984303891658783, "learning_rate": 2e-05, "loss": 0.04189226, "step": 23598 }, { "epoch": 47.198, "grad_norm": 0.9345293045043945, "learning_rate": 2e-05, "loss": 0.03786005, "step": 23599 }, { "epoch": 47.2, "grad_norm": 1.2306392192840576, "learning_rate": 2e-05, "loss": 0.04945513, "step": 23600 }, { "epoch": 47.202, "grad_norm": 1.2996742725372314, "learning_rate": 2e-05, "loss": 0.04291711, "step": 23601 }, { "epoch": 47.204, "grad_norm": 1.0471516847610474, "learning_rate": 2e-05, "loss": 0.04189179, "step": 23602 }, { "epoch": 47.206, "grad_norm": 1.1448955535888672, "learning_rate": 2e-05, "loss": 0.04963057, "step": 23603 }, { "epoch": 47.208, "grad_norm": 1.157342553138733, "learning_rate": 2e-05, "loss": 0.04905769, "step": 23604 }, { "epoch": 47.21, "grad_norm": 2.5493202209472656, "learning_rate": 2e-05, "loss": 0.04556019, "step": 23605 }, { "epoch": 47.212, "grad_norm": 0.9914862513542175, "learning_rate": 2e-05, "loss": 0.03875187, "step": 23606 }, { "epoch": 47.214, "grad_norm": 3.6287899017333984, "learning_rate": 2e-05, "loss": 0.05658118, "step": 23607 }, { "epoch": 47.216, "grad_norm": 1.2580697536468506, "learning_rate": 2e-05, "loss": 0.03819752, "step": 23608 }, { "epoch": 47.218, "grad_norm": 1.4296969175338745, "learning_rate": 2e-05, "loss": 0.04502263, "step": 23609 }, { "epoch": 47.22, "grad_norm": 1.3018399477005005, "learning_rate": 2e-05, "loss": 0.05487642, "step": 23610 }, { "epoch": 47.222, "grad_norm": 1.282131552696228, "learning_rate": 2e-05, "loss": 0.04581036, "step": 23611 }, { "epoch": 47.224, "grad_norm": 0.986998438835144, "learning_rate": 2e-05, "loss": 0.03718111, "step": 23612 }, { "epoch": 47.226, "grad_norm": 0.9979808330535889, "learning_rate": 2e-05, "loss": 0.0399188, "step": 23613 }, { "epoch": 47.228, "grad_norm": 0.8975911140441895, "learning_rate": 2e-05, "loss": 0.03411479, "step": 23614 }, { "epoch": 47.23, "grad_norm": 7.970768928527832, "learning_rate": 2e-05, "loss": 0.05464906, "step": 23615 }, { "epoch": 47.232, "grad_norm": 0.9351603984832764, "learning_rate": 2e-05, "loss": 0.03354009, "step": 23616 }, { "epoch": 47.234, "grad_norm": 1.0210729837417603, "learning_rate": 2e-05, "loss": 0.03628429, "step": 23617 }, { "epoch": 47.236, "grad_norm": 1.159812331199646, "learning_rate": 2e-05, "loss": 0.05169687, "step": 23618 }, { "epoch": 47.238, "grad_norm": 1.0336287021636963, "learning_rate": 2e-05, "loss": 0.04751596, "step": 23619 }, { "epoch": 47.24, "grad_norm": 1.0557957887649536, "learning_rate": 2e-05, "loss": 0.03742843, "step": 23620 }, { "epoch": 47.242, "grad_norm": 1.939700722694397, "learning_rate": 2e-05, "loss": 0.05456325, "step": 23621 }, { "epoch": 47.244, "grad_norm": 1.4773844480514526, "learning_rate": 2e-05, "loss": 0.04167607, "step": 23622 }, { "epoch": 47.246, "grad_norm": 0.8941033482551575, "learning_rate": 2e-05, "loss": 0.03019752, "step": 23623 }, { "epoch": 47.248, "grad_norm": 1.7313764095306396, "learning_rate": 2e-05, "loss": 0.04194053, "step": 23624 }, { "epoch": 47.25, "grad_norm": 0.8988141417503357, "learning_rate": 2e-05, "loss": 0.03685491, "step": 23625 }, { "epoch": 47.252, "grad_norm": 1.287552833557129, "learning_rate": 2e-05, "loss": 0.05267394, "step": 23626 }, { "epoch": 47.254, "grad_norm": 1.877610206604004, "learning_rate": 2e-05, "loss": 0.04317475, "step": 23627 }, { "epoch": 47.256, "grad_norm": 2.1020545959472656, "learning_rate": 2e-05, "loss": 0.05109879, "step": 23628 }, { "epoch": 47.258, "grad_norm": 1.2036796808242798, "learning_rate": 2e-05, "loss": 0.04731586, "step": 23629 }, { "epoch": 47.26, "grad_norm": 0.9833993911743164, "learning_rate": 2e-05, "loss": 0.0435365, "step": 23630 }, { "epoch": 47.262, "grad_norm": 0.9937183856964111, "learning_rate": 2e-05, "loss": 0.03255787, "step": 23631 }, { "epoch": 47.264, "grad_norm": 1.0237723588943481, "learning_rate": 2e-05, "loss": 0.03379956, "step": 23632 }, { "epoch": 47.266, "grad_norm": 0.9280229806900024, "learning_rate": 2e-05, "loss": 0.02844493, "step": 23633 }, { "epoch": 47.268, "grad_norm": 1.1949222087860107, "learning_rate": 2e-05, "loss": 0.04337098, "step": 23634 }, { "epoch": 47.27, "grad_norm": 1.1081969738006592, "learning_rate": 2e-05, "loss": 0.05048704, "step": 23635 }, { "epoch": 47.272, "grad_norm": 1.1787029504776, "learning_rate": 2e-05, "loss": 0.03981124, "step": 23636 }, { "epoch": 47.274, "grad_norm": 1.1143083572387695, "learning_rate": 2e-05, "loss": 0.03959722, "step": 23637 }, { "epoch": 47.276, "grad_norm": 1.0143859386444092, "learning_rate": 2e-05, "loss": 0.03672746, "step": 23638 }, { "epoch": 47.278, "grad_norm": 1.0333231687545776, "learning_rate": 2e-05, "loss": 0.03811924, "step": 23639 }, { "epoch": 47.28, "grad_norm": 1.236826777458191, "learning_rate": 2e-05, "loss": 0.04729872, "step": 23640 }, { "epoch": 47.282, "grad_norm": 1.3357667922973633, "learning_rate": 2e-05, "loss": 0.05370675, "step": 23641 }, { "epoch": 47.284, "grad_norm": 1.1280430555343628, "learning_rate": 2e-05, "loss": 0.04815095, "step": 23642 }, { "epoch": 47.286, "grad_norm": 0.9794661402702332, "learning_rate": 2e-05, "loss": 0.03532641, "step": 23643 }, { "epoch": 47.288, "grad_norm": 0.8724026679992676, "learning_rate": 2e-05, "loss": 0.02886418, "step": 23644 }, { "epoch": 47.29, "grad_norm": 0.983603835105896, "learning_rate": 2e-05, "loss": 0.04332222, "step": 23645 }, { "epoch": 47.292, "grad_norm": 0.970591127872467, "learning_rate": 2e-05, "loss": 0.03312496, "step": 23646 }, { "epoch": 47.294, "grad_norm": 3.4974524974823, "learning_rate": 2e-05, "loss": 0.04759998, "step": 23647 }, { "epoch": 47.296, "grad_norm": 1.2718149423599243, "learning_rate": 2e-05, "loss": 0.03222886, "step": 23648 }, { "epoch": 47.298, "grad_norm": 1.115829586982727, "learning_rate": 2e-05, "loss": 0.03760251, "step": 23649 }, { "epoch": 47.3, "grad_norm": 1.045224905014038, "learning_rate": 2e-05, "loss": 0.0388329, "step": 23650 }, { "epoch": 47.302, "grad_norm": 0.9225600957870483, "learning_rate": 2e-05, "loss": 0.02854843, "step": 23651 }, { "epoch": 47.304, "grad_norm": 1.1297686100006104, "learning_rate": 2e-05, "loss": 0.05503256, "step": 23652 }, { "epoch": 47.306, "grad_norm": 1.1783931255340576, "learning_rate": 2e-05, "loss": 0.04552291, "step": 23653 }, { "epoch": 47.308, "grad_norm": 0.9302802681922913, "learning_rate": 2e-05, "loss": 0.03579133, "step": 23654 }, { "epoch": 47.31, "grad_norm": 1.1339316368103027, "learning_rate": 2e-05, "loss": 0.05099681, "step": 23655 }, { "epoch": 47.312, "grad_norm": 2.024000644683838, "learning_rate": 2e-05, "loss": 0.03895874, "step": 23656 }, { "epoch": 47.314, "grad_norm": 1.1051119565963745, "learning_rate": 2e-05, "loss": 0.04664847, "step": 23657 }, { "epoch": 47.316, "grad_norm": 1.0856823921203613, "learning_rate": 2e-05, "loss": 0.04398474, "step": 23658 }, { "epoch": 47.318, "grad_norm": 1.074838638305664, "learning_rate": 2e-05, "loss": 0.04200622, "step": 23659 }, { "epoch": 47.32, "grad_norm": 1.1644055843353271, "learning_rate": 2e-05, "loss": 0.03961263, "step": 23660 }, { "epoch": 47.322, "grad_norm": 1.0396311283111572, "learning_rate": 2e-05, "loss": 0.03801247, "step": 23661 }, { "epoch": 47.324, "grad_norm": 1.0235298871994019, "learning_rate": 2e-05, "loss": 0.04713449, "step": 23662 }, { "epoch": 47.326, "grad_norm": 8.510767936706543, "learning_rate": 2e-05, "loss": 0.05402602, "step": 23663 }, { "epoch": 47.328, "grad_norm": 3.11049222946167, "learning_rate": 2e-05, "loss": 0.05721381, "step": 23664 }, { "epoch": 47.33, "grad_norm": 1.0492740869522095, "learning_rate": 2e-05, "loss": 0.04260036, "step": 23665 }, { "epoch": 47.332, "grad_norm": 1.6513819694519043, "learning_rate": 2e-05, "loss": 0.04281262, "step": 23666 }, { "epoch": 47.334, "grad_norm": 0.9504252672195435, "learning_rate": 2e-05, "loss": 0.0319023, "step": 23667 }, { "epoch": 47.336, "grad_norm": 1.008528232574463, "learning_rate": 2e-05, "loss": 0.0386165, "step": 23668 }, { "epoch": 47.338, "grad_norm": 1.1323277950286865, "learning_rate": 2e-05, "loss": 0.04989149, "step": 23669 }, { "epoch": 47.34, "grad_norm": 1.0840983390808105, "learning_rate": 2e-05, "loss": 0.05219552, "step": 23670 }, { "epoch": 47.342, "grad_norm": 1.3293006420135498, "learning_rate": 2e-05, "loss": 0.04114286, "step": 23671 }, { "epoch": 47.344, "grad_norm": 1.5028505325317383, "learning_rate": 2e-05, "loss": 0.07402927, "step": 23672 }, { "epoch": 47.346, "grad_norm": 1.0673332214355469, "learning_rate": 2e-05, "loss": 0.03196111, "step": 23673 }, { "epoch": 47.348, "grad_norm": 2.343308687210083, "learning_rate": 2e-05, "loss": 0.07152461, "step": 23674 }, { "epoch": 47.35, "grad_norm": 0.8959829807281494, "learning_rate": 2e-05, "loss": 0.02780672, "step": 23675 }, { "epoch": 47.352, "grad_norm": 1.0516010522842407, "learning_rate": 2e-05, "loss": 0.04981563, "step": 23676 }, { "epoch": 47.354, "grad_norm": 1.0763719081878662, "learning_rate": 2e-05, "loss": 0.03350294, "step": 23677 }, { "epoch": 47.356, "grad_norm": 0.8678815960884094, "learning_rate": 2e-05, "loss": 0.02785424, "step": 23678 }, { "epoch": 47.358, "grad_norm": 1.0789281129837036, "learning_rate": 2e-05, "loss": 0.05663691, "step": 23679 }, { "epoch": 47.36, "grad_norm": 1.133872628211975, "learning_rate": 2e-05, "loss": 0.04536335, "step": 23680 }, { "epoch": 47.362, "grad_norm": 1.0617961883544922, "learning_rate": 2e-05, "loss": 0.03840012, "step": 23681 }, { "epoch": 47.364, "grad_norm": 1.141158103942871, "learning_rate": 2e-05, "loss": 0.04564988, "step": 23682 }, { "epoch": 47.366, "grad_norm": 1.1426880359649658, "learning_rate": 2e-05, "loss": 0.05492597, "step": 23683 }, { "epoch": 47.368, "grad_norm": 1.2891075611114502, "learning_rate": 2e-05, "loss": 0.04475649, "step": 23684 }, { "epoch": 47.37, "grad_norm": 0.9464418888092041, "learning_rate": 2e-05, "loss": 0.04402163, "step": 23685 }, { "epoch": 47.372, "grad_norm": 1.2587634325027466, "learning_rate": 2e-05, "loss": 0.06171493, "step": 23686 }, { "epoch": 47.374, "grad_norm": 0.9983220100402832, "learning_rate": 2e-05, "loss": 0.03821462, "step": 23687 }, { "epoch": 47.376, "grad_norm": 1.907941460609436, "learning_rate": 2e-05, "loss": 0.05385751, "step": 23688 }, { "epoch": 47.378, "grad_norm": 0.8500486612319946, "learning_rate": 2e-05, "loss": 0.02971169, "step": 23689 }, { "epoch": 47.38, "grad_norm": 1.171464443206787, "learning_rate": 2e-05, "loss": 0.03766859, "step": 23690 }, { "epoch": 47.382, "grad_norm": 1.092240571975708, "learning_rate": 2e-05, "loss": 0.04867549, "step": 23691 }, { "epoch": 47.384, "grad_norm": 0.9573045969009399, "learning_rate": 2e-05, "loss": 0.0367619, "step": 23692 }, { "epoch": 47.386, "grad_norm": 1.5154966115951538, "learning_rate": 2e-05, "loss": 0.06257027, "step": 23693 }, { "epoch": 47.388, "grad_norm": 0.9772753715515137, "learning_rate": 2e-05, "loss": 0.03962587, "step": 23694 }, { "epoch": 47.39, "grad_norm": 0.9933499097824097, "learning_rate": 2e-05, "loss": 0.03265154, "step": 23695 }, { "epoch": 47.392, "grad_norm": 1.2628803253173828, "learning_rate": 2e-05, "loss": 0.05668861, "step": 23696 }, { "epoch": 47.394, "grad_norm": 1.07578444480896, "learning_rate": 2e-05, "loss": 0.04437926, "step": 23697 }, { "epoch": 47.396, "grad_norm": 1.0292528867721558, "learning_rate": 2e-05, "loss": 0.034625, "step": 23698 }, { "epoch": 47.398, "grad_norm": 0.9514861702919006, "learning_rate": 2e-05, "loss": 0.0248908, "step": 23699 }, { "epoch": 47.4, "grad_norm": 1.0663495063781738, "learning_rate": 2e-05, "loss": 0.0498912, "step": 23700 }, { "epoch": 47.402, "grad_norm": 0.9290273785591125, "learning_rate": 2e-05, "loss": 0.03897344, "step": 23701 }, { "epoch": 47.404, "grad_norm": 1.1901650428771973, "learning_rate": 2e-05, "loss": 0.04514945, "step": 23702 }, { "epoch": 47.406, "grad_norm": 1.036565899848938, "learning_rate": 2e-05, "loss": 0.03996158, "step": 23703 }, { "epoch": 47.408, "grad_norm": 1.1399861574172974, "learning_rate": 2e-05, "loss": 0.04928694, "step": 23704 }, { "epoch": 47.41, "grad_norm": 1.053160309791565, "learning_rate": 2e-05, "loss": 0.0492807, "step": 23705 }, { "epoch": 47.412, "grad_norm": 1.0769565105438232, "learning_rate": 2e-05, "loss": 0.04557002, "step": 23706 }, { "epoch": 47.414, "grad_norm": 1.5144438743591309, "learning_rate": 2e-05, "loss": 0.04284724, "step": 23707 }, { "epoch": 47.416, "grad_norm": 0.8692823052406311, "learning_rate": 2e-05, "loss": 0.02953464, "step": 23708 }, { "epoch": 47.418, "grad_norm": 1.766055703163147, "learning_rate": 2e-05, "loss": 0.03496955, "step": 23709 }, { "epoch": 47.42, "grad_norm": 1.0351566076278687, "learning_rate": 2e-05, "loss": 0.03808454, "step": 23710 }, { "epoch": 47.422, "grad_norm": 1.0329164266586304, "learning_rate": 2e-05, "loss": 0.04154298, "step": 23711 }, { "epoch": 47.424, "grad_norm": 0.9106941223144531, "learning_rate": 2e-05, "loss": 0.03471414, "step": 23712 }, { "epoch": 47.426, "grad_norm": 0.9985986351966858, "learning_rate": 2e-05, "loss": 0.03932318, "step": 23713 }, { "epoch": 47.428, "grad_norm": 1.0269774198532104, "learning_rate": 2e-05, "loss": 0.03581782, "step": 23714 }, { "epoch": 47.43, "grad_norm": 1.129923701286316, "learning_rate": 2e-05, "loss": 0.05765588, "step": 23715 }, { "epoch": 47.432, "grad_norm": 1.278012990951538, "learning_rate": 2e-05, "loss": 0.04962663, "step": 23716 }, { "epoch": 47.434, "grad_norm": 0.7766261696815491, "learning_rate": 2e-05, "loss": 0.02193315, "step": 23717 }, { "epoch": 47.436, "grad_norm": 1.0411211252212524, "learning_rate": 2e-05, "loss": 0.05195874, "step": 23718 }, { "epoch": 47.438, "grad_norm": 0.9886196255683899, "learning_rate": 2e-05, "loss": 0.03056406, "step": 23719 }, { "epoch": 47.44, "grad_norm": 1.0291918516159058, "learning_rate": 2e-05, "loss": 0.04513706, "step": 23720 }, { "epoch": 47.442, "grad_norm": 1.032611608505249, "learning_rate": 2e-05, "loss": 0.0375468, "step": 23721 }, { "epoch": 47.444, "grad_norm": 1.009380578994751, "learning_rate": 2e-05, "loss": 0.04524942, "step": 23722 }, { "epoch": 47.446, "grad_norm": 0.9575527310371399, "learning_rate": 2e-05, "loss": 0.03913, "step": 23723 }, { "epoch": 47.448, "grad_norm": 1.4188814163208008, "learning_rate": 2e-05, "loss": 0.05222564, "step": 23724 }, { "epoch": 47.45, "grad_norm": 1.3722389936447144, "learning_rate": 2e-05, "loss": 0.04753223, "step": 23725 }, { "epoch": 47.452, "grad_norm": 0.9300989508628845, "learning_rate": 2e-05, "loss": 0.0421416, "step": 23726 }, { "epoch": 47.454, "grad_norm": 1.0331085920333862, "learning_rate": 2e-05, "loss": 0.04828961, "step": 23727 }, { "epoch": 47.456, "grad_norm": 1.0350401401519775, "learning_rate": 2e-05, "loss": 0.04444535, "step": 23728 }, { "epoch": 47.458, "grad_norm": 0.9884103536605835, "learning_rate": 2e-05, "loss": 0.03949423, "step": 23729 }, { "epoch": 47.46, "grad_norm": 1.2454828023910522, "learning_rate": 2e-05, "loss": 0.04331077, "step": 23730 }, { "epoch": 47.462, "grad_norm": 1.0359313488006592, "learning_rate": 2e-05, "loss": 0.0441678, "step": 23731 }, { "epoch": 47.464, "grad_norm": 1.063355565071106, "learning_rate": 2e-05, "loss": 0.04690478, "step": 23732 }, { "epoch": 47.466, "grad_norm": 0.8971952795982361, "learning_rate": 2e-05, "loss": 0.0273696, "step": 23733 }, { "epoch": 47.468, "grad_norm": 0.9673405289649963, "learning_rate": 2e-05, "loss": 0.02509457, "step": 23734 }, { "epoch": 47.47, "grad_norm": 1.0418877601623535, "learning_rate": 2e-05, "loss": 0.04500167, "step": 23735 }, { "epoch": 47.472, "grad_norm": 1.8244998455047607, "learning_rate": 2e-05, "loss": 0.06962796, "step": 23736 }, { "epoch": 47.474, "grad_norm": 1.1241260766983032, "learning_rate": 2e-05, "loss": 0.04309035, "step": 23737 }, { "epoch": 47.476, "grad_norm": 0.9888151288032532, "learning_rate": 2e-05, "loss": 0.03744877, "step": 23738 }, { "epoch": 47.478, "grad_norm": 1.8089641332626343, "learning_rate": 2e-05, "loss": 0.04753342, "step": 23739 }, { "epoch": 47.48, "grad_norm": 1.054860234260559, "learning_rate": 2e-05, "loss": 0.0452208, "step": 23740 }, { "epoch": 47.482, "grad_norm": 0.907122790813446, "learning_rate": 2e-05, "loss": 0.03199127, "step": 23741 }, { "epoch": 47.484, "grad_norm": 1.561472773551941, "learning_rate": 2e-05, "loss": 0.04225186, "step": 23742 }, { "epoch": 47.486, "grad_norm": 0.9772884845733643, "learning_rate": 2e-05, "loss": 0.03815705, "step": 23743 }, { "epoch": 47.488, "grad_norm": 1.0331170558929443, "learning_rate": 2e-05, "loss": 0.04061892, "step": 23744 }, { "epoch": 47.49, "grad_norm": 1.2093883752822876, "learning_rate": 2e-05, "loss": 0.05806877, "step": 23745 }, { "epoch": 47.492, "grad_norm": 0.9849857687950134, "learning_rate": 2e-05, "loss": 0.03468157, "step": 23746 }, { "epoch": 47.494, "grad_norm": 0.9634231925010681, "learning_rate": 2e-05, "loss": 0.04161776, "step": 23747 }, { "epoch": 47.496, "grad_norm": 0.9344093203544617, "learning_rate": 2e-05, "loss": 0.03972005, "step": 23748 }, { "epoch": 47.498, "grad_norm": 1.2977590560913086, "learning_rate": 2e-05, "loss": 0.05432611, "step": 23749 }, { "epoch": 47.5, "grad_norm": 1.1892510652542114, "learning_rate": 2e-05, "loss": 0.04734399, "step": 23750 }, { "epoch": 47.502, "grad_norm": 1.108474612236023, "learning_rate": 2e-05, "loss": 0.04448011, "step": 23751 }, { "epoch": 47.504, "grad_norm": 1.697208046913147, "learning_rate": 2e-05, "loss": 0.04337654, "step": 23752 }, { "epoch": 47.506, "grad_norm": 1.451831340789795, "learning_rate": 2e-05, "loss": 0.03583319, "step": 23753 }, { "epoch": 47.508, "grad_norm": 2.994732141494751, "learning_rate": 2e-05, "loss": 0.05489254, "step": 23754 }, { "epoch": 47.51, "grad_norm": 1.6029448509216309, "learning_rate": 2e-05, "loss": 0.04859427, "step": 23755 }, { "epoch": 47.512, "grad_norm": 1.36371648311615, "learning_rate": 2e-05, "loss": 0.04144289, "step": 23756 }, { "epoch": 47.514, "grad_norm": 0.9255298972129822, "learning_rate": 2e-05, "loss": 0.03162189, "step": 23757 }, { "epoch": 47.516, "grad_norm": 1.2513339519500732, "learning_rate": 2e-05, "loss": 0.04965008, "step": 23758 }, { "epoch": 47.518, "grad_norm": 1.1039848327636719, "learning_rate": 2e-05, "loss": 0.05336472, "step": 23759 }, { "epoch": 47.52, "grad_norm": 1.9740835428237915, "learning_rate": 2e-05, "loss": 0.04386981, "step": 23760 }, { "epoch": 47.522, "grad_norm": 1.2000198364257812, "learning_rate": 2e-05, "loss": 0.05311165, "step": 23761 }, { "epoch": 47.524, "grad_norm": 0.9251067638397217, "learning_rate": 2e-05, "loss": 0.0350918, "step": 23762 }, { "epoch": 47.526, "grad_norm": 1.2484253644943237, "learning_rate": 2e-05, "loss": 0.05372336, "step": 23763 }, { "epoch": 47.528, "grad_norm": 1.3712886571884155, "learning_rate": 2e-05, "loss": 0.04213833, "step": 23764 }, { "epoch": 47.53, "grad_norm": 1.0598891973495483, "learning_rate": 2e-05, "loss": 0.03917694, "step": 23765 }, { "epoch": 47.532, "grad_norm": 0.9596943855285645, "learning_rate": 2e-05, "loss": 0.03351412, "step": 23766 }, { "epoch": 47.534, "grad_norm": 1.182827115058899, "learning_rate": 2e-05, "loss": 0.05069726, "step": 23767 }, { "epoch": 47.536, "grad_norm": 1.1593616008758545, "learning_rate": 2e-05, "loss": 0.04609878, "step": 23768 }, { "epoch": 47.538, "grad_norm": 1.1060556173324585, "learning_rate": 2e-05, "loss": 0.04310281, "step": 23769 }, { "epoch": 47.54, "grad_norm": 1.1579822301864624, "learning_rate": 2e-05, "loss": 0.05171558, "step": 23770 }, { "epoch": 47.542, "grad_norm": 0.8731370568275452, "learning_rate": 2e-05, "loss": 0.02871848, "step": 23771 }, { "epoch": 47.544, "grad_norm": 0.7383250594139099, "learning_rate": 2e-05, "loss": 0.01965765, "step": 23772 }, { "epoch": 47.546, "grad_norm": 1.9708306789398193, "learning_rate": 2e-05, "loss": 0.05063381, "step": 23773 }, { "epoch": 47.548, "grad_norm": 1.1777607202529907, "learning_rate": 2e-05, "loss": 0.03309356, "step": 23774 }, { "epoch": 47.55, "grad_norm": 1.8396910429000854, "learning_rate": 2e-05, "loss": 0.04351158, "step": 23775 }, { "epoch": 47.552, "grad_norm": 1.2143644094467163, "learning_rate": 2e-05, "loss": 0.04505132, "step": 23776 }, { "epoch": 47.554, "grad_norm": 1.2833964824676514, "learning_rate": 2e-05, "loss": 0.05171269, "step": 23777 }, { "epoch": 47.556, "grad_norm": 0.9669860005378723, "learning_rate": 2e-05, "loss": 0.02665868, "step": 23778 }, { "epoch": 47.558, "grad_norm": 1.3167015314102173, "learning_rate": 2e-05, "loss": 0.04074793, "step": 23779 }, { "epoch": 47.56, "grad_norm": 1.9248770475387573, "learning_rate": 2e-05, "loss": 0.03849473, "step": 23780 }, { "epoch": 47.562, "grad_norm": 1.2845475673675537, "learning_rate": 2e-05, "loss": 0.04953473, "step": 23781 }, { "epoch": 47.564, "grad_norm": 0.8561571836471558, "learning_rate": 2e-05, "loss": 0.03033238, "step": 23782 }, { "epoch": 47.566, "grad_norm": 1.010486125946045, "learning_rate": 2e-05, "loss": 0.03866864, "step": 23783 }, { "epoch": 47.568, "grad_norm": 0.9784160852432251, "learning_rate": 2e-05, "loss": 0.0382061, "step": 23784 }, { "epoch": 47.57, "grad_norm": 1.116437315940857, "learning_rate": 2e-05, "loss": 0.04971761, "step": 23785 }, { "epoch": 47.572, "grad_norm": 1.558053731918335, "learning_rate": 2e-05, "loss": 0.05700563, "step": 23786 }, { "epoch": 47.574, "grad_norm": 1.1035943031311035, "learning_rate": 2e-05, "loss": 0.03902214, "step": 23787 }, { "epoch": 47.576, "grad_norm": 1.0028114318847656, "learning_rate": 2e-05, "loss": 0.03977474, "step": 23788 }, { "epoch": 47.578, "grad_norm": 0.9390711188316345, "learning_rate": 2e-05, "loss": 0.03180095, "step": 23789 }, { "epoch": 47.58, "grad_norm": 2.1146061420440674, "learning_rate": 2e-05, "loss": 0.03937341, "step": 23790 }, { "epoch": 47.582, "grad_norm": 1.0367655754089355, "learning_rate": 2e-05, "loss": 0.02810624, "step": 23791 }, { "epoch": 47.584, "grad_norm": 1.0060728788375854, "learning_rate": 2e-05, "loss": 0.04509538, "step": 23792 }, { "epoch": 47.586, "grad_norm": 1.1776868104934692, "learning_rate": 2e-05, "loss": 0.04948791, "step": 23793 }, { "epoch": 47.588, "grad_norm": 1.3009662628173828, "learning_rate": 2e-05, "loss": 0.04440022, "step": 23794 }, { "epoch": 47.59, "grad_norm": 1.1032781600952148, "learning_rate": 2e-05, "loss": 0.04495643, "step": 23795 }, { "epoch": 47.592, "grad_norm": 1.1553269624710083, "learning_rate": 2e-05, "loss": 0.05822661, "step": 23796 }, { "epoch": 47.594, "grad_norm": 1.029470443725586, "learning_rate": 2e-05, "loss": 0.03393674, "step": 23797 }, { "epoch": 47.596, "grad_norm": 0.8912624716758728, "learning_rate": 2e-05, "loss": 0.03417216, "step": 23798 }, { "epoch": 47.598, "grad_norm": 1.0182663202285767, "learning_rate": 2e-05, "loss": 0.03266668, "step": 23799 }, { "epoch": 47.6, "grad_norm": 0.9908261895179749, "learning_rate": 2e-05, "loss": 0.0391544, "step": 23800 }, { "epoch": 47.602, "grad_norm": 1.1588066816329956, "learning_rate": 2e-05, "loss": 0.04909838, "step": 23801 }, { "epoch": 47.604, "grad_norm": 0.9654898643493652, "learning_rate": 2e-05, "loss": 0.03736303, "step": 23802 }, { "epoch": 47.606, "grad_norm": 1.5304967164993286, "learning_rate": 2e-05, "loss": 0.05671806, "step": 23803 }, { "epoch": 47.608, "grad_norm": 1.0687037706375122, "learning_rate": 2e-05, "loss": 0.03987087, "step": 23804 }, { "epoch": 47.61, "grad_norm": 0.911521852016449, "learning_rate": 2e-05, "loss": 0.03052905, "step": 23805 }, { "epoch": 47.612, "grad_norm": 1.4880046844482422, "learning_rate": 2e-05, "loss": 0.04811275, "step": 23806 }, { "epoch": 47.614, "grad_norm": 0.96980220079422, "learning_rate": 2e-05, "loss": 0.03221977, "step": 23807 }, { "epoch": 47.616, "grad_norm": 1.1340968608856201, "learning_rate": 2e-05, "loss": 0.03261177, "step": 23808 }, { "epoch": 47.618, "grad_norm": 0.9944406747817993, "learning_rate": 2e-05, "loss": 0.03609553, "step": 23809 }, { "epoch": 47.62, "grad_norm": 1.8252726793289185, "learning_rate": 2e-05, "loss": 0.04522093, "step": 23810 }, { "epoch": 47.622, "grad_norm": 2.8293583393096924, "learning_rate": 2e-05, "loss": 0.0598088, "step": 23811 }, { "epoch": 47.624, "grad_norm": 1.1677912473678589, "learning_rate": 2e-05, "loss": 0.04475021, "step": 23812 }, { "epoch": 47.626, "grad_norm": 1.335666298866272, "learning_rate": 2e-05, "loss": 0.04401172, "step": 23813 }, { "epoch": 47.628, "grad_norm": 1.1240755319595337, "learning_rate": 2e-05, "loss": 0.05180898, "step": 23814 }, { "epoch": 47.63, "grad_norm": 0.9937646985054016, "learning_rate": 2e-05, "loss": 0.03748678, "step": 23815 }, { "epoch": 47.632, "grad_norm": 1.1094286441802979, "learning_rate": 2e-05, "loss": 0.04813768, "step": 23816 }, { "epoch": 47.634, "grad_norm": 1.1459041833877563, "learning_rate": 2e-05, "loss": 0.05247568, "step": 23817 }, { "epoch": 47.636, "grad_norm": 0.9695457220077515, "learning_rate": 2e-05, "loss": 0.0421034, "step": 23818 }, { "epoch": 47.638, "grad_norm": 0.9304220080375671, "learning_rate": 2e-05, "loss": 0.02977398, "step": 23819 }, { "epoch": 47.64, "grad_norm": 1.2126814126968384, "learning_rate": 2e-05, "loss": 0.05115549, "step": 23820 }, { "epoch": 47.642, "grad_norm": 1.9591553211212158, "learning_rate": 2e-05, "loss": 0.06373169, "step": 23821 }, { "epoch": 47.644, "grad_norm": 1.011116862297058, "learning_rate": 2e-05, "loss": 0.02203134, "step": 23822 }, { "epoch": 47.646, "grad_norm": 1.1005288362503052, "learning_rate": 2e-05, "loss": 0.04589593, "step": 23823 }, { "epoch": 47.648, "grad_norm": 1.2178517580032349, "learning_rate": 2e-05, "loss": 0.05600277, "step": 23824 }, { "epoch": 47.65, "grad_norm": 1.7519043684005737, "learning_rate": 2e-05, "loss": 0.06558504, "step": 23825 }, { "epoch": 47.652, "grad_norm": 1.8203493356704712, "learning_rate": 2e-05, "loss": 0.04869806, "step": 23826 }, { "epoch": 47.654, "grad_norm": 1.139139175415039, "learning_rate": 2e-05, "loss": 0.0433061, "step": 23827 }, { "epoch": 47.656, "grad_norm": 0.9019608497619629, "learning_rate": 2e-05, "loss": 0.03334299, "step": 23828 }, { "epoch": 47.658, "grad_norm": 1.0268332958221436, "learning_rate": 2e-05, "loss": 0.03980953, "step": 23829 }, { "epoch": 47.66, "grad_norm": 1.2267796993255615, "learning_rate": 2e-05, "loss": 0.03700276, "step": 23830 }, { "epoch": 47.662, "grad_norm": 1.1980990171432495, "learning_rate": 2e-05, "loss": 0.04482572, "step": 23831 }, { "epoch": 47.664, "grad_norm": 0.9795111417770386, "learning_rate": 2e-05, "loss": 0.03125534, "step": 23832 }, { "epoch": 47.666, "grad_norm": 1.0323877334594727, "learning_rate": 2e-05, "loss": 0.04212714, "step": 23833 }, { "epoch": 47.668, "grad_norm": 1.0116699934005737, "learning_rate": 2e-05, "loss": 0.04056319, "step": 23834 }, { "epoch": 47.67, "grad_norm": 1.3453792333602905, "learning_rate": 2e-05, "loss": 0.04402475, "step": 23835 }, { "epoch": 47.672, "grad_norm": 1.0550172328948975, "learning_rate": 2e-05, "loss": 0.04239871, "step": 23836 }, { "epoch": 47.674, "grad_norm": 1.3831201791763306, "learning_rate": 2e-05, "loss": 0.04013387, "step": 23837 }, { "epoch": 47.676, "grad_norm": 1.455228567123413, "learning_rate": 2e-05, "loss": 0.05773637, "step": 23838 }, { "epoch": 47.678, "grad_norm": 1.1021493673324585, "learning_rate": 2e-05, "loss": 0.04290526, "step": 23839 }, { "epoch": 47.68, "grad_norm": 3.093343496322632, "learning_rate": 2e-05, "loss": 0.05561545, "step": 23840 }, { "epoch": 47.682, "grad_norm": 1.1181625127792358, "learning_rate": 2e-05, "loss": 0.04381544, "step": 23841 }, { "epoch": 47.684, "grad_norm": 0.9953621625900269, "learning_rate": 2e-05, "loss": 0.03505832, "step": 23842 }, { "epoch": 47.686, "grad_norm": 1.3041609525680542, "learning_rate": 2e-05, "loss": 0.04813976, "step": 23843 }, { "epoch": 47.688, "grad_norm": 1.140155553817749, "learning_rate": 2e-05, "loss": 0.05691821, "step": 23844 }, { "epoch": 47.69, "grad_norm": 1.0486043691635132, "learning_rate": 2e-05, "loss": 0.05118111, "step": 23845 }, { "epoch": 47.692, "grad_norm": 1.4789204597473145, "learning_rate": 2e-05, "loss": 0.03433579, "step": 23846 }, { "epoch": 47.694, "grad_norm": 1.0473746061325073, "learning_rate": 2e-05, "loss": 0.03203419, "step": 23847 }, { "epoch": 47.696, "grad_norm": 1.1864687204360962, "learning_rate": 2e-05, "loss": 0.04299153, "step": 23848 }, { "epoch": 47.698, "grad_norm": 1.2044426202774048, "learning_rate": 2e-05, "loss": 0.03248603, "step": 23849 }, { "epoch": 47.7, "grad_norm": 1.1632757186889648, "learning_rate": 2e-05, "loss": 0.04769236, "step": 23850 }, { "epoch": 47.702, "grad_norm": 1.2773628234863281, "learning_rate": 2e-05, "loss": 0.04456393, "step": 23851 }, { "epoch": 47.704, "grad_norm": 0.996157169342041, "learning_rate": 2e-05, "loss": 0.03646937, "step": 23852 }, { "epoch": 47.706, "grad_norm": 1.1221864223480225, "learning_rate": 2e-05, "loss": 0.03868204, "step": 23853 }, { "epoch": 47.708, "grad_norm": 1.112217664718628, "learning_rate": 2e-05, "loss": 0.02747077, "step": 23854 }, { "epoch": 47.71, "grad_norm": 1.0908452272415161, "learning_rate": 2e-05, "loss": 0.04687804, "step": 23855 }, { "epoch": 47.712, "grad_norm": 1.0508463382720947, "learning_rate": 2e-05, "loss": 0.03368923, "step": 23856 }, { "epoch": 47.714, "grad_norm": 1.5289148092269897, "learning_rate": 2e-05, "loss": 0.03400174, "step": 23857 }, { "epoch": 47.716, "grad_norm": 1.4201314449310303, "learning_rate": 2e-05, "loss": 0.05422761, "step": 23858 }, { "epoch": 47.718, "grad_norm": 1.1497098207473755, "learning_rate": 2e-05, "loss": 0.0567955, "step": 23859 }, { "epoch": 47.72, "grad_norm": 1.1772329807281494, "learning_rate": 2e-05, "loss": 0.04190015, "step": 23860 }, { "epoch": 47.722, "grad_norm": 1.1032140254974365, "learning_rate": 2e-05, "loss": 0.04648539, "step": 23861 }, { "epoch": 47.724, "grad_norm": 1.627834439277649, "learning_rate": 2e-05, "loss": 0.02917214, "step": 23862 }, { "epoch": 47.726, "grad_norm": 1.1580886840820312, "learning_rate": 2e-05, "loss": 0.03432341, "step": 23863 }, { "epoch": 47.728, "grad_norm": 0.8925065994262695, "learning_rate": 2e-05, "loss": 0.02663673, "step": 23864 }, { "epoch": 47.73, "grad_norm": 3.2009060382843018, "learning_rate": 2e-05, "loss": 0.04521102, "step": 23865 }, { "epoch": 47.732, "grad_norm": 1.4593100547790527, "learning_rate": 2e-05, "loss": 0.0459123, "step": 23866 }, { "epoch": 47.734, "grad_norm": 2.065211772918701, "learning_rate": 2e-05, "loss": 0.04068485, "step": 23867 }, { "epoch": 47.736, "grad_norm": 1.241451621055603, "learning_rate": 2e-05, "loss": 0.03518833, "step": 23868 }, { "epoch": 47.738, "grad_norm": 0.8956775069236755, "learning_rate": 2e-05, "loss": 0.03247586, "step": 23869 }, { "epoch": 47.74, "grad_norm": 1.089477300643921, "learning_rate": 2e-05, "loss": 0.04340686, "step": 23870 }, { "epoch": 47.742, "grad_norm": 1.285570740699768, "learning_rate": 2e-05, "loss": 0.05197541, "step": 23871 }, { "epoch": 47.744, "grad_norm": 1.2594149112701416, "learning_rate": 2e-05, "loss": 0.06486955, "step": 23872 }, { "epoch": 47.746, "grad_norm": 1.2255566120147705, "learning_rate": 2e-05, "loss": 0.03215192, "step": 23873 }, { "epoch": 47.748, "grad_norm": 1.3099251985549927, "learning_rate": 2e-05, "loss": 0.049097, "step": 23874 }, { "epoch": 47.75, "grad_norm": 0.9618019461631775, "learning_rate": 2e-05, "loss": 0.0319085, "step": 23875 }, { "epoch": 47.752, "grad_norm": 5.491909027099609, "learning_rate": 2e-05, "loss": 0.06010506, "step": 23876 }, { "epoch": 47.754, "grad_norm": 1.1435209512710571, "learning_rate": 2e-05, "loss": 0.0391231, "step": 23877 }, { "epoch": 47.756, "grad_norm": 1.0011411905288696, "learning_rate": 2e-05, "loss": 0.04336783, "step": 23878 }, { "epoch": 47.758, "grad_norm": 1.356178641319275, "learning_rate": 2e-05, "loss": 0.04064961, "step": 23879 }, { "epoch": 47.76, "grad_norm": 1.0911813974380493, "learning_rate": 2e-05, "loss": 0.05042182, "step": 23880 }, { "epoch": 47.762, "grad_norm": 1.0417226552963257, "learning_rate": 2e-05, "loss": 0.03660811, "step": 23881 }, { "epoch": 47.764, "grad_norm": 1.377353310585022, "learning_rate": 2e-05, "loss": 0.0593021, "step": 23882 }, { "epoch": 47.766, "grad_norm": 1.1589546203613281, "learning_rate": 2e-05, "loss": 0.03872295, "step": 23883 }, { "epoch": 47.768, "grad_norm": 1.9420932531356812, "learning_rate": 2e-05, "loss": 0.04654341, "step": 23884 }, { "epoch": 47.77, "grad_norm": 1.6046696901321411, "learning_rate": 2e-05, "loss": 0.03534497, "step": 23885 }, { "epoch": 47.772, "grad_norm": 1.0587979555130005, "learning_rate": 2e-05, "loss": 0.04580415, "step": 23886 }, { "epoch": 47.774, "grad_norm": 1.0873771905899048, "learning_rate": 2e-05, "loss": 0.05293707, "step": 23887 }, { "epoch": 47.776, "grad_norm": 0.9115861654281616, "learning_rate": 2e-05, "loss": 0.02041997, "step": 23888 }, { "epoch": 47.778, "grad_norm": 1.300163984298706, "learning_rate": 2e-05, "loss": 0.04854197, "step": 23889 }, { "epoch": 47.78, "grad_norm": 1.0665444135665894, "learning_rate": 2e-05, "loss": 0.04010332, "step": 23890 }, { "epoch": 47.782, "grad_norm": 1.2631436586380005, "learning_rate": 2e-05, "loss": 0.04193042, "step": 23891 }, { "epoch": 47.784, "grad_norm": 1.455126166343689, "learning_rate": 2e-05, "loss": 0.04916675, "step": 23892 }, { "epoch": 47.786, "grad_norm": 1.249609112739563, "learning_rate": 2e-05, "loss": 0.04592934, "step": 23893 }, { "epoch": 47.788, "grad_norm": 1.2425901889801025, "learning_rate": 2e-05, "loss": 0.0466116, "step": 23894 }, { "epoch": 47.79, "grad_norm": 1.2255349159240723, "learning_rate": 2e-05, "loss": 0.04162162, "step": 23895 }, { "epoch": 47.792, "grad_norm": 0.8573364019393921, "learning_rate": 2e-05, "loss": 0.03105495, "step": 23896 }, { "epoch": 47.794, "grad_norm": 1.0638104677200317, "learning_rate": 2e-05, "loss": 0.04472728, "step": 23897 }, { "epoch": 47.796, "grad_norm": 1.8433798551559448, "learning_rate": 2e-05, "loss": 0.06242759, "step": 23898 }, { "epoch": 47.798, "grad_norm": 1.0222809314727783, "learning_rate": 2e-05, "loss": 0.04283288, "step": 23899 }, { "epoch": 47.8, "grad_norm": 0.8191377520561218, "learning_rate": 2e-05, "loss": 0.02410599, "step": 23900 }, { "epoch": 47.802, "grad_norm": 0.7901560068130493, "learning_rate": 2e-05, "loss": 0.03071816, "step": 23901 }, { "epoch": 47.804, "grad_norm": 1.2709864377975464, "learning_rate": 2e-05, "loss": 0.0388941, "step": 23902 }, { "epoch": 47.806, "grad_norm": 1.0754424333572388, "learning_rate": 2e-05, "loss": 0.05183701, "step": 23903 }, { "epoch": 47.808, "grad_norm": 1.0208688974380493, "learning_rate": 2e-05, "loss": 0.03213278, "step": 23904 }, { "epoch": 47.81, "grad_norm": 1.1351845264434814, "learning_rate": 2e-05, "loss": 0.04791225, "step": 23905 }, { "epoch": 47.812, "grad_norm": 0.9763931632041931, "learning_rate": 2e-05, "loss": 0.02834095, "step": 23906 }, { "epoch": 47.814, "grad_norm": 1.45331609249115, "learning_rate": 2e-05, "loss": 0.03742079, "step": 23907 }, { "epoch": 47.816, "grad_norm": 1.1720417737960815, "learning_rate": 2e-05, "loss": 0.04424246, "step": 23908 }, { "epoch": 47.818, "grad_norm": 1.2223548889160156, "learning_rate": 2e-05, "loss": 0.05388419, "step": 23909 }, { "epoch": 47.82, "grad_norm": 1.1633915901184082, "learning_rate": 2e-05, "loss": 0.05120426, "step": 23910 }, { "epoch": 47.822, "grad_norm": 1.53520667552948, "learning_rate": 2e-05, "loss": 0.0560784, "step": 23911 }, { "epoch": 47.824, "grad_norm": 2.0739471912384033, "learning_rate": 2e-05, "loss": 0.04297794, "step": 23912 }, { "epoch": 47.826, "grad_norm": 1.0577243566513062, "learning_rate": 2e-05, "loss": 0.03656144, "step": 23913 }, { "epoch": 47.828, "grad_norm": 1.0901859998703003, "learning_rate": 2e-05, "loss": 0.04160123, "step": 23914 }, { "epoch": 47.83, "grad_norm": 0.9747123122215271, "learning_rate": 2e-05, "loss": 0.03230017, "step": 23915 }, { "epoch": 47.832, "grad_norm": 0.6412887573242188, "learning_rate": 2e-05, "loss": 0.0156299, "step": 23916 }, { "epoch": 47.834, "grad_norm": 1.339723825454712, "learning_rate": 2e-05, "loss": 0.05191834, "step": 23917 }, { "epoch": 47.836, "grad_norm": 1.0116145610809326, "learning_rate": 2e-05, "loss": 0.03968196, "step": 23918 }, { "epoch": 47.838, "grad_norm": 1.3960124254226685, "learning_rate": 2e-05, "loss": 0.06375577, "step": 23919 }, { "epoch": 47.84, "grad_norm": 1.4913820028305054, "learning_rate": 2e-05, "loss": 0.04322405, "step": 23920 }, { "epoch": 47.842, "grad_norm": 1.3400276899337769, "learning_rate": 2e-05, "loss": 0.05357553, "step": 23921 }, { "epoch": 47.844, "grad_norm": 0.9345904588699341, "learning_rate": 2e-05, "loss": 0.03811813, "step": 23922 }, { "epoch": 47.846, "grad_norm": 1.6370712518692017, "learning_rate": 2e-05, "loss": 0.02961573, "step": 23923 }, { "epoch": 47.848, "grad_norm": 0.9856836199760437, "learning_rate": 2e-05, "loss": 0.03857559, "step": 23924 }, { "epoch": 47.85, "grad_norm": 1.200377345085144, "learning_rate": 2e-05, "loss": 0.0601544, "step": 23925 }, { "epoch": 47.852, "grad_norm": 1.2700010538101196, "learning_rate": 2e-05, "loss": 0.04210861, "step": 23926 }, { "epoch": 47.854, "grad_norm": 1.0622698068618774, "learning_rate": 2e-05, "loss": 0.03952032, "step": 23927 }, { "epoch": 47.856, "grad_norm": 1.6621780395507812, "learning_rate": 2e-05, "loss": 0.03952279, "step": 23928 }, { "epoch": 47.858, "grad_norm": 1.2052236795425415, "learning_rate": 2e-05, "loss": 0.04863922, "step": 23929 }, { "epoch": 47.86, "grad_norm": 1.105750322341919, "learning_rate": 2e-05, "loss": 0.04230881, "step": 23930 }, { "epoch": 47.862, "grad_norm": 1.081488847732544, "learning_rate": 2e-05, "loss": 0.03648605, "step": 23931 }, { "epoch": 47.864, "grad_norm": 1.1100019216537476, "learning_rate": 2e-05, "loss": 0.04908447, "step": 23932 }, { "epoch": 47.866, "grad_norm": 0.9001398086547852, "learning_rate": 2e-05, "loss": 0.03521951, "step": 23933 }, { "epoch": 47.868, "grad_norm": 2.9443519115448, "learning_rate": 2e-05, "loss": 0.05508538, "step": 23934 }, { "epoch": 47.87, "grad_norm": 1.1646082401275635, "learning_rate": 2e-05, "loss": 0.04268147, "step": 23935 }, { "epoch": 47.872, "grad_norm": 1.017196536064148, "learning_rate": 2e-05, "loss": 0.04618394, "step": 23936 }, { "epoch": 47.874, "grad_norm": 1.1412867307662964, "learning_rate": 2e-05, "loss": 0.05102488, "step": 23937 }, { "epoch": 47.876, "grad_norm": 2.0027015209198, "learning_rate": 2e-05, "loss": 0.03871121, "step": 23938 }, { "epoch": 47.878, "grad_norm": 1.046513557434082, "learning_rate": 2e-05, "loss": 0.04383466, "step": 23939 }, { "epoch": 47.88, "grad_norm": 1.7802237272262573, "learning_rate": 2e-05, "loss": 0.03680856, "step": 23940 }, { "epoch": 47.882, "grad_norm": 1.2104276418685913, "learning_rate": 2e-05, "loss": 0.05254406, "step": 23941 }, { "epoch": 47.884, "grad_norm": 0.8044677972793579, "learning_rate": 2e-05, "loss": 0.02953346, "step": 23942 }, { "epoch": 47.886, "grad_norm": 1.8584107160568237, "learning_rate": 2e-05, "loss": 0.05697693, "step": 23943 }, { "epoch": 47.888, "grad_norm": 0.8562052249908447, "learning_rate": 2e-05, "loss": 0.03730064, "step": 23944 }, { "epoch": 47.89, "grad_norm": 1.434039831161499, "learning_rate": 2e-05, "loss": 0.05032197, "step": 23945 }, { "epoch": 47.892, "grad_norm": 1.0825904607772827, "learning_rate": 2e-05, "loss": 0.04828491, "step": 23946 }, { "epoch": 47.894, "grad_norm": 1.0735224485397339, "learning_rate": 2e-05, "loss": 0.04570151, "step": 23947 }, { "epoch": 47.896, "grad_norm": 1.3844759464263916, "learning_rate": 2e-05, "loss": 0.06105404, "step": 23948 }, { "epoch": 47.898, "grad_norm": 1.1327170133590698, "learning_rate": 2e-05, "loss": 0.04334442, "step": 23949 }, { "epoch": 47.9, "grad_norm": 1.0723904371261597, "learning_rate": 2e-05, "loss": 0.04543884, "step": 23950 }, { "epoch": 47.902, "grad_norm": 1.3100708723068237, "learning_rate": 2e-05, "loss": 0.04745476, "step": 23951 }, { "epoch": 47.904, "grad_norm": 1.4559745788574219, "learning_rate": 2e-05, "loss": 0.04406179, "step": 23952 }, { "epoch": 47.906, "grad_norm": 1.3097699880599976, "learning_rate": 2e-05, "loss": 0.04379947, "step": 23953 }, { "epoch": 47.908, "grad_norm": 1.1045814752578735, "learning_rate": 2e-05, "loss": 0.04502226, "step": 23954 }, { "epoch": 47.91, "grad_norm": 1.1529544591903687, "learning_rate": 2e-05, "loss": 0.01744421, "step": 23955 }, { "epoch": 47.912, "grad_norm": 1.3042271137237549, "learning_rate": 2e-05, "loss": 0.05292697, "step": 23956 }, { "epoch": 47.914, "grad_norm": 2.7201201915740967, "learning_rate": 2e-05, "loss": 0.05306742, "step": 23957 }, { "epoch": 47.916, "grad_norm": 0.916701078414917, "learning_rate": 2e-05, "loss": 0.03236423, "step": 23958 }, { "epoch": 47.918, "grad_norm": 0.8871091604232788, "learning_rate": 2e-05, "loss": 0.03117875, "step": 23959 }, { "epoch": 47.92, "grad_norm": 2.142869472503662, "learning_rate": 2e-05, "loss": 0.05827209, "step": 23960 }, { "epoch": 47.922, "grad_norm": 2.4643468856811523, "learning_rate": 2e-05, "loss": 0.04971191, "step": 23961 }, { "epoch": 47.924, "grad_norm": 0.8512781262397766, "learning_rate": 2e-05, "loss": 0.02833353, "step": 23962 }, { "epoch": 47.926, "grad_norm": 1.081555962562561, "learning_rate": 2e-05, "loss": 0.05200005, "step": 23963 }, { "epoch": 47.928, "grad_norm": 1.1040608882904053, "learning_rate": 2e-05, "loss": 0.02534536, "step": 23964 }, { "epoch": 47.93, "grad_norm": 1.0904732942581177, "learning_rate": 2e-05, "loss": 0.04215907, "step": 23965 }, { "epoch": 47.932, "grad_norm": 1.4276522397994995, "learning_rate": 2e-05, "loss": 0.04699912, "step": 23966 }, { "epoch": 47.934, "grad_norm": 1.1870702505111694, "learning_rate": 2e-05, "loss": 0.04519269, "step": 23967 }, { "epoch": 47.936, "grad_norm": 0.894260823726654, "learning_rate": 2e-05, "loss": 0.02578199, "step": 23968 }, { "epoch": 47.938, "grad_norm": 1.0634931325912476, "learning_rate": 2e-05, "loss": 0.03927767, "step": 23969 }, { "epoch": 47.94, "grad_norm": 1.3898274898529053, "learning_rate": 2e-05, "loss": 0.04579826, "step": 23970 }, { "epoch": 47.942, "grad_norm": 0.939034104347229, "learning_rate": 2e-05, "loss": 0.03640142, "step": 23971 }, { "epoch": 47.944, "grad_norm": 1.1093189716339111, "learning_rate": 2e-05, "loss": 0.04051351, "step": 23972 }, { "epoch": 47.946, "grad_norm": 1.3459272384643555, "learning_rate": 2e-05, "loss": 0.06513465, "step": 23973 }, { "epoch": 47.948, "grad_norm": 0.8493332862854004, "learning_rate": 2e-05, "loss": 0.02616568, "step": 23974 }, { "epoch": 47.95, "grad_norm": 1.2915202379226685, "learning_rate": 2e-05, "loss": 0.0405372, "step": 23975 }, { "epoch": 47.952, "grad_norm": 1.3592990636825562, "learning_rate": 2e-05, "loss": 0.04476197, "step": 23976 }, { "epoch": 47.954, "grad_norm": 1.0893676280975342, "learning_rate": 2e-05, "loss": 0.04218225, "step": 23977 }, { "epoch": 47.956, "grad_norm": 2.6969592571258545, "learning_rate": 2e-05, "loss": 0.04906473, "step": 23978 }, { "epoch": 47.958, "grad_norm": 1.1934820413589478, "learning_rate": 2e-05, "loss": 0.0492557, "step": 23979 }, { "epoch": 47.96, "grad_norm": 1.1213953495025635, "learning_rate": 2e-05, "loss": 0.04141422, "step": 23980 }, { "epoch": 47.962, "grad_norm": 1.6654391288757324, "learning_rate": 2e-05, "loss": 0.03252709, "step": 23981 }, { "epoch": 47.964, "grad_norm": 1.0095394849777222, "learning_rate": 2e-05, "loss": 0.03018342, "step": 23982 }, { "epoch": 47.966, "grad_norm": 8.535420417785645, "learning_rate": 2e-05, "loss": 0.04510078, "step": 23983 }, { "epoch": 47.968, "grad_norm": 1.198800802230835, "learning_rate": 2e-05, "loss": 0.04911582, "step": 23984 }, { "epoch": 47.97, "grad_norm": 1.0677434206008911, "learning_rate": 2e-05, "loss": 0.04196424, "step": 23985 }, { "epoch": 47.972, "grad_norm": 0.988368570804596, "learning_rate": 2e-05, "loss": 0.03747898, "step": 23986 }, { "epoch": 47.974, "grad_norm": 1.2724394798278809, "learning_rate": 2e-05, "loss": 0.04838841, "step": 23987 }, { "epoch": 47.976, "grad_norm": 1.0219782590866089, "learning_rate": 2e-05, "loss": 0.03595531, "step": 23988 }, { "epoch": 47.978, "grad_norm": 1.457436203956604, "learning_rate": 2e-05, "loss": 0.05869904, "step": 23989 }, { "epoch": 47.98, "grad_norm": 1.9029045104980469, "learning_rate": 2e-05, "loss": 0.05415683, "step": 23990 }, { "epoch": 47.982, "grad_norm": 1.030660629272461, "learning_rate": 2e-05, "loss": 0.03746786, "step": 23991 }, { "epoch": 47.984, "grad_norm": 2.008253574371338, "learning_rate": 2e-05, "loss": 0.06038243, "step": 23992 }, { "epoch": 47.986, "grad_norm": 0.9582119584083557, "learning_rate": 2e-05, "loss": 0.0431345, "step": 23993 }, { "epoch": 47.988, "grad_norm": 1.0303829908370972, "learning_rate": 2e-05, "loss": 0.0458141, "step": 23994 }, { "epoch": 47.99, "grad_norm": 1.966546893119812, "learning_rate": 2e-05, "loss": 0.05228059, "step": 23995 }, { "epoch": 47.992, "grad_norm": 2.835991144180298, "learning_rate": 2e-05, "loss": 0.0535783, "step": 23996 }, { "epoch": 47.994, "grad_norm": 1.101151704788208, "learning_rate": 2e-05, "loss": 0.04589385, "step": 23997 }, { "epoch": 47.996, "grad_norm": 1.0546307563781738, "learning_rate": 2e-05, "loss": 0.03454264, "step": 23998 }, { "epoch": 47.998, "grad_norm": 1.2539098262786865, "learning_rate": 2e-05, "loss": 0.04156297, "step": 23999 }, { "epoch": 48.0, "grad_norm": 0.9454109072685242, "learning_rate": 2e-05, "loss": 0.0316497, "step": 24000 }, { "epoch": 48.0, "eval_performance": { "AngleClassification_1": 1.0, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9900199600798403, "Equal_1": 0.994, "Equal_2": 0.9840319361277445, "Equal_3": 0.9940119760479041, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.998003992015968, "Parallel_1": 0.9899799599198397, "Parallel_2": 0.9979959919839679, "Parallel_3": 0.992, "Perpendicular_1": 0.998, "Perpendicular_2": 0.996, "Perpendicular_3": 0.9078156312625251, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 0.998, "PointLiesOnCircle_3": 0.994, "PointLiesOnLine_1": 0.9939879759519038, "PointLiesOnLine_2": 1.0, "PointLiesOnLine_3": 0.9840319361277445 }, "eval_runtime": 224.352, "eval_samples_per_second": 46.801, "eval_steps_per_second": 0.936, "step": 24000 }, { "epoch": 48.002, "grad_norm": 1.0585200786590576, "learning_rate": 2e-05, "loss": 0.04020933, "step": 24001 }, { "epoch": 48.004, "grad_norm": 2.4168953895568848, "learning_rate": 2e-05, "loss": 0.03944024, "step": 24002 }, { "epoch": 48.006, "grad_norm": 0.6975319385528564, "learning_rate": 2e-05, "loss": 0.01659875, "step": 24003 }, { "epoch": 48.008, "grad_norm": 1.1666628122329712, "learning_rate": 2e-05, "loss": 0.03890518, "step": 24004 }, { "epoch": 48.01, "grad_norm": 1.1534910202026367, "learning_rate": 2e-05, "loss": 0.03839009, "step": 24005 }, { "epoch": 48.012, "grad_norm": 0.9748977422714233, "learning_rate": 2e-05, "loss": 0.03321246, "step": 24006 }, { "epoch": 48.014, "grad_norm": 1.1734809875488281, "learning_rate": 2e-05, "loss": 0.05604229, "step": 24007 }, { "epoch": 48.016, "grad_norm": 1.1300811767578125, "learning_rate": 2e-05, "loss": 0.05113455, "step": 24008 }, { "epoch": 48.018, "grad_norm": 1.0682682991027832, "learning_rate": 2e-05, "loss": 0.03408433, "step": 24009 }, { "epoch": 48.02, "grad_norm": 2.8409557342529297, "learning_rate": 2e-05, "loss": 0.05322661, "step": 24010 }, { "epoch": 48.022, "grad_norm": 1.5013360977172852, "learning_rate": 2e-05, "loss": 0.05106499, "step": 24011 }, { "epoch": 48.024, "grad_norm": 1.9971903562545776, "learning_rate": 2e-05, "loss": 0.04126588, "step": 24012 }, { "epoch": 48.026, "grad_norm": 1.0400359630584717, "learning_rate": 2e-05, "loss": 0.03624366, "step": 24013 }, { "epoch": 48.028, "grad_norm": 1.306491732597351, "learning_rate": 2e-05, "loss": 0.05151456, "step": 24014 }, { "epoch": 48.03, "grad_norm": 0.9899024963378906, "learning_rate": 2e-05, "loss": 0.03324458, "step": 24015 }, { "epoch": 48.032, "grad_norm": 1.1859395503997803, "learning_rate": 2e-05, "loss": 0.04399846, "step": 24016 }, { "epoch": 48.034, "grad_norm": 1.0433427095413208, "learning_rate": 2e-05, "loss": 0.04381704, "step": 24017 }, { "epoch": 48.036, "grad_norm": 1.0957852602005005, "learning_rate": 2e-05, "loss": 0.04393532, "step": 24018 }, { "epoch": 48.038, "grad_norm": 1.0296865701675415, "learning_rate": 2e-05, "loss": 0.02916445, "step": 24019 }, { "epoch": 48.04, "grad_norm": 1.0275839567184448, "learning_rate": 2e-05, "loss": 0.04477209, "step": 24020 }, { "epoch": 48.042, "grad_norm": 1.1662989854812622, "learning_rate": 2e-05, "loss": 0.05734527, "step": 24021 }, { "epoch": 48.044, "grad_norm": 1.2139095067977905, "learning_rate": 2e-05, "loss": 0.04160431, "step": 24022 }, { "epoch": 48.046, "grad_norm": 1.6533719301223755, "learning_rate": 2e-05, "loss": 0.05310958, "step": 24023 }, { "epoch": 48.048, "grad_norm": 1.05130934715271, "learning_rate": 2e-05, "loss": 0.04054154, "step": 24024 }, { "epoch": 48.05, "grad_norm": 1.297194242477417, "learning_rate": 2e-05, "loss": 0.04639637, "step": 24025 }, { "epoch": 48.052, "grad_norm": 1.6856399774551392, "learning_rate": 2e-05, "loss": 0.04775684, "step": 24026 }, { "epoch": 48.054, "grad_norm": 1.1158477067947388, "learning_rate": 2e-05, "loss": 0.05016273, "step": 24027 }, { "epoch": 48.056, "grad_norm": 1.101130723953247, "learning_rate": 2e-05, "loss": 0.03918815, "step": 24028 }, { "epoch": 48.058, "grad_norm": 0.9099655747413635, "learning_rate": 2e-05, "loss": 0.03572182, "step": 24029 }, { "epoch": 48.06, "grad_norm": 2.1670444011688232, "learning_rate": 2e-05, "loss": 0.03503799, "step": 24030 }, { "epoch": 48.062, "grad_norm": 0.8665119409561157, "learning_rate": 2e-05, "loss": 0.02643954, "step": 24031 }, { "epoch": 48.064, "grad_norm": 1.048344373703003, "learning_rate": 2e-05, "loss": 0.03780452, "step": 24032 }, { "epoch": 48.066, "grad_norm": 1.5224450826644897, "learning_rate": 2e-05, "loss": 0.04857984, "step": 24033 }, { "epoch": 48.068, "grad_norm": 1.0510910749435425, "learning_rate": 2e-05, "loss": 0.04771367, "step": 24034 }, { "epoch": 48.07, "grad_norm": 1.2036699056625366, "learning_rate": 2e-05, "loss": 0.04861332, "step": 24035 }, { "epoch": 48.072, "grad_norm": 0.987252950668335, "learning_rate": 2e-05, "loss": 0.04107942, "step": 24036 }, { "epoch": 48.074, "grad_norm": 1.2903460264205933, "learning_rate": 2e-05, "loss": 0.03605067, "step": 24037 }, { "epoch": 48.076, "grad_norm": 1.0169252157211304, "learning_rate": 2e-05, "loss": 0.04046835, "step": 24038 }, { "epoch": 48.078, "grad_norm": 1.0413953065872192, "learning_rate": 2e-05, "loss": 0.04654605, "step": 24039 }, { "epoch": 48.08, "grad_norm": 1.3490595817565918, "learning_rate": 2e-05, "loss": 0.06496443, "step": 24040 }, { "epoch": 48.082, "grad_norm": 1.1491061449050903, "learning_rate": 2e-05, "loss": 0.03919538, "step": 24041 }, { "epoch": 48.084, "grad_norm": 1.2369226217269897, "learning_rate": 2e-05, "loss": 0.05105065, "step": 24042 }, { "epoch": 48.086, "grad_norm": 1.1918967962265015, "learning_rate": 2e-05, "loss": 0.04221391, "step": 24043 }, { "epoch": 48.088, "grad_norm": 1.3993308544158936, "learning_rate": 2e-05, "loss": 0.04376852, "step": 24044 }, { "epoch": 48.09, "grad_norm": 1.6145975589752197, "learning_rate": 2e-05, "loss": 0.04266158, "step": 24045 }, { "epoch": 48.092, "grad_norm": 1.088741660118103, "learning_rate": 2e-05, "loss": 0.05069655, "step": 24046 }, { "epoch": 48.094, "grad_norm": 1.6371619701385498, "learning_rate": 2e-05, "loss": 0.04426225, "step": 24047 }, { "epoch": 48.096, "grad_norm": 1.2286287546157837, "learning_rate": 2e-05, "loss": 0.05310811, "step": 24048 }, { "epoch": 48.098, "grad_norm": 1.150545597076416, "learning_rate": 2e-05, "loss": 0.05518402, "step": 24049 }, { "epoch": 48.1, "grad_norm": 1.6020482778549194, "learning_rate": 2e-05, "loss": 0.03997891, "step": 24050 }, { "epoch": 48.102, "grad_norm": 2.0762016773223877, "learning_rate": 2e-05, "loss": 0.04192542, "step": 24051 }, { "epoch": 48.104, "grad_norm": 1.1126676797866821, "learning_rate": 2e-05, "loss": 0.05102539, "step": 24052 }, { "epoch": 48.106, "grad_norm": 1.1435058116912842, "learning_rate": 2e-05, "loss": 0.04193898, "step": 24053 }, { "epoch": 48.108, "grad_norm": 1.0798828601837158, "learning_rate": 2e-05, "loss": 0.04235696, "step": 24054 }, { "epoch": 48.11, "grad_norm": 1.190716028213501, "learning_rate": 2e-05, "loss": 0.05027313, "step": 24055 }, { "epoch": 48.112, "grad_norm": 2.0464377403259277, "learning_rate": 2e-05, "loss": 0.02551176, "step": 24056 }, { "epoch": 48.114, "grad_norm": 3.0386154651641846, "learning_rate": 2e-05, "loss": 0.04518011, "step": 24057 }, { "epoch": 48.116, "grad_norm": 1.0603337287902832, "learning_rate": 2e-05, "loss": 0.04088687, "step": 24058 }, { "epoch": 48.118, "grad_norm": 0.804222583770752, "learning_rate": 2e-05, "loss": 0.02038973, "step": 24059 }, { "epoch": 48.12, "grad_norm": 0.9675710797309875, "learning_rate": 2e-05, "loss": 0.03757504, "step": 24060 }, { "epoch": 48.122, "grad_norm": 1.158774971961975, "learning_rate": 2e-05, "loss": 0.03330313, "step": 24061 }, { "epoch": 48.124, "grad_norm": 1.0910955667495728, "learning_rate": 2e-05, "loss": 0.03416525, "step": 24062 }, { "epoch": 48.126, "grad_norm": 1.1997908353805542, "learning_rate": 2e-05, "loss": 0.04531018, "step": 24063 }, { "epoch": 48.128, "grad_norm": 1.223515510559082, "learning_rate": 2e-05, "loss": 0.0548752, "step": 24064 }, { "epoch": 48.13, "grad_norm": 1.1613900661468506, "learning_rate": 2e-05, "loss": 0.04808629, "step": 24065 }, { "epoch": 48.132, "grad_norm": 1.110720157623291, "learning_rate": 2e-05, "loss": 0.05487282, "step": 24066 }, { "epoch": 48.134, "grad_norm": 1.3976448774337769, "learning_rate": 2e-05, "loss": 0.04755714, "step": 24067 }, { "epoch": 48.136, "grad_norm": 0.8356794714927673, "learning_rate": 2e-05, "loss": 0.03247761, "step": 24068 }, { "epoch": 48.138, "grad_norm": 1.5441828966140747, "learning_rate": 2e-05, "loss": 0.04403179, "step": 24069 }, { "epoch": 48.14, "grad_norm": 1.1112858057022095, "learning_rate": 2e-05, "loss": 0.05304226, "step": 24070 }, { "epoch": 48.142, "grad_norm": 1.0913811922073364, "learning_rate": 2e-05, "loss": 0.04805337, "step": 24071 }, { "epoch": 48.144, "grad_norm": 1.1755995750427246, "learning_rate": 2e-05, "loss": 0.04096168, "step": 24072 }, { "epoch": 48.146, "grad_norm": 1.06121027469635, "learning_rate": 2e-05, "loss": 0.04569378, "step": 24073 }, { "epoch": 48.148, "grad_norm": 1.4051716327667236, "learning_rate": 2e-05, "loss": 0.06508437, "step": 24074 }, { "epoch": 48.15, "grad_norm": 1.4219509363174438, "learning_rate": 2e-05, "loss": 0.0414541, "step": 24075 }, { "epoch": 48.152, "grad_norm": 1.0712103843688965, "learning_rate": 2e-05, "loss": 0.03994721, "step": 24076 }, { "epoch": 48.154, "grad_norm": 1.32268226146698, "learning_rate": 2e-05, "loss": 0.03737288, "step": 24077 }, { "epoch": 48.156, "grad_norm": 1.0748323202133179, "learning_rate": 2e-05, "loss": 0.04750399, "step": 24078 }, { "epoch": 48.158, "grad_norm": 0.9537906050682068, "learning_rate": 2e-05, "loss": 0.04052457, "step": 24079 }, { "epoch": 48.16, "grad_norm": 1.1049823760986328, "learning_rate": 2e-05, "loss": 0.05609886, "step": 24080 }, { "epoch": 48.162, "grad_norm": 1.1439776420593262, "learning_rate": 2e-05, "loss": 0.04357223, "step": 24081 }, { "epoch": 48.164, "grad_norm": 1.0251052379608154, "learning_rate": 2e-05, "loss": 0.03643806, "step": 24082 }, { "epoch": 48.166, "grad_norm": 1.2846314907073975, "learning_rate": 2e-05, "loss": 0.05837364, "step": 24083 }, { "epoch": 48.168, "grad_norm": 1.2705838680267334, "learning_rate": 2e-05, "loss": 0.0587346, "step": 24084 }, { "epoch": 48.17, "grad_norm": 1.9749541282653809, "learning_rate": 2e-05, "loss": 0.05542563, "step": 24085 }, { "epoch": 48.172, "grad_norm": 1.9431688785552979, "learning_rate": 2e-05, "loss": 0.05142669, "step": 24086 }, { "epoch": 48.174, "grad_norm": 1.1629698276519775, "learning_rate": 2e-05, "loss": 0.03979198, "step": 24087 }, { "epoch": 48.176, "grad_norm": 1.2735978364944458, "learning_rate": 2e-05, "loss": 0.04574917, "step": 24088 }, { "epoch": 48.178, "grad_norm": 1.1088979244232178, "learning_rate": 2e-05, "loss": 0.045339, "step": 24089 }, { "epoch": 48.18, "grad_norm": 1.2304723262786865, "learning_rate": 2e-05, "loss": 0.04985692, "step": 24090 }, { "epoch": 48.182, "grad_norm": 0.968632698059082, "learning_rate": 2e-05, "loss": 0.03351081, "step": 24091 }, { "epoch": 48.184, "grad_norm": 1.0809069871902466, "learning_rate": 2e-05, "loss": 0.04593817, "step": 24092 }, { "epoch": 48.186, "grad_norm": 1.183622121810913, "learning_rate": 2e-05, "loss": 0.0475479, "step": 24093 }, { "epoch": 48.188, "grad_norm": 1.8864127397537231, "learning_rate": 2e-05, "loss": 0.04077492, "step": 24094 }, { "epoch": 48.19, "grad_norm": 0.9362359046936035, "learning_rate": 2e-05, "loss": 0.04031546, "step": 24095 }, { "epoch": 48.192, "grad_norm": 0.8645823001861572, "learning_rate": 2e-05, "loss": 0.03510315, "step": 24096 }, { "epoch": 48.194, "grad_norm": 0.8909531235694885, "learning_rate": 2e-05, "loss": 0.02867949, "step": 24097 }, { "epoch": 48.196, "grad_norm": 2.071213483810425, "learning_rate": 2e-05, "loss": 0.04632636, "step": 24098 }, { "epoch": 48.198, "grad_norm": 2.092963457107544, "learning_rate": 2e-05, "loss": 0.04260443, "step": 24099 }, { "epoch": 48.2, "grad_norm": 1.4926190376281738, "learning_rate": 2e-05, "loss": 0.04886392, "step": 24100 }, { "epoch": 48.202, "grad_norm": 1.0165867805480957, "learning_rate": 2e-05, "loss": 0.03357581, "step": 24101 }, { "epoch": 48.204, "grad_norm": 1.137345314025879, "learning_rate": 2e-05, "loss": 0.0497193, "step": 24102 }, { "epoch": 48.206, "grad_norm": 1.497851848602295, "learning_rate": 2e-05, "loss": 0.06260847, "step": 24103 }, { "epoch": 48.208, "grad_norm": 2.548647165298462, "learning_rate": 2e-05, "loss": 0.04292821, "step": 24104 }, { "epoch": 48.21, "grad_norm": 1.1839015483856201, "learning_rate": 2e-05, "loss": 0.04555358, "step": 24105 }, { "epoch": 48.212, "grad_norm": 2.083338737487793, "learning_rate": 2e-05, "loss": 0.05853738, "step": 24106 }, { "epoch": 48.214, "grad_norm": 5.700353145599365, "learning_rate": 2e-05, "loss": 0.05707981, "step": 24107 }, { "epoch": 48.216, "grad_norm": 1.6352933645248413, "learning_rate": 2e-05, "loss": 0.04555116, "step": 24108 }, { "epoch": 48.218, "grad_norm": 1.1690996885299683, "learning_rate": 2e-05, "loss": 0.05107299, "step": 24109 }, { "epoch": 48.22, "grad_norm": 0.9500452876091003, "learning_rate": 2e-05, "loss": 0.0399535, "step": 24110 }, { "epoch": 48.222, "grad_norm": 1.215437412261963, "learning_rate": 2e-05, "loss": 0.06137428, "step": 24111 }, { "epoch": 48.224, "grad_norm": 1.2029011249542236, "learning_rate": 2e-05, "loss": 0.04559404, "step": 24112 }, { "epoch": 48.226, "grad_norm": 1.1522444486618042, "learning_rate": 2e-05, "loss": 0.03674344, "step": 24113 }, { "epoch": 48.228, "grad_norm": 0.9397953152656555, "learning_rate": 2e-05, "loss": 0.03908242, "step": 24114 }, { "epoch": 48.23, "grad_norm": 1.1449769735336304, "learning_rate": 2e-05, "loss": 0.0305871, "step": 24115 }, { "epoch": 48.232, "grad_norm": 1.007838487625122, "learning_rate": 2e-05, "loss": 0.04913601, "step": 24116 }, { "epoch": 48.234, "grad_norm": 1.1319141387939453, "learning_rate": 2e-05, "loss": 0.03840301, "step": 24117 }, { "epoch": 48.236, "grad_norm": 1.0957351922988892, "learning_rate": 2e-05, "loss": 0.04426713, "step": 24118 }, { "epoch": 48.238, "grad_norm": 1.2394685745239258, "learning_rate": 2e-05, "loss": 0.04357488, "step": 24119 }, { "epoch": 48.24, "grad_norm": 1.0485479831695557, "learning_rate": 2e-05, "loss": 0.04554567, "step": 24120 }, { "epoch": 48.242, "grad_norm": 1.299285650253296, "learning_rate": 2e-05, "loss": 0.05235005, "step": 24121 }, { "epoch": 48.244, "grad_norm": 1.0098495483398438, "learning_rate": 2e-05, "loss": 0.03697013, "step": 24122 }, { "epoch": 48.246, "grad_norm": 1.1185497045516968, "learning_rate": 2e-05, "loss": 0.05296638, "step": 24123 }, { "epoch": 48.248, "grad_norm": 1.1930783987045288, "learning_rate": 2e-05, "loss": 0.04171202, "step": 24124 }, { "epoch": 48.25, "grad_norm": 1.351486325263977, "learning_rate": 2e-05, "loss": 0.04053985, "step": 24125 }, { "epoch": 48.252, "grad_norm": 1.1878384351730347, "learning_rate": 2e-05, "loss": 0.04597762, "step": 24126 }, { "epoch": 48.254, "grad_norm": 1.2218701839447021, "learning_rate": 2e-05, "loss": 0.04417387, "step": 24127 }, { "epoch": 48.256, "grad_norm": 1.373600721359253, "learning_rate": 2e-05, "loss": 0.0506381, "step": 24128 }, { "epoch": 48.258, "grad_norm": 1.180119514465332, "learning_rate": 2e-05, "loss": 0.05635491, "step": 24129 }, { "epoch": 48.26, "grad_norm": 1.3610702753067017, "learning_rate": 2e-05, "loss": 0.05045182, "step": 24130 }, { "epoch": 48.262, "grad_norm": 1.1098660230636597, "learning_rate": 2e-05, "loss": 0.03767511, "step": 24131 }, { "epoch": 48.264, "grad_norm": 1.0122102499008179, "learning_rate": 2e-05, "loss": 0.0440858, "step": 24132 }, { "epoch": 48.266, "grad_norm": 0.9213972687721252, "learning_rate": 2e-05, "loss": 0.04129899, "step": 24133 }, { "epoch": 48.268, "grad_norm": 2.2475976943969727, "learning_rate": 2e-05, "loss": 0.04196858, "step": 24134 }, { "epoch": 48.27, "grad_norm": 2.2175891399383545, "learning_rate": 2e-05, "loss": 0.0501932, "step": 24135 }, { "epoch": 48.272, "grad_norm": 1.0081956386566162, "learning_rate": 2e-05, "loss": 0.03680322, "step": 24136 }, { "epoch": 48.274, "grad_norm": 1.3544892072677612, "learning_rate": 2e-05, "loss": 0.03294618, "step": 24137 }, { "epoch": 48.276, "grad_norm": 1.0623961687088013, "learning_rate": 2e-05, "loss": 0.04522949, "step": 24138 }, { "epoch": 48.278, "grad_norm": 1.520250916481018, "learning_rate": 2e-05, "loss": 0.04707646, "step": 24139 }, { "epoch": 48.28, "grad_norm": 1.0357815027236938, "learning_rate": 2e-05, "loss": 0.04026045, "step": 24140 }, { "epoch": 48.282, "grad_norm": 1.9504812955856323, "learning_rate": 2e-05, "loss": 0.05459105, "step": 24141 }, { "epoch": 48.284, "grad_norm": 1.1651318073272705, "learning_rate": 2e-05, "loss": 0.05374034, "step": 24142 }, { "epoch": 48.286, "grad_norm": 1.7718772888183594, "learning_rate": 2e-05, "loss": 0.04945064, "step": 24143 }, { "epoch": 48.288, "grad_norm": 1.8275434970855713, "learning_rate": 2e-05, "loss": 0.05883159, "step": 24144 }, { "epoch": 48.29, "grad_norm": 1.6279141902923584, "learning_rate": 2e-05, "loss": 0.05613036, "step": 24145 }, { "epoch": 48.292, "grad_norm": 1.3604873418807983, "learning_rate": 2e-05, "loss": 0.03654074, "step": 24146 }, { "epoch": 48.294, "grad_norm": 1.2422846555709839, "learning_rate": 2e-05, "loss": 0.05106914, "step": 24147 }, { "epoch": 48.296, "grad_norm": 1.211463212966919, "learning_rate": 2e-05, "loss": 0.04813382, "step": 24148 }, { "epoch": 48.298, "grad_norm": 2.1813290119171143, "learning_rate": 2e-05, "loss": 0.04043988, "step": 24149 }, { "epoch": 48.3, "grad_norm": 0.9801517724990845, "learning_rate": 2e-05, "loss": 0.03209711, "step": 24150 }, { "epoch": 48.302, "grad_norm": 1.0106456279754639, "learning_rate": 2e-05, "loss": 0.04242552, "step": 24151 }, { "epoch": 48.304, "grad_norm": 1.4082107543945312, "learning_rate": 2e-05, "loss": 0.05142638, "step": 24152 }, { "epoch": 48.306, "grad_norm": 0.9951173663139343, "learning_rate": 2e-05, "loss": 0.03238222, "step": 24153 }, { "epoch": 48.308, "grad_norm": 0.9427967071533203, "learning_rate": 2e-05, "loss": 0.03944872, "step": 24154 }, { "epoch": 48.31, "grad_norm": 1.2679033279418945, "learning_rate": 2e-05, "loss": 0.03209253, "step": 24155 }, { "epoch": 48.312, "grad_norm": 1.0873744487762451, "learning_rate": 2e-05, "loss": 0.05010971, "step": 24156 }, { "epoch": 48.314, "grad_norm": 0.9255422353744507, "learning_rate": 2e-05, "loss": 0.02904349, "step": 24157 }, { "epoch": 48.316, "grad_norm": 1.2403888702392578, "learning_rate": 2e-05, "loss": 0.04734727, "step": 24158 }, { "epoch": 48.318, "grad_norm": 1.1870068311691284, "learning_rate": 2e-05, "loss": 0.05224151, "step": 24159 }, { "epoch": 48.32, "grad_norm": 1.175885796546936, "learning_rate": 2e-05, "loss": 0.06514286, "step": 24160 }, { "epoch": 48.322, "grad_norm": 1.0870989561080933, "learning_rate": 2e-05, "loss": 0.03403718, "step": 24161 }, { "epoch": 48.324, "grad_norm": 1.438126802444458, "learning_rate": 2e-05, "loss": 0.044659, "step": 24162 }, { "epoch": 48.326, "grad_norm": 1.0415043830871582, "learning_rate": 2e-05, "loss": 0.04579143, "step": 24163 }, { "epoch": 48.328, "grad_norm": 1.2841826677322388, "learning_rate": 2e-05, "loss": 0.0525934, "step": 24164 }, { "epoch": 48.33, "grad_norm": 1.0320045948028564, "learning_rate": 2e-05, "loss": 0.0498873, "step": 24165 }, { "epoch": 48.332, "grad_norm": 1.594804048538208, "learning_rate": 2e-05, "loss": 0.05714633, "step": 24166 }, { "epoch": 48.334, "grad_norm": 1.3862698078155518, "learning_rate": 2e-05, "loss": 0.05453505, "step": 24167 }, { "epoch": 48.336, "grad_norm": 0.9656899571418762, "learning_rate": 2e-05, "loss": 0.03555472, "step": 24168 }, { "epoch": 48.338, "grad_norm": 1.1010653972625732, "learning_rate": 2e-05, "loss": 0.03735081, "step": 24169 }, { "epoch": 48.34, "grad_norm": 0.8670578002929688, "learning_rate": 2e-05, "loss": 0.0351905, "step": 24170 }, { "epoch": 48.342, "grad_norm": 2.3454651832580566, "learning_rate": 2e-05, "loss": 0.06331472, "step": 24171 }, { "epoch": 48.344, "grad_norm": 0.8364679217338562, "learning_rate": 2e-05, "loss": 0.02702582, "step": 24172 }, { "epoch": 48.346, "grad_norm": 1.2493551969528198, "learning_rate": 2e-05, "loss": 0.04526371, "step": 24173 }, { "epoch": 48.348, "grad_norm": 0.9824249148368835, "learning_rate": 2e-05, "loss": 0.04113188, "step": 24174 }, { "epoch": 48.35, "grad_norm": 1.0546280145645142, "learning_rate": 2e-05, "loss": 0.04030831, "step": 24175 }, { "epoch": 48.352, "grad_norm": 1.2266684770584106, "learning_rate": 2e-05, "loss": 0.05371603, "step": 24176 }, { "epoch": 48.354, "grad_norm": 0.8720648288726807, "learning_rate": 2e-05, "loss": 0.0294428, "step": 24177 }, { "epoch": 48.356, "grad_norm": 1.0267912149429321, "learning_rate": 2e-05, "loss": 0.03346961, "step": 24178 }, { "epoch": 48.358, "grad_norm": 0.8224252462387085, "learning_rate": 2e-05, "loss": 0.03244222, "step": 24179 }, { "epoch": 48.36, "grad_norm": 1.0877106189727783, "learning_rate": 2e-05, "loss": 0.0302844, "step": 24180 }, { "epoch": 48.362, "grad_norm": 1.4461932182312012, "learning_rate": 2e-05, "loss": 0.06300613, "step": 24181 }, { "epoch": 48.364, "grad_norm": 1.026535987854004, "learning_rate": 2e-05, "loss": 0.03884218, "step": 24182 }, { "epoch": 48.366, "grad_norm": 1.0927294492721558, "learning_rate": 2e-05, "loss": 0.04676462, "step": 24183 }, { "epoch": 48.368, "grad_norm": 1.0597631931304932, "learning_rate": 2e-05, "loss": 0.04610288, "step": 24184 }, { "epoch": 48.37, "grad_norm": 1.020358920097351, "learning_rate": 2e-05, "loss": 0.0463963, "step": 24185 }, { "epoch": 48.372, "grad_norm": 1.218424916267395, "learning_rate": 2e-05, "loss": 0.05218824, "step": 24186 }, { "epoch": 48.374, "grad_norm": 0.9043522477149963, "learning_rate": 2e-05, "loss": 0.02462957, "step": 24187 }, { "epoch": 48.376, "grad_norm": 1.4082074165344238, "learning_rate": 2e-05, "loss": 0.04175066, "step": 24188 }, { "epoch": 48.378, "grad_norm": 1.5257251262664795, "learning_rate": 2e-05, "loss": 0.06049989, "step": 24189 }, { "epoch": 48.38, "grad_norm": 0.8296879529953003, "learning_rate": 2e-05, "loss": 0.03199176, "step": 24190 }, { "epoch": 48.382, "grad_norm": 1.1817295551300049, "learning_rate": 2e-05, "loss": 0.03712118, "step": 24191 }, { "epoch": 48.384, "grad_norm": 1.0457736253738403, "learning_rate": 2e-05, "loss": 0.04271453, "step": 24192 }, { "epoch": 48.386, "grad_norm": 1.0363644361495972, "learning_rate": 2e-05, "loss": 0.04200714, "step": 24193 }, { "epoch": 48.388, "grad_norm": 1.714218020439148, "learning_rate": 2e-05, "loss": 0.04391317, "step": 24194 }, { "epoch": 48.39, "grad_norm": 5.153040409088135, "learning_rate": 2e-05, "loss": 0.05754845, "step": 24195 }, { "epoch": 48.392, "grad_norm": 1.0365186929702759, "learning_rate": 2e-05, "loss": 0.0471337, "step": 24196 }, { "epoch": 48.394, "grad_norm": 1.2358766794204712, "learning_rate": 2e-05, "loss": 0.04129286, "step": 24197 }, { "epoch": 48.396, "grad_norm": 1.2240992784500122, "learning_rate": 2e-05, "loss": 0.04827642, "step": 24198 }, { "epoch": 48.398, "grad_norm": 1.7518669366836548, "learning_rate": 2e-05, "loss": 0.04430645, "step": 24199 }, { "epoch": 48.4, "grad_norm": 1.0152863264083862, "learning_rate": 2e-05, "loss": 0.03324828, "step": 24200 }, { "epoch": 48.402, "grad_norm": 1.0316367149353027, "learning_rate": 2e-05, "loss": 0.04925451, "step": 24201 }, { "epoch": 48.404, "grad_norm": 1.0148727893829346, "learning_rate": 2e-05, "loss": 0.0459566, "step": 24202 }, { "epoch": 48.406, "grad_norm": 1.6290829181671143, "learning_rate": 2e-05, "loss": 0.04747181, "step": 24203 }, { "epoch": 48.408, "grad_norm": 1.2166723012924194, "learning_rate": 2e-05, "loss": 0.05170629, "step": 24204 }, { "epoch": 48.41, "grad_norm": 0.7935415506362915, "learning_rate": 2e-05, "loss": 0.02572259, "step": 24205 }, { "epoch": 48.412, "grad_norm": 1.1070631742477417, "learning_rate": 2e-05, "loss": 0.03391981, "step": 24206 }, { "epoch": 48.414, "grad_norm": 0.8133532404899597, "learning_rate": 2e-05, "loss": 0.02641581, "step": 24207 }, { "epoch": 48.416, "grad_norm": 1.996878743171692, "learning_rate": 2e-05, "loss": 0.04484842, "step": 24208 }, { "epoch": 48.418, "grad_norm": 1.1942048072814941, "learning_rate": 2e-05, "loss": 0.05959399, "step": 24209 }, { "epoch": 48.42, "grad_norm": 0.8880040645599365, "learning_rate": 2e-05, "loss": 0.02957754, "step": 24210 }, { "epoch": 48.422, "grad_norm": 1.1661767959594727, "learning_rate": 2e-05, "loss": 0.04623494, "step": 24211 }, { "epoch": 48.424, "grad_norm": 0.9690030813217163, "learning_rate": 2e-05, "loss": 0.03455418, "step": 24212 }, { "epoch": 48.426, "grad_norm": 1.366040825843811, "learning_rate": 2e-05, "loss": 0.04644987, "step": 24213 }, { "epoch": 48.428, "grad_norm": 1.000235676765442, "learning_rate": 2e-05, "loss": 0.04174297, "step": 24214 }, { "epoch": 48.43, "grad_norm": 0.7845618724822998, "learning_rate": 2e-05, "loss": 0.02701226, "step": 24215 }, { "epoch": 48.432, "grad_norm": 1.18039071559906, "learning_rate": 2e-05, "loss": 0.04743581, "step": 24216 }, { "epoch": 48.434, "grad_norm": 1.6625279188156128, "learning_rate": 2e-05, "loss": 0.0576622, "step": 24217 }, { "epoch": 48.436, "grad_norm": 1.3956663608551025, "learning_rate": 2e-05, "loss": 0.05304787, "step": 24218 }, { "epoch": 48.438, "grad_norm": 1.4112004041671753, "learning_rate": 2e-05, "loss": 0.03804924, "step": 24219 }, { "epoch": 48.44, "grad_norm": 1.9366042613983154, "learning_rate": 2e-05, "loss": 0.04109778, "step": 24220 }, { "epoch": 48.442, "grad_norm": 0.9154440760612488, "learning_rate": 2e-05, "loss": 0.03123736, "step": 24221 }, { "epoch": 48.444, "grad_norm": 0.9950308799743652, "learning_rate": 2e-05, "loss": 0.03339973, "step": 24222 }, { "epoch": 48.446, "grad_norm": 1.0484952926635742, "learning_rate": 2e-05, "loss": 0.04734972, "step": 24223 }, { "epoch": 48.448, "grad_norm": 1.1667615175247192, "learning_rate": 2e-05, "loss": 0.03506687, "step": 24224 }, { "epoch": 48.45, "grad_norm": 2.051151752471924, "learning_rate": 2e-05, "loss": 0.05529873, "step": 24225 }, { "epoch": 48.452, "grad_norm": 0.9868109822273254, "learning_rate": 2e-05, "loss": 0.0372206, "step": 24226 }, { "epoch": 48.454, "grad_norm": 1.6365501880645752, "learning_rate": 2e-05, "loss": 0.05232105, "step": 24227 }, { "epoch": 48.456, "grad_norm": 0.9278736114501953, "learning_rate": 2e-05, "loss": 0.04580279, "step": 24228 }, { "epoch": 48.458, "grad_norm": 0.8635781407356262, "learning_rate": 2e-05, "loss": 0.03651277, "step": 24229 }, { "epoch": 48.46, "grad_norm": 0.8887329697608948, "learning_rate": 2e-05, "loss": 0.0302187, "step": 24230 }, { "epoch": 48.462, "grad_norm": 0.932221531867981, "learning_rate": 2e-05, "loss": 0.02393607, "step": 24231 }, { "epoch": 48.464, "grad_norm": 1.0179107189178467, "learning_rate": 2e-05, "loss": 0.03262734, "step": 24232 }, { "epoch": 48.466, "grad_norm": 1.1795079708099365, "learning_rate": 2e-05, "loss": 0.03840907, "step": 24233 }, { "epoch": 48.468, "grad_norm": 1.1912506818771362, "learning_rate": 2e-05, "loss": 0.05022033, "step": 24234 }, { "epoch": 48.47, "grad_norm": 1.3158849477767944, "learning_rate": 2e-05, "loss": 0.03381804, "step": 24235 }, { "epoch": 48.472, "grad_norm": 1.073093056678772, "learning_rate": 2e-05, "loss": 0.04341817, "step": 24236 }, { "epoch": 48.474, "grad_norm": 1.0367578268051147, "learning_rate": 2e-05, "loss": 0.035133, "step": 24237 }, { "epoch": 48.476, "grad_norm": 0.8915002346038818, "learning_rate": 2e-05, "loss": 0.03181299, "step": 24238 }, { "epoch": 48.478, "grad_norm": 1.0240304470062256, "learning_rate": 2e-05, "loss": 0.04212492, "step": 24239 }, { "epoch": 48.48, "grad_norm": 0.9523401260375977, "learning_rate": 2e-05, "loss": 0.04073003, "step": 24240 }, { "epoch": 48.482, "grad_norm": 2.9246506690979004, "learning_rate": 2e-05, "loss": 0.05362456, "step": 24241 }, { "epoch": 48.484, "grad_norm": 1.1026496887207031, "learning_rate": 2e-05, "loss": 0.05468607, "step": 24242 }, { "epoch": 48.486, "grad_norm": 1.0787307024002075, "learning_rate": 2e-05, "loss": 0.04684483, "step": 24243 }, { "epoch": 48.488, "grad_norm": 2.2853903770446777, "learning_rate": 2e-05, "loss": 0.03229876, "step": 24244 }, { "epoch": 48.49, "grad_norm": 1.1742076873779297, "learning_rate": 2e-05, "loss": 0.04197202, "step": 24245 }, { "epoch": 48.492, "grad_norm": 1.32828688621521, "learning_rate": 2e-05, "loss": 0.04950122, "step": 24246 }, { "epoch": 48.494, "grad_norm": 0.8825669288635254, "learning_rate": 2e-05, "loss": 0.04022647, "step": 24247 }, { "epoch": 48.496, "grad_norm": 0.6225824356079102, "learning_rate": 2e-05, "loss": 0.01386083, "step": 24248 }, { "epoch": 48.498, "grad_norm": 1.0200331211090088, "learning_rate": 2e-05, "loss": 0.03335999, "step": 24249 }, { "epoch": 48.5, "grad_norm": 1.264811635017395, "learning_rate": 2e-05, "loss": 0.05002368, "step": 24250 }, { "epoch": 48.502, "grad_norm": 1.1186511516571045, "learning_rate": 2e-05, "loss": 0.0462511, "step": 24251 }, { "epoch": 48.504, "grad_norm": 1.016188621520996, "learning_rate": 2e-05, "loss": 0.0427054, "step": 24252 }, { "epoch": 48.506, "grad_norm": 0.920356273651123, "learning_rate": 2e-05, "loss": 0.04023985, "step": 24253 }, { "epoch": 48.508, "grad_norm": 1.0661952495574951, "learning_rate": 2e-05, "loss": 0.03820534, "step": 24254 }, { "epoch": 48.51, "grad_norm": 1.0455533266067505, "learning_rate": 2e-05, "loss": 0.03811349, "step": 24255 }, { "epoch": 48.512, "grad_norm": 1.0246407985687256, "learning_rate": 2e-05, "loss": 0.04569801, "step": 24256 }, { "epoch": 48.514, "grad_norm": 1.0143934488296509, "learning_rate": 2e-05, "loss": 0.04163357, "step": 24257 }, { "epoch": 48.516, "grad_norm": 1.1070332527160645, "learning_rate": 2e-05, "loss": 0.05052846, "step": 24258 }, { "epoch": 48.518, "grad_norm": 1.4934407472610474, "learning_rate": 2e-05, "loss": 0.04815038, "step": 24259 }, { "epoch": 48.52, "grad_norm": 1.0891765356063843, "learning_rate": 2e-05, "loss": 0.03874873, "step": 24260 }, { "epoch": 48.522, "grad_norm": 1.2481971979141235, "learning_rate": 2e-05, "loss": 0.04874709, "step": 24261 }, { "epoch": 48.524, "grad_norm": 1.043181300163269, "learning_rate": 2e-05, "loss": 0.0442415, "step": 24262 }, { "epoch": 48.526, "grad_norm": 1.027824878692627, "learning_rate": 2e-05, "loss": 0.03014851, "step": 24263 }, { "epoch": 48.528, "grad_norm": 1.2298223972320557, "learning_rate": 2e-05, "loss": 0.05911855, "step": 24264 }, { "epoch": 48.53, "grad_norm": 1.0007586479187012, "learning_rate": 2e-05, "loss": 0.0386268, "step": 24265 }, { "epoch": 48.532, "grad_norm": 1.9829974174499512, "learning_rate": 2e-05, "loss": 0.03946251, "step": 24266 }, { "epoch": 48.534, "grad_norm": 1.0536763668060303, "learning_rate": 2e-05, "loss": 0.04384655, "step": 24267 }, { "epoch": 48.536, "grad_norm": 1.1048747301101685, "learning_rate": 2e-05, "loss": 0.04697331, "step": 24268 }, { "epoch": 48.538, "grad_norm": 1.25386643409729, "learning_rate": 2e-05, "loss": 0.05482823, "step": 24269 }, { "epoch": 48.54, "grad_norm": 1.1054425239562988, "learning_rate": 2e-05, "loss": 0.05671865, "step": 24270 }, { "epoch": 48.542, "grad_norm": 1.387277364730835, "learning_rate": 2e-05, "loss": 0.05447429, "step": 24271 }, { "epoch": 48.544, "grad_norm": 0.9112948775291443, "learning_rate": 2e-05, "loss": 0.03461436, "step": 24272 }, { "epoch": 48.546, "grad_norm": 1.212533712387085, "learning_rate": 2e-05, "loss": 0.05425304, "step": 24273 }, { "epoch": 48.548, "grad_norm": 1.1351462602615356, "learning_rate": 2e-05, "loss": 0.03608508, "step": 24274 }, { "epoch": 48.55, "grad_norm": 1.75313401222229, "learning_rate": 2e-05, "loss": 0.05220794, "step": 24275 }, { "epoch": 48.552, "grad_norm": 0.9481651782989502, "learning_rate": 2e-05, "loss": 0.0283526, "step": 24276 }, { "epoch": 48.554, "grad_norm": 1.0610271692276, "learning_rate": 2e-05, "loss": 0.04264805, "step": 24277 }, { "epoch": 48.556, "grad_norm": 2.3532328605651855, "learning_rate": 2e-05, "loss": 0.03620804, "step": 24278 }, { "epoch": 48.558, "grad_norm": 0.9489250183105469, "learning_rate": 2e-05, "loss": 0.03719745, "step": 24279 }, { "epoch": 48.56, "grad_norm": 1.1748366355895996, "learning_rate": 2e-05, "loss": 0.04677079, "step": 24280 }, { "epoch": 48.562, "grad_norm": 1.1246399879455566, "learning_rate": 2e-05, "loss": 0.04476772, "step": 24281 }, { "epoch": 48.564, "grad_norm": 1.8000998497009277, "learning_rate": 2e-05, "loss": 0.04949294, "step": 24282 }, { "epoch": 48.566, "grad_norm": 1.1726521253585815, "learning_rate": 2e-05, "loss": 0.0357168, "step": 24283 }, { "epoch": 48.568, "grad_norm": 1.0030269622802734, "learning_rate": 2e-05, "loss": 0.03976782, "step": 24284 }, { "epoch": 48.57, "grad_norm": 0.9358143210411072, "learning_rate": 2e-05, "loss": 0.03555932, "step": 24285 }, { "epoch": 48.572, "grad_norm": 1.801404356956482, "learning_rate": 2e-05, "loss": 0.04939446, "step": 24286 }, { "epoch": 48.574, "grad_norm": 0.946178674697876, "learning_rate": 2e-05, "loss": 0.03214157, "step": 24287 }, { "epoch": 48.576, "grad_norm": 1.096544623374939, "learning_rate": 2e-05, "loss": 0.03449382, "step": 24288 }, { "epoch": 48.578, "grad_norm": 1.3711516857147217, "learning_rate": 2e-05, "loss": 0.06276315, "step": 24289 }, { "epoch": 48.58, "grad_norm": 3.3949224948883057, "learning_rate": 2e-05, "loss": 0.05145295, "step": 24290 }, { "epoch": 48.582, "grad_norm": 0.9263776540756226, "learning_rate": 2e-05, "loss": 0.03830439, "step": 24291 }, { "epoch": 48.584, "grad_norm": 0.9900041222572327, "learning_rate": 2e-05, "loss": 0.04187006, "step": 24292 }, { "epoch": 48.586, "grad_norm": 1.1013092994689941, "learning_rate": 2e-05, "loss": 0.04096479, "step": 24293 }, { "epoch": 48.588, "grad_norm": 1.4741755723953247, "learning_rate": 2e-05, "loss": 0.05423959, "step": 24294 }, { "epoch": 48.59, "grad_norm": 1.6562232971191406, "learning_rate": 2e-05, "loss": 0.05241001, "step": 24295 }, { "epoch": 48.592, "grad_norm": 1.218342661857605, "learning_rate": 2e-05, "loss": 0.05349832, "step": 24296 }, { "epoch": 48.594, "grad_norm": 0.9022951722145081, "learning_rate": 2e-05, "loss": 0.0272672, "step": 24297 }, { "epoch": 48.596, "grad_norm": 1.0279433727264404, "learning_rate": 2e-05, "loss": 0.04580365, "step": 24298 }, { "epoch": 48.598, "grad_norm": 1.0660715103149414, "learning_rate": 2e-05, "loss": 0.04874376, "step": 24299 }, { "epoch": 48.6, "grad_norm": 1.0055099725723267, "learning_rate": 2e-05, "loss": 0.03486983, "step": 24300 }, { "epoch": 48.602, "grad_norm": 1.0519543886184692, "learning_rate": 2e-05, "loss": 0.03375277, "step": 24301 }, { "epoch": 48.604, "grad_norm": 1.289353370666504, "learning_rate": 2e-05, "loss": 0.05148524, "step": 24302 }, { "epoch": 48.606, "grad_norm": 1.653365135192871, "learning_rate": 2e-05, "loss": 0.04670177, "step": 24303 }, { "epoch": 48.608, "grad_norm": 1.6327040195465088, "learning_rate": 2e-05, "loss": 0.05498214, "step": 24304 }, { "epoch": 48.61, "grad_norm": 1.2024866342544556, "learning_rate": 2e-05, "loss": 0.04639989, "step": 24305 }, { "epoch": 48.612, "grad_norm": 2.8367912769317627, "learning_rate": 2e-05, "loss": 0.05120923, "step": 24306 }, { "epoch": 48.614, "grad_norm": 1.5392112731933594, "learning_rate": 2e-05, "loss": 0.05257453, "step": 24307 }, { "epoch": 48.616, "grad_norm": 1.0724151134490967, "learning_rate": 2e-05, "loss": 0.04344625, "step": 24308 }, { "epoch": 48.618, "grad_norm": 1.2043347358703613, "learning_rate": 2e-05, "loss": 0.05567119, "step": 24309 }, { "epoch": 48.62, "grad_norm": 0.9698638916015625, "learning_rate": 2e-05, "loss": 0.02439217, "step": 24310 }, { "epoch": 48.622, "grad_norm": 1.0780527591705322, "learning_rate": 2e-05, "loss": 0.0422633, "step": 24311 }, { "epoch": 48.624, "grad_norm": 1.0314886569976807, "learning_rate": 2e-05, "loss": 0.04491421, "step": 24312 }, { "epoch": 48.626, "grad_norm": 1.0601820945739746, "learning_rate": 2e-05, "loss": 0.03747461, "step": 24313 }, { "epoch": 48.628, "grad_norm": 1.0729905366897583, "learning_rate": 2e-05, "loss": 0.04422288, "step": 24314 }, { "epoch": 48.63, "grad_norm": 1.1984076499938965, "learning_rate": 2e-05, "loss": 0.03403098, "step": 24315 }, { "epoch": 48.632, "grad_norm": 1.9422006607055664, "learning_rate": 2e-05, "loss": 0.06337364, "step": 24316 }, { "epoch": 48.634, "grad_norm": 1.362138271331787, "learning_rate": 2e-05, "loss": 0.03771912, "step": 24317 }, { "epoch": 48.636, "grad_norm": 3.020495891571045, "learning_rate": 2e-05, "loss": 0.0747146, "step": 24318 }, { "epoch": 48.638, "grad_norm": 1.218593955039978, "learning_rate": 2e-05, "loss": 0.05234303, "step": 24319 }, { "epoch": 48.64, "grad_norm": 1.0708924531936646, "learning_rate": 2e-05, "loss": 0.04512764, "step": 24320 }, { "epoch": 48.642, "grad_norm": 2.5593295097351074, "learning_rate": 2e-05, "loss": 0.04091706, "step": 24321 }, { "epoch": 48.644, "grad_norm": 1.0374568700790405, "learning_rate": 2e-05, "loss": 0.04367303, "step": 24322 }, { "epoch": 48.646, "grad_norm": 1.5019183158874512, "learning_rate": 2e-05, "loss": 0.04535907, "step": 24323 }, { "epoch": 48.648, "grad_norm": 1.5762364864349365, "learning_rate": 2e-05, "loss": 0.036416, "step": 24324 }, { "epoch": 48.65, "grad_norm": 1.6011186838150024, "learning_rate": 2e-05, "loss": 0.04643697, "step": 24325 }, { "epoch": 48.652, "grad_norm": 0.9353030920028687, "learning_rate": 2e-05, "loss": 0.03492321, "step": 24326 }, { "epoch": 48.654, "grad_norm": 1.0807359218597412, "learning_rate": 2e-05, "loss": 0.04072664, "step": 24327 }, { "epoch": 48.656, "grad_norm": 1.6334316730499268, "learning_rate": 2e-05, "loss": 0.04389174, "step": 24328 }, { "epoch": 48.658, "grad_norm": 1.414925456047058, "learning_rate": 2e-05, "loss": 0.04218788, "step": 24329 }, { "epoch": 48.66, "grad_norm": 0.9829714298248291, "learning_rate": 2e-05, "loss": 0.04158359, "step": 24330 }, { "epoch": 48.662, "grad_norm": 1.0123839378356934, "learning_rate": 2e-05, "loss": 0.0368352, "step": 24331 }, { "epoch": 48.664, "grad_norm": 0.900935709476471, "learning_rate": 2e-05, "loss": 0.03761084, "step": 24332 }, { "epoch": 48.666, "grad_norm": 1.1026766300201416, "learning_rate": 2e-05, "loss": 0.04306389, "step": 24333 }, { "epoch": 48.668, "grad_norm": 1.1017385721206665, "learning_rate": 2e-05, "loss": 0.04529566, "step": 24334 }, { "epoch": 48.67, "grad_norm": 1.05580735206604, "learning_rate": 2e-05, "loss": 0.04102797, "step": 24335 }, { "epoch": 48.672, "grad_norm": 1.358658790588379, "learning_rate": 2e-05, "loss": 0.0621775, "step": 24336 }, { "epoch": 48.674, "grad_norm": 1.2008211612701416, "learning_rate": 2e-05, "loss": 0.04370626, "step": 24337 }, { "epoch": 48.676, "grad_norm": 1.593617558479309, "learning_rate": 2e-05, "loss": 0.05979547, "step": 24338 }, { "epoch": 48.678, "grad_norm": 1.2104921340942383, "learning_rate": 2e-05, "loss": 0.03068832, "step": 24339 }, { "epoch": 48.68, "grad_norm": 2.1031973361968994, "learning_rate": 2e-05, "loss": 0.07146349, "step": 24340 }, { "epoch": 48.682, "grad_norm": 2.78953218460083, "learning_rate": 2e-05, "loss": 0.06436908, "step": 24341 }, { "epoch": 48.684, "grad_norm": 0.9238982796669006, "learning_rate": 2e-05, "loss": 0.03925744, "step": 24342 }, { "epoch": 48.686, "grad_norm": 1.4998198747634888, "learning_rate": 2e-05, "loss": 0.04919617, "step": 24343 }, { "epoch": 48.688, "grad_norm": 1.2886649370193481, "learning_rate": 2e-05, "loss": 0.03802135, "step": 24344 }, { "epoch": 48.69, "grad_norm": 0.9078302979469299, "learning_rate": 2e-05, "loss": 0.03166285, "step": 24345 }, { "epoch": 48.692, "grad_norm": 1.0930813550949097, "learning_rate": 2e-05, "loss": 0.04575538, "step": 24346 }, { "epoch": 48.694, "grad_norm": 0.9382323026657104, "learning_rate": 2e-05, "loss": 0.04039787, "step": 24347 }, { "epoch": 48.696, "grad_norm": 0.8376799821853638, "learning_rate": 2e-05, "loss": 0.03154026, "step": 24348 }, { "epoch": 48.698, "grad_norm": 0.8060555458068848, "learning_rate": 2e-05, "loss": 0.02412071, "step": 24349 }, { "epoch": 48.7, "grad_norm": 1.573853611946106, "learning_rate": 2e-05, "loss": 0.04239369, "step": 24350 }, { "epoch": 48.702, "grad_norm": 1.240988850593567, "learning_rate": 2e-05, "loss": 0.05373859, "step": 24351 }, { "epoch": 48.704, "grad_norm": 0.8846662044525146, "learning_rate": 2e-05, "loss": 0.03031468, "step": 24352 }, { "epoch": 48.706, "grad_norm": 1.5107836723327637, "learning_rate": 2e-05, "loss": 0.04367136, "step": 24353 }, { "epoch": 48.708, "grad_norm": 0.9465537667274475, "learning_rate": 2e-05, "loss": 0.03686992, "step": 24354 }, { "epoch": 48.71, "grad_norm": 7.329326629638672, "learning_rate": 2e-05, "loss": 0.06544407, "step": 24355 }, { "epoch": 48.712, "grad_norm": 1.0423285961151123, "learning_rate": 2e-05, "loss": 0.03988872, "step": 24356 }, { "epoch": 48.714, "grad_norm": 0.8279536366462708, "learning_rate": 2e-05, "loss": 0.02683463, "step": 24357 }, { "epoch": 48.716, "grad_norm": 1.0414245128631592, "learning_rate": 2e-05, "loss": 0.04054063, "step": 24358 }, { "epoch": 48.718, "grad_norm": 0.898220956325531, "learning_rate": 2e-05, "loss": 0.02118924, "step": 24359 }, { "epoch": 48.72, "grad_norm": 0.9355086088180542, "learning_rate": 2e-05, "loss": 0.03453436, "step": 24360 }, { "epoch": 48.722, "grad_norm": 1.1354353427886963, "learning_rate": 2e-05, "loss": 0.03906699, "step": 24361 }, { "epoch": 48.724, "grad_norm": 1.1232068538665771, "learning_rate": 2e-05, "loss": 0.05339003, "step": 24362 }, { "epoch": 48.726, "grad_norm": 1.028148889541626, "learning_rate": 2e-05, "loss": 0.04637954, "step": 24363 }, { "epoch": 48.728, "grad_norm": 1.2347803115844727, "learning_rate": 2e-05, "loss": 0.02349299, "step": 24364 }, { "epoch": 48.73, "grad_norm": 0.9514893889427185, "learning_rate": 2e-05, "loss": 0.03246442, "step": 24365 }, { "epoch": 48.732, "grad_norm": 2.0282280445098877, "learning_rate": 2e-05, "loss": 0.05106306, "step": 24366 }, { "epoch": 48.734, "grad_norm": 1.1123815774917603, "learning_rate": 2e-05, "loss": 0.03770404, "step": 24367 }, { "epoch": 48.736, "grad_norm": 1.3165796995162964, "learning_rate": 2e-05, "loss": 0.05627037, "step": 24368 }, { "epoch": 48.738, "grad_norm": 1.5725345611572266, "learning_rate": 2e-05, "loss": 0.04725574, "step": 24369 }, { "epoch": 48.74, "grad_norm": 1.5316879749298096, "learning_rate": 2e-05, "loss": 0.0582117, "step": 24370 }, { "epoch": 48.742, "grad_norm": 1.2263118028640747, "learning_rate": 2e-05, "loss": 0.03796004, "step": 24371 }, { "epoch": 48.744, "grad_norm": 1.02708101272583, "learning_rate": 2e-05, "loss": 0.03169832, "step": 24372 }, { "epoch": 48.746, "grad_norm": 1.4545408487319946, "learning_rate": 2e-05, "loss": 0.04463018, "step": 24373 }, { "epoch": 48.748, "grad_norm": 1.2797362804412842, "learning_rate": 2e-05, "loss": 0.06535473, "step": 24374 }, { "epoch": 48.75, "grad_norm": 0.9623776078224182, "learning_rate": 2e-05, "loss": 0.03548843, "step": 24375 }, { "epoch": 48.752, "grad_norm": 1.2995597124099731, "learning_rate": 2e-05, "loss": 0.05275778, "step": 24376 }, { "epoch": 48.754, "grad_norm": 3.382997512817383, "learning_rate": 2e-05, "loss": 0.04675677, "step": 24377 }, { "epoch": 48.756, "grad_norm": 1.0011705160140991, "learning_rate": 2e-05, "loss": 0.04868302, "step": 24378 }, { "epoch": 48.758, "grad_norm": 1.1306729316711426, "learning_rate": 2e-05, "loss": 0.04722492, "step": 24379 }, { "epoch": 48.76, "grad_norm": 1.2009761333465576, "learning_rate": 2e-05, "loss": 0.05003314, "step": 24380 }, { "epoch": 48.762, "grad_norm": 1.008144497871399, "learning_rate": 2e-05, "loss": 0.03771747, "step": 24381 }, { "epoch": 48.764, "grad_norm": 1.1181625127792358, "learning_rate": 2e-05, "loss": 0.05064186, "step": 24382 }, { "epoch": 48.766, "grad_norm": 0.7634913921356201, "learning_rate": 2e-05, "loss": 0.02468442, "step": 24383 }, { "epoch": 48.768, "grad_norm": 2.3540360927581787, "learning_rate": 2e-05, "loss": 0.04042833, "step": 24384 }, { "epoch": 48.77, "grad_norm": 0.9578737020492554, "learning_rate": 2e-05, "loss": 0.04330675, "step": 24385 }, { "epoch": 48.772, "grad_norm": 1.1750001907348633, "learning_rate": 2e-05, "loss": 0.03898159, "step": 24386 }, { "epoch": 48.774, "grad_norm": 1.1242703199386597, "learning_rate": 2e-05, "loss": 0.05501888, "step": 24387 }, { "epoch": 48.776, "grad_norm": 0.9622960090637207, "learning_rate": 2e-05, "loss": 0.03375585, "step": 24388 }, { "epoch": 48.778, "grad_norm": 1.0836127996444702, "learning_rate": 2e-05, "loss": 0.04787442, "step": 24389 }, { "epoch": 48.78, "grad_norm": 1.127389669418335, "learning_rate": 2e-05, "loss": 0.03593518, "step": 24390 }, { "epoch": 48.782, "grad_norm": 2.1729109287261963, "learning_rate": 2e-05, "loss": 0.04273476, "step": 24391 }, { "epoch": 48.784, "grad_norm": 1.5484180450439453, "learning_rate": 2e-05, "loss": 0.03918305, "step": 24392 }, { "epoch": 48.786, "grad_norm": 0.9017122387886047, "learning_rate": 2e-05, "loss": 0.03190538, "step": 24393 }, { "epoch": 48.788, "grad_norm": 1.5424437522888184, "learning_rate": 2e-05, "loss": 0.0543567, "step": 24394 }, { "epoch": 48.79, "grad_norm": 1.155954122543335, "learning_rate": 2e-05, "loss": 0.03191642, "step": 24395 }, { "epoch": 48.792, "grad_norm": 1.4287625551223755, "learning_rate": 2e-05, "loss": 0.05400471, "step": 24396 }, { "epoch": 48.794, "grad_norm": 0.9282877445220947, "learning_rate": 2e-05, "loss": 0.0323032, "step": 24397 }, { "epoch": 48.796, "grad_norm": 1.1259311437606812, "learning_rate": 2e-05, "loss": 0.04647238, "step": 24398 }, { "epoch": 48.798, "grad_norm": 1.0310484170913696, "learning_rate": 2e-05, "loss": 0.04757043, "step": 24399 }, { "epoch": 48.8, "grad_norm": 1.293701171875, "learning_rate": 2e-05, "loss": 0.0582789, "step": 24400 }, { "epoch": 48.802, "grad_norm": 1.751761794090271, "learning_rate": 2e-05, "loss": 0.038141, "step": 24401 }, { "epoch": 48.804, "grad_norm": 1.2174683809280396, "learning_rate": 2e-05, "loss": 0.04666715, "step": 24402 }, { "epoch": 48.806, "grad_norm": 1.0694246292114258, "learning_rate": 2e-05, "loss": 0.04654847, "step": 24403 }, { "epoch": 48.808, "grad_norm": 1.3451502323150635, "learning_rate": 2e-05, "loss": 0.05215296, "step": 24404 }, { "epoch": 48.81, "grad_norm": 1.0607155561447144, "learning_rate": 2e-05, "loss": 0.04565288, "step": 24405 }, { "epoch": 48.812, "grad_norm": 0.8298874497413635, "learning_rate": 2e-05, "loss": 0.02550031, "step": 24406 }, { "epoch": 48.814, "grad_norm": 1.036697506904602, "learning_rate": 2e-05, "loss": 0.04669739, "step": 24407 }, { "epoch": 48.816, "grad_norm": 1.0443403720855713, "learning_rate": 2e-05, "loss": 0.02883327, "step": 24408 }, { "epoch": 48.818, "grad_norm": 0.9095358848571777, "learning_rate": 2e-05, "loss": 0.03351613, "step": 24409 }, { "epoch": 48.82, "grad_norm": 1.1774234771728516, "learning_rate": 2e-05, "loss": 0.03712221, "step": 24410 }, { "epoch": 48.822, "grad_norm": 2.6395068168640137, "learning_rate": 2e-05, "loss": 0.04827315, "step": 24411 }, { "epoch": 48.824, "grad_norm": 1.6375731229782104, "learning_rate": 2e-05, "loss": 0.05812149, "step": 24412 }, { "epoch": 48.826, "grad_norm": 1.0414574146270752, "learning_rate": 2e-05, "loss": 0.04219288, "step": 24413 }, { "epoch": 48.828, "grad_norm": 1.1180362701416016, "learning_rate": 2e-05, "loss": 0.03921234, "step": 24414 }, { "epoch": 48.83, "grad_norm": 0.9622010588645935, "learning_rate": 2e-05, "loss": 0.03459248, "step": 24415 }, { "epoch": 48.832, "grad_norm": 5.853996276855469, "learning_rate": 2e-05, "loss": 0.05215093, "step": 24416 }, { "epoch": 48.834, "grad_norm": 0.969934344291687, "learning_rate": 2e-05, "loss": 0.04063689, "step": 24417 }, { "epoch": 48.836, "grad_norm": 0.9728137254714966, "learning_rate": 2e-05, "loss": 0.04986447, "step": 24418 }, { "epoch": 48.838, "grad_norm": 2.3383843898773193, "learning_rate": 2e-05, "loss": 0.04833975, "step": 24419 }, { "epoch": 48.84, "grad_norm": 0.983928918838501, "learning_rate": 2e-05, "loss": 0.03613142, "step": 24420 }, { "epoch": 48.842, "grad_norm": 1.590661644935608, "learning_rate": 2e-05, "loss": 0.07209525, "step": 24421 }, { "epoch": 48.844, "grad_norm": 1.1611725091934204, "learning_rate": 2e-05, "loss": 0.04334851, "step": 24422 }, { "epoch": 48.846, "grad_norm": 1.372332215309143, "learning_rate": 2e-05, "loss": 0.04785038, "step": 24423 }, { "epoch": 48.848, "grad_norm": 0.9004780650138855, "learning_rate": 2e-05, "loss": 0.03351145, "step": 24424 }, { "epoch": 48.85, "grad_norm": 0.9335043430328369, "learning_rate": 2e-05, "loss": 0.03579152, "step": 24425 }, { "epoch": 48.852, "grad_norm": 1.0407893657684326, "learning_rate": 2e-05, "loss": 0.03815415, "step": 24426 }, { "epoch": 48.854, "grad_norm": 0.8316537737846375, "learning_rate": 2e-05, "loss": 0.02116261, "step": 24427 }, { "epoch": 48.856, "grad_norm": 1.2476933002471924, "learning_rate": 2e-05, "loss": 0.05714922, "step": 24428 }, { "epoch": 48.858, "grad_norm": 0.9810808300971985, "learning_rate": 2e-05, "loss": 0.03940214, "step": 24429 }, { "epoch": 48.86, "grad_norm": 1.078212857246399, "learning_rate": 2e-05, "loss": 0.05045369, "step": 24430 }, { "epoch": 48.862, "grad_norm": 1.0168265104293823, "learning_rate": 2e-05, "loss": 0.03490464, "step": 24431 }, { "epoch": 48.864, "grad_norm": 3.093143939971924, "learning_rate": 2e-05, "loss": 0.04677321, "step": 24432 }, { "epoch": 48.866, "grad_norm": 0.9214034676551819, "learning_rate": 2e-05, "loss": 0.0374401, "step": 24433 }, { "epoch": 48.868, "grad_norm": 1.2424558401107788, "learning_rate": 2e-05, "loss": 0.04146077, "step": 24434 }, { "epoch": 48.87, "grad_norm": 1.215382695198059, "learning_rate": 2e-05, "loss": 0.04679055, "step": 24435 }, { "epoch": 48.872, "grad_norm": 1.0487350225448608, "learning_rate": 2e-05, "loss": 0.03203248, "step": 24436 }, { "epoch": 48.874, "grad_norm": 2.260284662246704, "learning_rate": 2e-05, "loss": 0.05089334, "step": 24437 }, { "epoch": 48.876, "grad_norm": 6.283088684082031, "learning_rate": 2e-05, "loss": 0.06139347, "step": 24438 }, { "epoch": 48.878, "grad_norm": 1.2415889501571655, "learning_rate": 2e-05, "loss": 0.04238732, "step": 24439 }, { "epoch": 48.88, "grad_norm": 0.9172052145004272, "learning_rate": 2e-05, "loss": 0.03516413, "step": 24440 }, { "epoch": 48.882, "grad_norm": 1.2842570543289185, "learning_rate": 2e-05, "loss": 0.05789215, "step": 24441 }, { "epoch": 48.884, "grad_norm": 2.0520265102386475, "learning_rate": 2e-05, "loss": 0.06247369, "step": 24442 }, { "epoch": 48.886, "grad_norm": 1.0168920755386353, "learning_rate": 2e-05, "loss": 0.04313808, "step": 24443 }, { "epoch": 48.888, "grad_norm": 1.090262532234192, "learning_rate": 2e-05, "loss": 0.05598099, "step": 24444 }, { "epoch": 48.89, "grad_norm": 1.0099059343338013, "learning_rate": 2e-05, "loss": 0.03575943, "step": 24445 }, { "epoch": 48.892, "grad_norm": 1.1766314506530762, "learning_rate": 2e-05, "loss": 0.0414086, "step": 24446 }, { "epoch": 48.894, "grad_norm": 1.1394931077957153, "learning_rate": 2e-05, "loss": 0.04667051, "step": 24447 }, { "epoch": 48.896, "grad_norm": 1.3024219274520874, "learning_rate": 2e-05, "loss": 0.0646999, "step": 24448 }, { "epoch": 48.898, "grad_norm": 1.031206488609314, "learning_rate": 2e-05, "loss": 0.03263982, "step": 24449 }, { "epoch": 48.9, "grad_norm": 1.414013147354126, "learning_rate": 2e-05, "loss": 0.0322731, "step": 24450 }, { "epoch": 48.902, "grad_norm": 0.8033210039138794, "learning_rate": 2e-05, "loss": 0.02987779, "step": 24451 }, { "epoch": 48.904, "grad_norm": 1.283576488494873, "learning_rate": 2e-05, "loss": 0.04825049, "step": 24452 }, { "epoch": 48.906, "grad_norm": 1.2101244926452637, "learning_rate": 2e-05, "loss": 0.04218683, "step": 24453 }, { "epoch": 48.908, "grad_norm": 0.9382783770561218, "learning_rate": 2e-05, "loss": 0.04189238, "step": 24454 }, { "epoch": 48.91, "grad_norm": 1.2882331609725952, "learning_rate": 2e-05, "loss": 0.03295884, "step": 24455 }, { "epoch": 48.912, "grad_norm": 2.6059720516204834, "learning_rate": 2e-05, "loss": 0.03539278, "step": 24456 }, { "epoch": 48.914, "grad_norm": 1.1913390159606934, "learning_rate": 2e-05, "loss": 0.04725235, "step": 24457 }, { "epoch": 48.916, "grad_norm": 1.3205485343933105, "learning_rate": 2e-05, "loss": 0.05486622, "step": 24458 }, { "epoch": 48.918, "grad_norm": 2.254817247390747, "learning_rate": 2e-05, "loss": 0.04231478, "step": 24459 }, { "epoch": 48.92, "grad_norm": 1.3705193996429443, "learning_rate": 2e-05, "loss": 0.04113527, "step": 24460 }, { "epoch": 48.922, "grad_norm": 2.1772735118865967, "learning_rate": 2e-05, "loss": 0.03877498, "step": 24461 }, { "epoch": 48.924, "grad_norm": 1.1485670804977417, "learning_rate": 2e-05, "loss": 0.05482171, "step": 24462 }, { "epoch": 48.926, "grad_norm": 0.8895829916000366, "learning_rate": 2e-05, "loss": 0.02937362, "step": 24463 }, { "epoch": 48.928, "grad_norm": 1.0779998302459717, "learning_rate": 2e-05, "loss": 0.03473584, "step": 24464 }, { "epoch": 48.93, "grad_norm": 1.1385302543640137, "learning_rate": 2e-05, "loss": 0.04469822, "step": 24465 }, { "epoch": 48.932, "grad_norm": 1.342939019203186, "learning_rate": 2e-05, "loss": 0.03818111, "step": 24466 }, { "epoch": 48.934, "grad_norm": 1.848419427871704, "learning_rate": 2e-05, "loss": 0.05075561, "step": 24467 }, { "epoch": 48.936, "grad_norm": 1.5834753513336182, "learning_rate": 2e-05, "loss": 0.0539801, "step": 24468 }, { "epoch": 48.938, "grad_norm": 1.4930813312530518, "learning_rate": 2e-05, "loss": 0.05064347, "step": 24469 }, { "epoch": 48.94, "grad_norm": 1.1843292713165283, "learning_rate": 2e-05, "loss": 0.05155434, "step": 24470 }, { "epoch": 48.942, "grad_norm": 1.3824281692504883, "learning_rate": 2e-05, "loss": 0.05579555, "step": 24471 }, { "epoch": 48.944, "grad_norm": 0.9852959513664246, "learning_rate": 2e-05, "loss": 0.04141999, "step": 24472 }, { "epoch": 48.946, "grad_norm": 1.050506591796875, "learning_rate": 2e-05, "loss": 0.03825409, "step": 24473 }, { "epoch": 48.948, "grad_norm": 0.7316854596138, "learning_rate": 2e-05, "loss": 0.02565436, "step": 24474 }, { "epoch": 48.95, "grad_norm": 1.0315051078796387, "learning_rate": 2e-05, "loss": 0.02817239, "step": 24475 }, { "epoch": 48.952, "grad_norm": 1.309634804725647, "learning_rate": 2e-05, "loss": 0.04222146, "step": 24476 }, { "epoch": 48.954, "grad_norm": 1.6903845071792603, "learning_rate": 2e-05, "loss": 0.04784102, "step": 24477 }, { "epoch": 48.956, "grad_norm": 1.246270775794983, "learning_rate": 2e-05, "loss": 0.03245929, "step": 24478 }, { "epoch": 48.958, "grad_norm": 0.9276645183563232, "learning_rate": 2e-05, "loss": 0.0378779, "step": 24479 }, { "epoch": 48.96, "grad_norm": 2.562434196472168, "learning_rate": 2e-05, "loss": 0.04819561, "step": 24480 }, { "epoch": 48.962, "grad_norm": 1.131085991859436, "learning_rate": 2e-05, "loss": 0.04977149, "step": 24481 }, { "epoch": 48.964, "grad_norm": 2.169659376144409, "learning_rate": 2e-05, "loss": 0.05206218, "step": 24482 }, { "epoch": 48.966, "grad_norm": 1.2484794855117798, "learning_rate": 2e-05, "loss": 0.05283345, "step": 24483 }, { "epoch": 48.968, "grad_norm": 1.2104519605636597, "learning_rate": 2e-05, "loss": 0.04184366, "step": 24484 }, { "epoch": 48.97, "grad_norm": 0.9614647626876831, "learning_rate": 2e-05, "loss": 0.03740752, "step": 24485 }, { "epoch": 48.972, "grad_norm": 1.092301845550537, "learning_rate": 2e-05, "loss": 0.03565601, "step": 24486 }, { "epoch": 48.974, "grad_norm": 1.0595544576644897, "learning_rate": 2e-05, "loss": 0.04346304, "step": 24487 }, { "epoch": 48.976, "grad_norm": 1.5760530233383179, "learning_rate": 2e-05, "loss": 0.05206705, "step": 24488 }, { "epoch": 48.978, "grad_norm": 0.8983845114707947, "learning_rate": 2e-05, "loss": 0.03474924, "step": 24489 }, { "epoch": 48.98, "grad_norm": 1.123649001121521, "learning_rate": 2e-05, "loss": 0.048374, "step": 24490 }, { "epoch": 48.982, "grad_norm": 1.3236852884292603, "learning_rate": 2e-05, "loss": 0.04201726, "step": 24491 }, { "epoch": 48.984, "grad_norm": 1.2178043127059937, "learning_rate": 2e-05, "loss": 0.04156953, "step": 24492 }, { "epoch": 48.986, "grad_norm": 0.9753429293632507, "learning_rate": 2e-05, "loss": 0.02856264, "step": 24493 }, { "epoch": 48.988, "grad_norm": 1.0256152153015137, "learning_rate": 2e-05, "loss": 0.04131383, "step": 24494 }, { "epoch": 48.99, "grad_norm": 0.9965881109237671, "learning_rate": 2e-05, "loss": 0.04147368, "step": 24495 }, { "epoch": 48.992, "grad_norm": 1.501907229423523, "learning_rate": 2e-05, "loss": 0.04094127, "step": 24496 }, { "epoch": 48.994, "grad_norm": 0.9894804954528809, "learning_rate": 2e-05, "loss": 0.04942727, "step": 24497 }, { "epoch": 48.996, "grad_norm": 0.8625822067260742, "learning_rate": 2e-05, "loss": 0.03427246, "step": 24498 }, { "epoch": 48.998, "grad_norm": 1.0206845998764038, "learning_rate": 2e-05, "loss": 0.03845121, "step": 24499 }, { "epoch": 49.0, "grad_norm": 1.0286264419555664, "learning_rate": 2e-05, "loss": 0.04464644, "step": 24500 }, { "epoch": 49.0, "eval_performance": { "AngleClassification_1": 1.0, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9800399201596807, "Equal_1": 0.998, "Equal_2": 0.9820359281437125, "Equal_3": 0.9880239520958084, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 0.9960079840319361, "Parallel_1": 0.9919839679358717, "Parallel_2": 0.9979959919839679, "Parallel_3": 0.994, "Perpendicular_1": 0.998, "Perpendicular_2": 0.994, "Perpendicular_3": 0.8997995991983968, "PointLiesOnCircle_1": 0.9979959919839679, "PointLiesOnCircle_2": 1.0, "PointLiesOnCircle_3": 0.994, "PointLiesOnLine_1": 0.9919839679358717, "PointLiesOnLine_2": 1.0, "PointLiesOnLine_3": 0.9840319361277445 }, "eval_runtime": 225.2624, "eval_samples_per_second": 46.612, "eval_steps_per_second": 0.932, "step": 24500 }, { "epoch": 49.002, "grad_norm": 0.8583446741104126, "learning_rate": 2e-05, "loss": 0.02878646, "step": 24501 }, { "epoch": 49.004, "grad_norm": 0.9851155877113342, "learning_rate": 2e-05, "loss": 0.03248733, "step": 24502 }, { "epoch": 49.006, "grad_norm": 1.1575677394866943, "learning_rate": 2e-05, "loss": 0.04908581, "step": 24503 }, { "epoch": 49.008, "grad_norm": 1.9135984182357788, "learning_rate": 2e-05, "loss": 0.05225367, "step": 24504 }, { "epoch": 49.01, "grad_norm": 1.0571635961532593, "learning_rate": 2e-05, "loss": 0.03725421, "step": 24505 }, { "epoch": 49.012, "grad_norm": 0.9231696128845215, "learning_rate": 2e-05, "loss": 0.03511491, "step": 24506 }, { "epoch": 49.014, "grad_norm": 1.0619639158248901, "learning_rate": 2e-05, "loss": 0.04548306, "step": 24507 }, { "epoch": 49.016, "grad_norm": 1.3454912900924683, "learning_rate": 2e-05, "loss": 0.05696021, "step": 24508 }, { "epoch": 49.018, "grad_norm": 1.1394917964935303, "learning_rate": 2e-05, "loss": 0.03755286, "step": 24509 }, { "epoch": 49.02, "grad_norm": 0.9397946000099182, "learning_rate": 2e-05, "loss": 0.0267869, "step": 24510 }, { "epoch": 49.022, "grad_norm": 1.1348040103912354, "learning_rate": 2e-05, "loss": 0.04928058, "step": 24511 }, { "epoch": 49.024, "grad_norm": 1.5963038206100464, "learning_rate": 2e-05, "loss": 0.04332534, "step": 24512 }, { "epoch": 49.026, "grad_norm": 0.9240319132804871, "learning_rate": 2e-05, "loss": 0.0472898, "step": 24513 }, { "epoch": 49.028, "grad_norm": 1.0876508951187134, "learning_rate": 2e-05, "loss": 0.04335083, "step": 24514 }, { "epoch": 49.03, "grad_norm": 1.7481757402420044, "learning_rate": 2e-05, "loss": 0.06548224, "step": 24515 }, { "epoch": 49.032, "grad_norm": 1.241217851638794, "learning_rate": 2e-05, "loss": 0.05546284, "step": 24516 }, { "epoch": 49.034, "grad_norm": 2.1888632774353027, "learning_rate": 2e-05, "loss": 0.04448302, "step": 24517 }, { "epoch": 49.036, "grad_norm": 1.15812349319458, "learning_rate": 2e-05, "loss": 0.05106151, "step": 24518 }, { "epoch": 49.038, "grad_norm": 1.1973505020141602, "learning_rate": 2e-05, "loss": 0.0450438, "step": 24519 }, { "epoch": 49.04, "grad_norm": 2.0966145992279053, "learning_rate": 2e-05, "loss": 0.05570465, "step": 24520 }, { "epoch": 49.042, "grad_norm": 2.673330783843994, "learning_rate": 2e-05, "loss": 0.05014793, "step": 24521 }, { "epoch": 49.044, "grad_norm": 0.980739414691925, "learning_rate": 2e-05, "loss": 0.03596898, "step": 24522 }, { "epoch": 49.046, "grad_norm": 1.6443208456039429, "learning_rate": 2e-05, "loss": 0.03878018, "step": 24523 }, { "epoch": 49.048, "grad_norm": 0.9813085198402405, "learning_rate": 2e-05, "loss": 0.04585869, "step": 24524 }, { "epoch": 49.05, "grad_norm": 1.5423898696899414, "learning_rate": 2e-05, "loss": 0.03968333, "step": 24525 }, { "epoch": 49.052, "grad_norm": 0.9640806913375854, "learning_rate": 2e-05, "loss": 0.04608911, "step": 24526 }, { "epoch": 49.054, "grad_norm": 0.9483569264411926, "learning_rate": 2e-05, "loss": 0.03394688, "step": 24527 }, { "epoch": 49.056, "grad_norm": 1.3797762393951416, "learning_rate": 2e-05, "loss": 0.04742776, "step": 24528 }, { "epoch": 49.058, "grad_norm": 1.0050280094146729, "learning_rate": 2e-05, "loss": 0.0408802, "step": 24529 }, { "epoch": 49.06, "grad_norm": 1.3145060539245605, "learning_rate": 2e-05, "loss": 0.05096242, "step": 24530 }, { "epoch": 49.062, "grad_norm": 1.0005338191986084, "learning_rate": 2e-05, "loss": 0.03343801, "step": 24531 }, { "epoch": 49.064, "grad_norm": 0.9544199109077454, "learning_rate": 2e-05, "loss": 0.02959356, "step": 24532 }, { "epoch": 49.066, "grad_norm": 0.9279265403747559, "learning_rate": 2e-05, "loss": 0.04169089, "step": 24533 }, { "epoch": 49.068, "grad_norm": 0.8280339241027832, "learning_rate": 2e-05, "loss": 0.03095424, "step": 24534 }, { "epoch": 49.07, "grad_norm": 1.0266772508621216, "learning_rate": 2e-05, "loss": 0.03506602, "step": 24535 }, { "epoch": 49.072, "grad_norm": 1.2256178855895996, "learning_rate": 2e-05, "loss": 0.03429116, "step": 24536 }, { "epoch": 49.074, "grad_norm": 0.9931820631027222, "learning_rate": 2e-05, "loss": 0.03936617, "step": 24537 }, { "epoch": 49.076, "grad_norm": 1.2586666345596313, "learning_rate": 2e-05, "loss": 0.04560041, "step": 24538 }, { "epoch": 49.078, "grad_norm": 1.106284499168396, "learning_rate": 2e-05, "loss": 0.04347613, "step": 24539 }, { "epoch": 49.08, "grad_norm": 2.020146608352661, "learning_rate": 2e-05, "loss": 0.04126682, "step": 24540 }, { "epoch": 49.082, "grad_norm": 0.9850757122039795, "learning_rate": 2e-05, "loss": 0.04097305, "step": 24541 }, { "epoch": 49.084, "grad_norm": 1.4308165311813354, "learning_rate": 2e-05, "loss": 0.05083925, "step": 24542 }, { "epoch": 49.086, "grad_norm": 1.0677158832550049, "learning_rate": 2e-05, "loss": 0.041007, "step": 24543 }, { "epoch": 49.088, "grad_norm": 0.9380294680595398, "learning_rate": 2e-05, "loss": 0.03367423, "step": 24544 }, { "epoch": 49.09, "grad_norm": 0.99846351146698, "learning_rate": 2e-05, "loss": 0.04039502, "step": 24545 }, { "epoch": 49.092, "grad_norm": 0.9589121341705322, "learning_rate": 2e-05, "loss": 0.0403058, "step": 24546 }, { "epoch": 49.094, "grad_norm": 0.9853387475013733, "learning_rate": 2e-05, "loss": 0.03225574, "step": 24547 }, { "epoch": 49.096, "grad_norm": 1.1121022701263428, "learning_rate": 2e-05, "loss": 0.04666482, "step": 24548 }, { "epoch": 49.098, "grad_norm": 2.83243727684021, "learning_rate": 2e-05, "loss": 0.04655527, "step": 24549 }, { "epoch": 49.1, "grad_norm": 1.2283378839492798, "learning_rate": 2e-05, "loss": 0.04209259, "step": 24550 }, { "epoch": 49.102, "grad_norm": 1.2571814060211182, "learning_rate": 2e-05, "loss": 0.04930793, "step": 24551 }, { "epoch": 49.104, "grad_norm": 0.9971734881401062, "learning_rate": 2e-05, "loss": 0.03615848, "step": 24552 }, { "epoch": 49.106, "grad_norm": 1.1642414331436157, "learning_rate": 2e-05, "loss": 0.0504192, "step": 24553 }, { "epoch": 49.108, "grad_norm": 2.009573221206665, "learning_rate": 2e-05, "loss": 0.05487194, "step": 24554 }, { "epoch": 49.11, "grad_norm": 1.770125150680542, "learning_rate": 2e-05, "loss": 0.03940173, "step": 24555 }, { "epoch": 49.112, "grad_norm": 0.8116210103034973, "learning_rate": 2e-05, "loss": 0.02463161, "step": 24556 }, { "epoch": 49.114, "grad_norm": 0.9781795144081116, "learning_rate": 2e-05, "loss": 0.04124717, "step": 24557 }, { "epoch": 49.116, "grad_norm": 0.9360294342041016, "learning_rate": 2e-05, "loss": 0.03511227, "step": 24558 }, { "epoch": 49.118, "grad_norm": 1.2480345964431763, "learning_rate": 2e-05, "loss": 0.05629587, "step": 24559 }, { "epoch": 49.12, "grad_norm": 2.986537218093872, "learning_rate": 2e-05, "loss": 0.04658691, "step": 24560 }, { "epoch": 49.122, "grad_norm": 1.1173847913742065, "learning_rate": 2e-05, "loss": 0.04091944, "step": 24561 }, { "epoch": 49.124, "grad_norm": 0.9558855295181274, "learning_rate": 2e-05, "loss": 0.04287197, "step": 24562 }, { "epoch": 49.126, "grad_norm": 1.652414321899414, "learning_rate": 2e-05, "loss": 0.07029001, "step": 24563 }, { "epoch": 49.128, "grad_norm": 0.8693435192108154, "learning_rate": 2e-05, "loss": 0.03287993, "step": 24564 }, { "epoch": 49.13, "grad_norm": 1.0141468048095703, "learning_rate": 2e-05, "loss": 0.03744337, "step": 24565 }, { "epoch": 49.132, "grad_norm": 1.1087985038757324, "learning_rate": 2e-05, "loss": 0.03972914, "step": 24566 }, { "epoch": 49.134, "grad_norm": 1.007947325706482, "learning_rate": 2e-05, "loss": 0.02856949, "step": 24567 }, { "epoch": 49.136, "grad_norm": 0.9426724314689636, "learning_rate": 2e-05, "loss": 0.03619595, "step": 24568 }, { "epoch": 49.138, "grad_norm": 1.0125420093536377, "learning_rate": 2e-05, "loss": 0.03893601, "step": 24569 }, { "epoch": 49.14, "grad_norm": 1.0333759784698486, "learning_rate": 2e-05, "loss": 0.04273131, "step": 24570 }, { "epoch": 49.142, "grad_norm": 1.0643538236618042, "learning_rate": 2e-05, "loss": 0.04247178, "step": 24571 }, { "epoch": 49.144, "grad_norm": 1.1779112815856934, "learning_rate": 2e-05, "loss": 0.04865604, "step": 24572 }, { "epoch": 49.146, "grad_norm": 2.9364054203033447, "learning_rate": 2e-05, "loss": 0.04364017, "step": 24573 }, { "epoch": 49.148, "grad_norm": 1.2954450845718384, "learning_rate": 2e-05, "loss": 0.07155376, "step": 24574 }, { "epoch": 49.15, "grad_norm": 1.0921179056167603, "learning_rate": 2e-05, "loss": 0.03807881, "step": 24575 }, { "epoch": 49.152, "grad_norm": 1.0704469680786133, "learning_rate": 2e-05, "loss": 0.03751354, "step": 24576 }, { "epoch": 49.154, "grad_norm": 4.479954242706299, "learning_rate": 2e-05, "loss": 0.05739081, "step": 24577 }, { "epoch": 49.156, "grad_norm": 1.2876255512237549, "learning_rate": 2e-05, "loss": 0.04973248, "step": 24578 }, { "epoch": 49.158, "grad_norm": 1.1002424955368042, "learning_rate": 2e-05, "loss": 0.04914426, "step": 24579 }, { "epoch": 49.16, "grad_norm": 0.9710339307785034, "learning_rate": 2e-05, "loss": 0.03993352, "step": 24580 }, { "epoch": 49.162, "grad_norm": 0.9672435522079468, "learning_rate": 2e-05, "loss": 0.03542747, "step": 24581 }, { "epoch": 49.164, "grad_norm": 1.171025037765503, "learning_rate": 2e-05, "loss": 0.03548908, "step": 24582 }, { "epoch": 49.166, "grad_norm": 1.3907248973846436, "learning_rate": 2e-05, "loss": 0.05717108, "step": 24583 }, { "epoch": 49.168, "grad_norm": 1.0556650161743164, "learning_rate": 2e-05, "loss": 0.05506879, "step": 24584 }, { "epoch": 49.17, "grad_norm": 0.9577919840812683, "learning_rate": 2e-05, "loss": 0.04090136, "step": 24585 }, { "epoch": 49.172, "grad_norm": 1.1164604425430298, "learning_rate": 2e-05, "loss": 0.04113727, "step": 24586 }, { "epoch": 49.174, "grad_norm": 0.9861107468605042, "learning_rate": 2e-05, "loss": 0.03927059, "step": 24587 }, { "epoch": 49.176, "grad_norm": 1.057741403579712, "learning_rate": 2e-05, "loss": 0.03765612, "step": 24588 }, { "epoch": 49.178, "grad_norm": 1.4868308305740356, "learning_rate": 2e-05, "loss": 0.03644291, "step": 24589 }, { "epoch": 49.18, "grad_norm": 0.8538646101951599, "learning_rate": 2e-05, "loss": 0.03074658, "step": 24590 }, { "epoch": 49.182, "grad_norm": 1.0696104764938354, "learning_rate": 2e-05, "loss": 0.04166707, "step": 24591 }, { "epoch": 49.184, "grad_norm": 2.3413591384887695, "learning_rate": 2e-05, "loss": 0.04105772, "step": 24592 }, { "epoch": 49.186, "grad_norm": 1.0375920534133911, "learning_rate": 2e-05, "loss": 0.03956812, "step": 24593 }, { "epoch": 49.188, "grad_norm": 1.1364259719848633, "learning_rate": 2e-05, "loss": 0.04986398, "step": 24594 }, { "epoch": 49.19, "grad_norm": 2.688459873199463, "learning_rate": 2e-05, "loss": 0.0614192, "step": 24595 }, { "epoch": 49.192, "grad_norm": 1.116719365119934, "learning_rate": 2e-05, "loss": 0.04095181, "step": 24596 }, { "epoch": 49.194, "grad_norm": 1.1176522970199585, "learning_rate": 2e-05, "loss": 0.04834028, "step": 24597 }, { "epoch": 49.196, "grad_norm": 0.9887307286262512, "learning_rate": 2e-05, "loss": 0.03353071, "step": 24598 }, { "epoch": 49.198, "grad_norm": 0.9877430200576782, "learning_rate": 2e-05, "loss": 0.04203132, "step": 24599 }, { "epoch": 49.2, "grad_norm": 1.2153443098068237, "learning_rate": 2e-05, "loss": 0.03651748, "step": 24600 }, { "epoch": 49.202, "grad_norm": 1.2961647510528564, "learning_rate": 2e-05, "loss": 0.05894446, "step": 24601 }, { "epoch": 49.204, "grad_norm": 1.1947745084762573, "learning_rate": 2e-05, "loss": 0.03382131, "step": 24602 }, { "epoch": 49.206, "grad_norm": 1.0582457780838013, "learning_rate": 2e-05, "loss": 0.0446366, "step": 24603 }, { "epoch": 49.208, "grad_norm": 1.3313616514205933, "learning_rate": 2e-05, "loss": 0.05331346, "step": 24604 }, { "epoch": 49.21, "grad_norm": 1.0319162607192993, "learning_rate": 2e-05, "loss": 0.03923106, "step": 24605 }, { "epoch": 49.212, "grad_norm": 2.036741256713867, "learning_rate": 2e-05, "loss": 0.04638916, "step": 24606 }, { "epoch": 49.214, "grad_norm": 0.9423798322677612, "learning_rate": 2e-05, "loss": 0.04253983, "step": 24607 }, { "epoch": 49.216, "grad_norm": 1.08109712600708, "learning_rate": 2e-05, "loss": 0.04881127, "step": 24608 }, { "epoch": 49.218, "grad_norm": 1.106484055519104, "learning_rate": 2e-05, "loss": 0.0435302, "step": 24609 }, { "epoch": 49.22, "grad_norm": 0.8532112240791321, "learning_rate": 2e-05, "loss": 0.01858062, "step": 24610 }, { "epoch": 49.222, "grad_norm": 1.0292795896530151, "learning_rate": 2e-05, "loss": 0.04507443, "step": 24611 }, { "epoch": 49.224, "grad_norm": 1.0857493877410889, "learning_rate": 2e-05, "loss": 0.04592678, "step": 24612 }, { "epoch": 49.226, "grad_norm": 1.8009240627288818, "learning_rate": 2e-05, "loss": 0.06089173, "step": 24613 }, { "epoch": 49.228, "grad_norm": 1.0835274457931519, "learning_rate": 2e-05, "loss": 0.04217212, "step": 24614 }, { "epoch": 49.23, "grad_norm": 1.1094833612442017, "learning_rate": 2e-05, "loss": 0.05112696, "step": 24615 }, { "epoch": 49.232, "grad_norm": 1.205477237701416, "learning_rate": 2e-05, "loss": 0.04340441, "step": 24616 }, { "epoch": 49.234, "grad_norm": 1.5527219772338867, "learning_rate": 2e-05, "loss": 0.04797059, "step": 24617 }, { "epoch": 49.236, "grad_norm": 1.183831810951233, "learning_rate": 2e-05, "loss": 0.05035634, "step": 24618 }, { "epoch": 49.238, "grad_norm": 1.2945419549942017, "learning_rate": 2e-05, "loss": 0.0319232, "step": 24619 }, { "epoch": 49.24, "grad_norm": 1.0422414541244507, "learning_rate": 2e-05, "loss": 0.04007659, "step": 24620 }, { "epoch": 49.242, "grad_norm": 1.6463996171951294, "learning_rate": 2e-05, "loss": 0.03573649, "step": 24621 }, { "epoch": 49.244, "grad_norm": 0.975992739200592, "learning_rate": 2e-05, "loss": 0.04361503, "step": 24622 }, { "epoch": 49.246, "grad_norm": 1.0894376039505005, "learning_rate": 2e-05, "loss": 0.04976866, "step": 24623 }, { "epoch": 49.248, "grad_norm": 1.0065984725952148, "learning_rate": 2e-05, "loss": 0.03552318, "step": 24624 }, { "epoch": 49.25, "grad_norm": 1.2080392837524414, "learning_rate": 2e-05, "loss": 0.04639924, "step": 24625 }, { "epoch": 49.252, "grad_norm": 0.9586325287818909, "learning_rate": 2e-05, "loss": 0.03254563, "step": 24626 }, { "epoch": 49.254, "grad_norm": 1.0653356313705444, "learning_rate": 2e-05, "loss": 0.03666486, "step": 24627 }, { "epoch": 49.256, "grad_norm": 1.0278879404067993, "learning_rate": 2e-05, "loss": 0.02797717, "step": 24628 }, { "epoch": 49.258, "grad_norm": 2.3303043842315674, "learning_rate": 2e-05, "loss": 0.03973323, "step": 24629 }, { "epoch": 49.26, "grad_norm": 1.2879209518432617, "learning_rate": 2e-05, "loss": 0.05315416, "step": 24630 }, { "epoch": 49.262, "grad_norm": 1.8166649341583252, "learning_rate": 2e-05, "loss": 0.04504403, "step": 24631 }, { "epoch": 49.264, "grad_norm": 3.1052403450012207, "learning_rate": 2e-05, "loss": 0.0474759, "step": 24632 }, { "epoch": 49.266, "grad_norm": 1.8105281591415405, "learning_rate": 2e-05, "loss": 0.05667026, "step": 24633 }, { "epoch": 49.268, "grad_norm": 2.293126106262207, "learning_rate": 2e-05, "loss": 0.05219525, "step": 24634 }, { "epoch": 49.27, "grad_norm": 1.109702467918396, "learning_rate": 2e-05, "loss": 0.0441339, "step": 24635 }, { "epoch": 49.272, "grad_norm": 1.3569027185440063, "learning_rate": 2e-05, "loss": 0.04739938, "step": 24636 }, { "epoch": 49.274, "grad_norm": 1.0846933126449585, "learning_rate": 2e-05, "loss": 0.04329056, "step": 24637 }, { "epoch": 49.276, "grad_norm": 1.0567454099655151, "learning_rate": 2e-05, "loss": 0.03278684, "step": 24638 }, { "epoch": 49.278, "grad_norm": 1.1167305707931519, "learning_rate": 2e-05, "loss": 0.05570868, "step": 24639 }, { "epoch": 49.28, "grad_norm": 1.2852802276611328, "learning_rate": 2e-05, "loss": 0.04792012, "step": 24640 }, { "epoch": 49.282, "grad_norm": 0.9999123215675354, "learning_rate": 2e-05, "loss": 0.04235648, "step": 24641 }, { "epoch": 49.284, "grad_norm": 0.8534079194068909, "learning_rate": 2e-05, "loss": 0.0269881, "step": 24642 }, { "epoch": 49.286, "grad_norm": 1.124849557876587, "learning_rate": 2e-05, "loss": 0.03860065, "step": 24643 }, { "epoch": 49.288, "grad_norm": 1.3666092157363892, "learning_rate": 2e-05, "loss": 0.03782129, "step": 24644 }, { "epoch": 49.29, "grad_norm": 1.378060221672058, "learning_rate": 2e-05, "loss": 0.03012034, "step": 24645 }, { "epoch": 49.292, "grad_norm": 0.9148956537246704, "learning_rate": 2e-05, "loss": 0.0281839, "step": 24646 }, { "epoch": 49.294, "grad_norm": 1.1802234649658203, "learning_rate": 2e-05, "loss": 0.04364511, "step": 24647 }, { "epoch": 49.296, "grad_norm": 1.572166919708252, "learning_rate": 2e-05, "loss": 0.06220203, "step": 24648 }, { "epoch": 49.298, "grad_norm": 1.059324860572815, "learning_rate": 2e-05, "loss": 0.03796491, "step": 24649 }, { "epoch": 49.3, "grad_norm": 1.0360914468765259, "learning_rate": 2e-05, "loss": 0.0471753, "step": 24650 }, { "epoch": 49.302, "grad_norm": 1.3504611253738403, "learning_rate": 2e-05, "loss": 0.04822979, "step": 24651 }, { "epoch": 49.304, "grad_norm": 1.2905203104019165, "learning_rate": 2e-05, "loss": 0.04391602, "step": 24652 }, { "epoch": 49.306, "grad_norm": 1.2455099821090698, "learning_rate": 2e-05, "loss": 0.03146322, "step": 24653 }, { "epoch": 49.308, "grad_norm": 1.323203206062317, "learning_rate": 2e-05, "loss": 0.05788973, "step": 24654 }, { "epoch": 49.31, "grad_norm": 1.1326344013214111, "learning_rate": 2e-05, "loss": 0.04332056, "step": 24655 }, { "epoch": 49.312, "grad_norm": 1.718957781791687, "learning_rate": 2e-05, "loss": 0.04510897, "step": 24656 }, { "epoch": 49.314, "grad_norm": 1.2120651006698608, "learning_rate": 2e-05, "loss": 0.05926821, "step": 24657 }, { "epoch": 49.316, "grad_norm": 0.9322920441627502, "learning_rate": 2e-05, "loss": 0.03083501, "step": 24658 }, { "epoch": 49.318, "grad_norm": 1.2577521800994873, "learning_rate": 2e-05, "loss": 0.04124579, "step": 24659 }, { "epoch": 49.32, "grad_norm": 1.2376171350479126, "learning_rate": 2e-05, "loss": 0.05630952, "step": 24660 }, { "epoch": 49.322, "grad_norm": 1.118993878364563, "learning_rate": 2e-05, "loss": 0.04258374, "step": 24661 }, { "epoch": 49.324, "grad_norm": 1.4571943283081055, "learning_rate": 2e-05, "loss": 0.06272435, "step": 24662 }, { "epoch": 49.326, "grad_norm": 1.1407997608184814, "learning_rate": 2e-05, "loss": 0.04013451, "step": 24663 }, { "epoch": 49.328, "grad_norm": 1.0370513200759888, "learning_rate": 2e-05, "loss": 0.03939046, "step": 24664 }, { "epoch": 49.33, "grad_norm": 1.0017948150634766, "learning_rate": 2e-05, "loss": 0.0430796, "step": 24665 }, { "epoch": 49.332, "grad_norm": 0.9457167387008667, "learning_rate": 2e-05, "loss": 0.02823194, "step": 24666 }, { "epoch": 49.334, "grad_norm": 1.0324259996414185, "learning_rate": 2e-05, "loss": 0.04115735, "step": 24667 }, { "epoch": 49.336, "grad_norm": 1.0007051229476929, "learning_rate": 2e-05, "loss": 0.04468364, "step": 24668 }, { "epoch": 49.338, "grad_norm": 1.1138314008712769, "learning_rate": 2e-05, "loss": 0.03701682, "step": 24669 }, { "epoch": 49.34, "grad_norm": 1.1422604322433472, "learning_rate": 2e-05, "loss": 0.04005319, "step": 24670 }, { "epoch": 49.342, "grad_norm": 1.1146414279937744, "learning_rate": 2e-05, "loss": 0.04270441, "step": 24671 }, { "epoch": 49.344, "grad_norm": 1.130110502243042, "learning_rate": 2e-05, "loss": 0.04896062, "step": 24672 }, { "epoch": 49.346, "grad_norm": 1.111843466758728, "learning_rate": 2e-05, "loss": 0.06119808, "step": 24673 }, { "epoch": 49.348, "grad_norm": 1.3585618734359741, "learning_rate": 2e-05, "loss": 0.04980446, "step": 24674 }, { "epoch": 49.35, "grad_norm": 1.0276371240615845, "learning_rate": 2e-05, "loss": 0.04615089, "step": 24675 }, { "epoch": 49.352, "grad_norm": 1.2625640630722046, "learning_rate": 2e-05, "loss": 0.05774258, "step": 24676 }, { "epoch": 49.354, "grad_norm": 1.0882261991500854, "learning_rate": 2e-05, "loss": 0.03528015, "step": 24677 }, { "epoch": 49.356, "grad_norm": 1.2010290622711182, "learning_rate": 2e-05, "loss": 0.04758507, "step": 24678 }, { "epoch": 49.358, "grad_norm": 1.7031240463256836, "learning_rate": 2e-05, "loss": 0.04642814, "step": 24679 }, { "epoch": 49.36, "grad_norm": 1.1779032945632935, "learning_rate": 2e-05, "loss": 0.04119724, "step": 24680 }, { "epoch": 49.362, "grad_norm": 1.2029178142547607, "learning_rate": 2e-05, "loss": 0.03965361, "step": 24681 }, { "epoch": 49.364, "grad_norm": 1.2390246391296387, "learning_rate": 2e-05, "loss": 0.05226513, "step": 24682 }, { "epoch": 49.366, "grad_norm": 1.1336655616760254, "learning_rate": 2e-05, "loss": 0.04133983, "step": 24683 }, { "epoch": 49.368, "grad_norm": 0.917346179485321, "learning_rate": 2e-05, "loss": 0.03386243, "step": 24684 }, { "epoch": 49.37, "grad_norm": 1.0905228853225708, "learning_rate": 2e-05, "loss": 0.05082481, "step": 24685 }, { "epoch": 49.372, "grad_norm": 1.5331437587738037, "learning_rate": 2e-05, "loss": 0.04803453, "step": 24686 }, { "epoch": 49.374, "grad_norm": 0.9902241230010986, "learning_rate": 2e-05, "loss": 0.04177987, "step": 24687 }, { "epoch": 49.376, "grad_norm": 1.1352343559265137, "learning_rate": 2e-05, "loss": 0.05020043, "step": 24688 }, { "epoch": 49.378, "grad_norm": 1.0997288227081299, "learning_rate": 2e-05, "loss": 0.04313187, "step": 24689 }, { "epoch": 49.38, "grad_norm": 1.0782105922698975, "learning_rate": 2e-05, "loss": 0.03647075, "step": 24690 }, { "epoch": 49.382, "grad_norm": 1.2032206058502197, "learning_rate": 2e-05, "loss": 0.04110326, "step": 24691 }, { "epoch": 49.384, "grad_norm": 1.193439245223999, "learning_rate": 2e-05, "loss": 0.04080711, "step": 24692 }, { "epoch": 49.386, "grad_norm": 1.726857304573059, "learning_rate": 2e-05, "loss": 0.05584483, "step": 24693 }, { "epoch": 49.388, "grad_norm": 1.2693097591400146, "learning_rate": 2e-05, "loss": 0.05596174, "step": 24694 }, { "epoch": 49.39, "grad_norm": 0.9810490012168884, "learning_rate": 2e-05, "loss": 0.04783895, "step": 24695 }, { "epoch": 49.392, "grad_norm": 0.9765514135360718, "learning_rate": 2e-05, "loss": 0.04450765, "step": 24696 }, { "epoch": 49.394, "grad_norm": 8.964780807495117, "learning_rate": 2e-05, "loss": 0.06945755, "step": 24697 }, { "epoch": 49.396, "grad_norm": 0.8959904909133911, "learning_rate": 2e-05, "loss": 0.03427248, "step": 24698 }, { "epoch": 49.398, "grad_norm": 0.9807778000831604, "learning_rate": 2e-05, "loss": 0.02669545, "step": 24699 }, { "epoch": 49.4, "grad_norm": 1.0889664888381958, "learning_rate": 2e-05, "loss": 0.0537745, "step": 24700 }, { "epoch": 49.402, "grad_norm": 1.1517667770385742, "learning_rate": 2e-05, "loss": 0.02914631, "step": 24701 }, { "epoch": 49.404, "grad_norm": 1.4584327936172485, "learning_rate": 2e-05, "loss": 0.06181898, "step": 24702 }, { "epoch": 49.406, "grad_norm": 1.4141366481781006, "learning_rate": 2e-05, "loss": 0.04195465, "step": 24703 }, { "epoch": 49.408, "grad_norm": 1.022611141204834, "learning_rate": 2e-05, "loss": 0.04146919, "step": 24704 }, { "epoch": 49.41, "grad_norm": 3.007127285003662, "learning_rate": 2e-05, "loss": 0.04600266, "step": 24705 }, { "epoch": 49.412, "grad_norm": 1.114327073097229, "learning_rate": 2e-05, "loss": 0.04626326, "step": 24706 }, { "epoch": 49.414, "grad_norm": 2.1486666202545166, "learning_rate": 2e-05, "loss": 0.04987513, "step": 24707 }, { "epoch": 49.416, "grad_norm": 0.9948080778121948, "learning_rate": 2e-05, "loss": 0.04465476, "step": 24708 }, { "epoch": 49.418, "grad_norm": 1.065022587776184, "learning_rate": 2e-05, "loss": 0.04357406, "step": 24709 }, { "epoch": 49.42, "grad_norm": 1.0498683452606201, "learning_rate": 2e-05, "loss": 0.04670323, "step": 24710 }, { "epoch": 49.422, "grad_norm": 1.1768629550933838, "learning_rate": 2e-05, "loss": 0.03564895, "step": 24711 }, { "epoch": 49.424, "grad_norm": 1.3938863277435303, "learning_rate": 2e-05, "loss": 0.06006155, "step": 24712 }, { "epoch": 49.426, "grad_norm": 0.9704594016075134, "learning_rate": 2e-05, "loss": 0.04674681, "step": 24713 }, { "epoch": 49.428, "grad_norm": 0.972400426864624, "learning_rate": 2e-05, "loss": 0.03741869, "step": 24714 }, { "epoch": 49.43, "grad_norm": 0.9401067495346069, "learning_rate": 2e-05, "loss": 0.03956653, "step": 24715 }, { "epoch": 49.432, "grad_norm": 1.37922203540802, "learning_rate": 2e-05, "loss": 0.04319195, "step": 24716 }, { "epoch": 49.434, "grad_norm": 1.190850853919983, "learning_rate": 2e-05, "loss": 0.04866359, "step": 24717 }, { "epoch": 49.436, "grad_norm": 1.0724947452545166, "learning_rate": 2e-05, "loss": 0.04414925, "step": 24718 }, { "epoch": 49.438, "grad_norm": 1.0883091688156128, "learning_rate": 2e-05, "loss": 0.0542049, "step": 24719 }, { "epoch": 49.44, "grad_norm": 1.54373300075531, "learning_rate": 2e-05, "loss": 0.05054798, "step": 24720 }, { "epoch": 49.442, "grad_norm": 0.9083189368247986, "learning_rate": 2e-05, "loss": 0.02986085, "step": 24721 }, { "epoch": 49.444, "grad_norm": 1.0260084867477417, "learning_rate": 2e-05, "loss": 0.04128547, "step": 24722 }, { "epoch": 49.446, "grad_norm": 1.0199854373931885, "learning_rate": 2e-05, "loss": 0.04422305, "step": 24723 }, { "epoch": 49.448, "grad_norm": 1.10794997215271, "learning_rate": 2e-05, "loss": 0.03618187, "step": 24724 }, { "epoch": 49.45, "grad_norm": 1.6060500144958496, "learning_rate": 2e-05, "loss": 0.04625408, "step": 24725 }, { "epoch": 49.452, "grad_norm": 1.1200417280197144, "learning_rate": 2e-05, "loss": 0.04874882, "step": 24726 }, { "epoch": 49.454, "grad_norm": 0.8720324039459229, "learning_rate": 2e-05, "loss": 0.02442401, "step": 24727 }, { "epoch": 49.456, "grad_norm": 0.9494289755821228, "learning_rate": 2e-05, "loss": 0.0305661, "step": 24728 }, { "epoch": 49.458, "grad_norm": 1.1519304513931274, "learning_rate": 2e-05, "loss": 0.04396735, "step": 24729 }, { "epoch": 49.46, "grad_norm": 0.9433466792106628, "learning_rate": 2e-05, "loss": 0.03663548, "step": 24730 }, { "epoch": 49.462, "grad_norm": 0.8576862812042236, "learning_rate": 2e-05, "loss": 0.03240684, "step": 24731 }, { "epoch": 49.464, "grad_norm": 1.5251647233963013, "learning_rate": 2e-05, "loss": 0.04433912, "step": 24732 }, { "epoch": 49.466, "grad_norm": 1.0683085918426514, "learning_rate": 2e-05, "loss": 0.04529489, "step": 24733 }, { "epoch": 49.468, "grad_norm": 0.9964842200279236, "learning_rate": 2e-05, "loss": 0.03180193, "step": 24734 }, { "epoch": 49.47, "grad_norm": 1.1677091121673584, "learning_rate": 2e-05, "loss": 0.04339496, "step": 24735 }, { "epoch": 49.472, "grad_norm": 1.1310073137283325, "learning_rate": 2e-05, "loss": 0.04492489, "step": 24736 }, { "epoch": 49.474, "grad_norm": 1.1572608947753906, "learning_rate": 2e-05, "loss": 0.04643245, "step": 24737 }, { "epoch": 49.476, "grad_norm": 1.1417022943496704, "learning_rate": 2e-05, "loss": 0.05412038, "step": 24738 }, { "epoch": 49.478, "grad_norm": 1.1997774839401245, "learning_rate": 2e-05, "loss": 0.05101493, "step": 24739 }, { "epoch": 49.48, "grad_norm": 0.9632428288459778, "learning_rate": 2e-05, "loss": 0.0451662, "step": 24740 }, { "epoch": 49.482, "grad_norm": 1.104675054550171, "learning_rate": 2e-05, "loss": 0.04268707, "step": 24741 }, { "epoch": 49.484, "grad_norm": 1.1712579727172852, "learning_rate": 2e-05, "loss": 0.05423909, "step": 24742 }, { "epoch": 49.486, "grad_norm": 1.0737416744232178, "learning_rate": 2e-05, "loss": 0.05446152, "step": 24743 }, { "epoch": 49.488, "grad_norm": 1.342406988143921, "learning_rate": 2e-05, "loss": 0.04656635, "step": 24744 }, { "epoch": 49.49, "grad_norm": 1.0200036764144897, "learning_rate": 2e-05, "loss": 0.03646156, "step": 24745 }, { "epoch": 49.492, "grad_norm": 1.04305100440979, "learning_rate": 2e-05, "loss": 0.05235441, "step": 24746 }, { "epoch": 49.494, "grad_norm": 1.029733657836914, "learning_rate": 2e-05, "loss": 0.04650908, "step": 24747 }, { "epoch": 49.496, "grad_norm": 1.0096410512924194, "learning_rate": 2e-05, "loss": 0.03905725, "step": 24748 }, { "epoch": 49.498, "grad_norm": 3.061919689178467, "learning_rate": 2e-05, "loss": 0.0524442, "step": 24749 }, { "epoch": 49.5, "grad_norm": 3.1289222240448, "learning_rate": 2e-05, "loss": 0.06494565, "step": 24750 }, { "epoch": 49.502, "grad_norm": 1.0056252479553223, "learning_rate": 2e-05, "loss": 0.04348144, "step": 24751 }, { "epoch": 49.504, "grad_norm": 2.9071202278137207, "learning_rate": 2e-05, "loss": 0.0595634, "step": 24752 }, { "epoch": 49.506, "grad_norm": 1.0068395137786865, "learning_rate": 2e-05, "loss": 0.04620958, "step": 24753 }, { "epoch": 49.508, "grad_norm": 1.0774083137512207, "learning_rate": 2e-05, "loss": 0.03431277, "step": 24754 }, { "epoch": 49.51, "grad_norm": 1.0018212795257568, "learning_rate": 2e-05, "loss": 0.03377705, "step": 24755 }, { "epoch": 49.512, "grad_norm": 1.349738359451294, "learning_rate": 2e-05, "loss": 0.04063623, "step": 24756 }, { "epoch": 49.514, "grad_norm": 1.3048489093780518, "learning_rate": 2e-05, "loss": 0.04847485, "step": 24757 }, { "epoch": 49.516, "grad_norm": 1.0069266557693481, "learning_rate": 2e-05, "loss": 0.04076147, "step": 24758 }, { "epoch": 49.518, "grad_norm": 1.0892078876495361, "learning_rate": 2e-05, "loss": 0.04461211, "step": 24759 }, { "epoch": 49.52, "grad_norm": 2.8372607231140137, "learning_rate": 2e-05, "loss": 0.07047935, "step": 24760 }, { "epoch": 49.522, "grad_norm": 1.056724190711975, "learning_rate": 2e-05, "loss": 0.03957215, "step": 24761 }, { "epoch": 49.524, "grad_norm": 1.2342745065689087, "learning_rate": 2e-05, "loss": 0.04317779, "step": 24762 }, { "epoch": 49.526, "grad_norm": 0.9254241585731506, "learning_rate": 2e-05, "loss": 0.03505052, "step": 24763 }, { "epoch": 49.528, "grad_norm": 1.1359896659851074, "learning_rate": 2e-05, "loss": 0.0378983, "step": 24764 }, { "epoch": 49.53, "grad_norm": 1.3386236429214478, "learning_rate": 2e-05, "loss": 0.05099172, "step": 24765 }, { "epoch": 49.532, "grad_norm": 0.9271393418312073, "learning_rate": 2e-05, "loss": 0.03775775, "step": 24766 }, { "epoch": 49.534, "grad_norm": 1.0704801082611084, "learning_rate": 2e-05, "loss": 0.04510186, "step": 24767 }, { "epoch": 49.536, "grad_norm": 1.5312801599502563, "learning_rate": 2e-05, "loss": 0.04211585, "step": 24768 }, { "epoch": 49.538, "grad_norm": 1.4320790767669678, "learning_rate": 2e-05, "loss": 0.05060805, "step": 24769 }, { "epoch": 49.54, "grad_norm": 0.7448192238807678, "learning_rate": 2e-05, "loss": 0.01872768, "step": 24770 }, { "epoch": 49.542, "grad_norm": 1.0809388160705566, "learning_rate": 2e-05, "loss": 0.04057469, "step": 24771 }, { "epoch": 49.544, "grad_norm": 1.0746262073516846, "learning_rate": 2e-05, "loss": 0.03865876, "step": 24772 }, { "epoch": 49.546, "grad_norm": 1.9519784450531006, "learning_rate": 2e-05, "loss": 0.05613887, "step": 24773 }, { "epoch": 49.548, "grad_norm": 1.5609780550003052, "learning_rate": 2e-05, "loss": 0.04499282, "step": 24774 }, { "epoch": 49.55, "grad_norm": 2.2495954036712646, "learning_rate": 2e-05, "loss": 0.05356452, "step": 24775 }, { "epoch": 49.552, "grad_norm": 0.991223156452179, "learning_rate": 2e-05, "loss": 0.05030403, "step": 24776 }, { "epoch": 49.554, "grad_norm": 0.8626605868339539, "learning_rate": 2e-05, "loss": 0.02678864, "step": 24777 }, { "epoch": 49.556, "grad_norm": 0.9818754196166992, "learning_rate": 2e-05, "loss": 0.04858343, "step": 24778 }, { "epoch": 49.558, "grad_norm": 2.119330644607544, "learning_rate": 2e-05, "loss": 0.05371625, "step": 24779 }, { "epoch": 49.56, "grad_norm": 0.6945134401321411, "learning_rate": 2e-05, "loss": 0.02326001, "step": 24780 }, { "epoch": 49.562, "grad_norm": 1.407651424407959, "learning_rate": 2e-05, "loss": 0.03856934, "step": 24781 }, { "epoch": 49.564, "grad_norm": 0.9893646836280823, "learning_rate": 2e-05, "loss": 0.03339931, "step": 24782 }, { "epoch": 49.566, "grad_norm": 1.1991362571716309, "learning_rate": 2e-05, "loss": 0.04740452, "step": 24783 }, { "epoch": 49.568, "grad_norm": 0.8290677070617676, "learning_rate": 2e-05, "loss": 0.02461241, "step": 24784 }, { "epoch": 49.57, "grad_norm": 0.8643149137496948, "learning_rate": 2e-05, "loss": 0.03052554, "step": 24785 }, { "epoch": 49.572, "grad_norm": 1.2954477071762085, "learning_rate": 2e-05, "loss": 0.04940365, "step": 24786 }, { "epoch": 49.574, "grad_norm": 1.1949050426483154, "learning_rate": 2e-05, "loss": 0.04736652, "step": 24787 }, { "epoch": 49.576, "grad_norm": 1.1324270963668823, "learning_rate": 2e-05, "loss": 0.04971011, "step": 24788 }, { "epoch": 49.578, "grad_norm": 1.7234563827514648, "learning_rate": 2e-05, "loss": 0.05535849, "step": 24789 }, { "epoch": 49.58, "grad_norm": 0.8952879309654236, "learning_rate": 2e-05, "loss": 0.03086542, "step": 24790 }, { "epoch": 49.582, "grad_norm": 1.0903652906417847, "learning_rate": 2e-05, "loss": 0.04609568, "step": 24791 }, { "epoch": 49.584, "grad_norm": 1.0622342824935913, "learning_rate": 2e-05, "loss": 0.05448773, "step": 24792 }, { "epoch": 49.586, "grad_norm": 1.202077031135559, "learning_rate": 2e-05, "loss": 0.04804546, "step": 24793 }, { "epoch": 49.588, "grad_norm": 1.0059195756912231, "learning_rate": 2e-05, "loss": 0.03175985, "step": 24794 }, { "epoch": 49.59, "grad_norm": 0.9214918613433838, "learning_rate": 2e-05, "loss": 0.03904678, "step": 24795 }, { "epoch": 49.592, "grad_norm": 0.9755774736404419, "learning_rate": 2e-05, "loss": 0.04978103, "step": 24796 }, { "epoch": 49.594, "grad_norm": 1.190766453742981, "learning_rate": 2e-05, "loss": 0.05260941, "step": 24797 }, { "epoch": 49.596, "grad_norm": 1.2022937536239624, "learning_rate": 2e-05, "loss": 0.0497488, "step": 24798 }, { "epoch": 49.598, "grad_norm": 1.3012069463729858, "learning_rate": 2e-05, "loss": 0.05515526, "step": 24799 }, { "epoch": 49.6, "grad_norm": 1.144500494003296, "learning_rate": 2e-05, "loss": 0.03072025, "step": 24800 }, { "epoch": 49.602, "grad_norm": 1.2053053379058838, "learning_rate": 2e-05, "loss": 0.0516979, "step": 24801 }, { "epoch": 49.604, "grad_norm": 0.9579549431800842, "learning_rate": 2e-05, "loss": 0.03502993, "step": 24802 }, { "epoch": 49.606, "grad_norm": 1.2894636392593384, "learning_rate": 2e-05, "loss": 0.04848025, "step": 24803 }, { "epoch": 49.608, "grad_norm": 1.0626569986343384, "learning_rate": 2e-05, "loss": 0.0373996, "step": 24804 }, { "epoch": 49.61, "grad_norm": 1.1010606288909912, "learning_rate": 2e-05, "loss": 0.04229572, "step": 24805 }, { "epoch": 49.612, "grad_norm": 1.3592993021011353, "learning_rate": 2e-05, "loss": 0.05048738, "step": 24806 }, { "epoch": 49.614, "grad_norm": 1.5161974430084229, "learning_rate": 2e-05, "loss": 0.06620622, "step": 24807 }, { "epoch": 49.616, "grad_norm": 1.0021581649780273, "learning_rate": 2e-05, "loss": 0.05341959, "step": 24808 }, { "epoch": 49.618, "grad_norm": 1.1023560762405396, "learning_rate": 2e-05, "loss": 0.03936197, "step": 24809 }, { "epoch": 49.62, "grad_norm": 1.6482691764831543, "learning_rate": 2e-05, "loss": 0.03655303, "step": 24810 }, { "epoch": 49.622, "grad_norm": 1.3629248142242432, "learning_rate": 2e-05, "loss": 0.04756444, "step": 24811 }, { "epoch": 49.624, "grad_norm": 1.1595243215560913, "learning_rate": 2e-05, "loss": 0.04744871, "step": 24812 }, { "epoch": 49.626, "grad_norm": 0.9872837066650391, "learning_rate": 2e-05, "loss": 0.03105111, "step": 24813 }, { "epoch": 49.628, "grad_norm": 1.4306702613830566, "learning_rate": 2e-05, "loss": 0.05686957, "step": 24814 }, { "epoch": 49.63, "grad_norm": 1.083804965019226, "learning_rate": 2e-05, "loss": 0.04063908, "step": 24815 }, { "epoch": 49.632, "grad_norm": 0.9744434356689453, "learning_rate": 2e-05, "loss": 0.03802946, "step": 24816 }, { "epoch": 49.634, "grad_norm": 0.9040374755859375, "learning_rate": 2e-05, "loss": 0.03771093, "step": 24817 }, { "epoch": 49.636, "grad_norm": 1.3792043924331665, "learning_rate": 2e-05, "loss": 0.0672031, "step": 24818 }, { "epoch": 49.638, "grad_norm": 0.8202154040336609, "learning_rate": 2e-05, "loss": 0.02961908, "step": 24819 }, { "epoch": 49.64, "grad_norm": 1.118488073348999, "learning_rate": 2e-05, "loss": 0.04422841, "step": 24820 }, { "epoch": 49.642, "grad_norm": 1.541996717453003, "learning_rate": 2e-05, "loss": 0.03663242, "step": 24821 }, { "epoch": 49.644, "grad_norm": 1.0276820659637451, "learning_rate": 2e-05, "loss": 0.04533731, "step": 24822 }, { "epoch": 49.646, "grad_norm": 0.9830988049507141, "learning_rate": 2e-05, "loss": 0.0273793, "step": 24823 }, { "epoch": 49.648, "grad_norm": 1.1483078002929688, "learning_rate": 2e-05, "loss": 0.03894852, "step": 24824 }, { "epoch": 49.65, "grad_norm": 1.456406831741333, "learning_rate": 2e-05, "loss": 0.05092985, "step": 24825 }, { "epoch": 49.652, "grad_norm": 1.3090943098068237, "learning_rate": 2e-05, "loss": 0.04998201, "step": 24826 }, { "epoch": 49.654, "grad_norm": 1.1949411630630493, "learning_rate": 2e-05, "loss": 0.04100946, "step": 24827 }, { "epoch": 49.656, "grad_norm": 0.9303939938545227, "learning_rate": 2e-05, "loss": 0.02468432, "step": 24828 }, { "epoch": 49.658, "grad_norm": 1.3813577890396118, "learning_rate": 2e-05, "loss": 0.05121312, "step": 24829 }, { "epoch": 49.66, "grad_norm": 1.081059217453003, "learning_rate": 2e-05, "loss": 0.04219427, "step": 24830 }, { "epoch": 49.662, "grad_norm": 0.9480776786804199, "learning_rate": 2e-05, "loss": 0.03769704, "step": 24831 }, { "epoch": 49.664, "grad_norm": 1.4447762966156006, "learning_rate": 2e-05, "loss": 0.03532948, "step": 24832 }, { "epoch": 49.666, "grad_norm": 1.1264969110488892, "learning_rate": 2e-05, "loss": 0.04793052, "step": 24833 }, { "epoch": 49.668, "grad_norm": 1.2184799909591675, "learning_rate": 2e-05, "loss": 0.05303574, "step": 24834 }, { "epoch": 49.67, "grad_norm": 0.98581862449646, "learning_rate": 2e-05, "loss": 0.04781711, "step": 24835 }, { "epoch": 49.672, "grad_norm": 1.2370775938034058, "learning_rate": 2e-05, "loss": 0.04284956, "step": 24836 }, { "epoch": 49.674, "grad_norm": 1.0807240009307861, "learning_rate": 2e-05, "loss": 0.04022052, "step": 24837 }, { "epoch": 49.676, "grad_norm": 0.9385483860969543, "learning_rate": 2e-05, "loss": 0.03576366, "step": 24838 }, { "epoch": 49.678, "grad_norm": 1.9082796573638916, "learning_rate": 2e-05, "loss": 0.04182665, "step": 24839 }, { "epoch": 49.68, "grad_norm": 0.9855085015296936, "learning_rate": 2e-05, "loss": 0.04332475, "step": 24840 }, { "epoch": 49.682, "grad_norm": 1.052796483039856, "learning_rate": 2e-05, "loss": 0.05000243, "step": 24841 }, { "epoch": 49.684, "grad_norm": 1.2842367887496948, "learning_rate": 2e-05, "loss": 0.04829719, "step": 24842 }, { "epoch": 49.686, "grad_norm": 1.8028453588485718, "learning_rate": 2e-05, "loss": 0.04254216, "step": 24843 }, { "epoch": 49.688, "grad_norm": 1.3590182065963745, "learning_rate": 2e-05, "loss": 0.05686507, "step": 24844 }, { "epoch": 49.69, "grad_norm": 1.0941338539123535, "learning_rate": 2e-05, "loss": 0.05110643, "step": 24845 }, { "epoch": 49.692, "grad_norm": 1.1010818481445312, "learning_rate": 2e-05, "loss": 0.06121033, "step": 24846 }, { "epoch": 49.694, "grad_norm": 1.0535378456115723, "learning_rate": 2e-05, "loss": 0.04916462, "step": 24847 }, { "epoch": 49.696, "grad_norm": 0.8626852035522461, "learning_rate": 2e-05, "loss": 0.03044773, "step": 24848 }, { "epoch": 49.698, "grad_norm": 1.1342573165893555, "learning_rate": 2e-05, "loss": 0.03986349, "step": 24849 }, { "epoch": 49.7, "grad_norm": 2.3549184799194336, "learning_rate": 2e-05, "loss": 0.04721179, "step": 24850 }, { "epoch": 49.702, "grad_norm": 1.5162947177886963, "learning_rate": 2e-05, "loss": 0.04141025, "step": 24851 }, { "epoch": 49.704, "grad_norm": 0.8891056776046753, "learning_rate": 2e-05, "loss": 0.02372481, "step": 24852 }, { "epoch": 49.706, "grad_norm": 0.9024839997291565, "learning_rate": 2e-05, "loss": 0.02757401, "step": 24853 }, { "epoch": 49.708, "grad_norm": 0.9477348923683167, "learning_rate": 2e-05, "loss": 0.03025828, "step": 24854 }, { "epoch": 49.71, "grad_norm": 1.0611172914505005, "learning_rate": 2e-05, "loss": 0.03228024, "step": 24855 }, { "epoch": 49.712, "grad_norm": 0.9956902861595154, "learning_rate": 2e-05, "loss": 0.0326893, "step": 24856 }, { "epoch": 49.714, "grad_norm": 1.0358914136886597, "learning_rate": 2e-05, "loss": 0.04935817, "step": 24857 }, { "epoch": 49.716, "grad_norm": 0.8950533866882324, "learning_rate": 2e-05, "loss": 0.03259481, "step": 24858 }, { "epoch": 49.718, "grad_norm": 1.203794240951538, "learning_rate": 2e-05, "loss": 0.06020834, "step": 24859 }, { "epoch": 49.72, "grad_norm": 1.5588897466659546, "learning_rate": 2e-05, "loss": 0.04630896, "step": 24860 }, { "epoch": 49.722, "grad_norm": 1.1105680465698242, "learning_rate": 2e-05, "loss": 0.04828059, "step": 24861 }, { "epoch": 49.724, "grad_norm": 1.1131335496902466, "learning_rate": 2e-05, "loss": 0.04634016, "step": 24862 }, { "epoch": 49.726, "grad_norm": 1.0347458124160767, "learning_rate": 2e-05, "loss": 0.04644644, "step": 24863 }, { "epoch": 49.728, "grad_norm": 1.325013279914856, "learning_rate": 2e-05, "loss": 0.06091823, "step": 24864 }, { "epoch": 49.73, "grad_norm": 1.630811333656311, "learning_rate": 2e-05, "loss": 0.046593, "step": 24865 }, { "epoch": 49.732, "grad_norm": 1.1042811870574951, "learning_rate": 2e-05, "loss": 0.04020706, "step": 24866 }, { "epoch": 49.734, "grad_norm": 1.2629075050354004, "learning_rate": 2e-05, "loss": 0.05800544, "step": 24867 }, { "epoch": 49.736, "grad_norm": 1.3433440923690796, "learning_rate": 2e-05, "loss": 0.03619378, "step": 24868 }, { "epoch": 49.738, "grad_norm": 1.1060618162155151, "learning_rate": 2e-05, "loss": 0.05418765, "step": 24869 }, { "epoch": 49.74, "grad_norm": 0.9184826612472534, "learning_rate": 2e-05, "loss": 0.03164966, "step": 24870 }, { "epoch": 49.742, "grad_norm": 1.887937068939209, "learning_rate": 2e-05, "loss": 0.05083492, "step": 24871 }, { "epoch": 49.744, "grad_norm": 1.0048717260360718, "learning_rate": 2e-05, "loss": 0.03851245, "step": 24872 }, { "epoch": 49.746, "grad_norm": 1.1231769323349, "learning_rate": 2e-05, "loss": 0.04883204, "step": 24873 }, { "epoch": 49.748, "grad_norm": 1.5423176288604736, "learning_rate": 2e-05, "loss": 0.04539229, "step": 24874 }, { "epoch": 49.75, "grad_norm": 0.9069594144821167, "learning_rate": 2e-05, "loss": 0.03787273, "step": 24875 }, { "epoch": 49.752, "grad_norm": 0.9556673169136047, "learning_rate": 2e-05, "loss": 0.03602478, "step": 24876 }, { "epoch": 49.754, "grad_norm": 1.1162433624267578, "learning_rate": 2e-05, "loss": 0.05431801, "step": 24877 }, { "epoch": 49.756, "grad_norm": 2.209975242614746, "learning_rate": 2e-05, "loss": 0.06188718, "step": 24878 }, { "epoch": 49.758, "grad_norm": 0.9537637829780579, "learning_rate": 2e-05, "loss": 0.03330243, "step": 24879 }, { "epoch": 49.76, "grad_norm": 1.1607041358947754, "learning_rate": 2e-05, "loss": 0.0500868, "step": 24880 }, { "epoch": 49.762, "grad_norm": 1.042999267578125, "learning_rate": 2e-05, "loss": 0.03492203, "step": 24881 }, { "epoch": 49.764, "grad_norm": 1.9624617099761963, "learning_rate": 2e-05, "loss": 0.03566482, "step": 24882 }, { "epoch": 49.766, "grad_norm": 1.218358039855957, "learning_rate": 2e-05, "loss": 0.05145382, "step": 24883 }, { "epoch": 49.768, "grad_norm": 0.8092342615127563, "learning_rate": 2e-05, "loss": 0.02541806, "step": 24884 }, { "epoch": 49.77, "grad_norm": 1.1442075967788696, "learning_rate": 2e-05, "loss": 0.03843348, "step": 24885 }, { "epoch": 49.772, "grad_norm": 1.036983609199524, "learning_rate": 2e-05, "loss": 0.04306552, "step": 24886 }, { "epoch": 49.774, "grad_norm": 0.962234616279602, "learning_rate": 2e-05, "loss": 0.02417943, "step": 24887 }, { "epoch": 49.776, "grad_norm": 0.8866322636604309, "learning_rate": 2e-05, "loss": 0.03473242, "step": 24888 }, { "epoch": 49.778, "grad_norm": 0.9864252805709839, "learning_rate": 2e-05, "loss": 0.03776695, "step": 24889 }, { "epoch": 49.78, "grad_norm": 1.2825427055358887, "learning_rate": 2e-05, "loss": 0.04866822, "step": 24890 }, { "epoch": 49.782, "grad_norm": 1.1259939670562744, "learning_rate": 2e-05, "loss": 0.04382715, "step": 24891 }, { "epoch": 49.784, "grad_norm": 1.0599970817565918, "learning_rate": 2e-05, "loss": 0.05545626, "step": 24892 }, { "epoch": 49.786, "grad_norm": 0.9740849733352661, "learning_rate": 2e-05, "loss": 0.03826662, "step": 24893 }, { "epoch": 49.788, "grad_norm": 1.1011581420898438, "learning_rate": 2e-05, "loss": 0.04356104, "step": 24894 }, { "epoch": 49.79, "grad_norm": 1.0393247604370117, "learning_rate": 2e-05, "loss": 0.0313997, "step": 24895 }, { "epoch": 49.792, "grad_norm": 1.0680491924285889, "learning_rate": 2e-05, "loss": 0.04629553, "step": 24896 }, { "epoch": 49.794, "grad_norm": 0.9690883755683899, "learning_rate": 2e-05, "loss": 0.03323016, "step": 24897 }, { "epoch": 49.796, "grad_norm": 1.139299988746643, "learning_rate": 2e-05, "loss": 0.04732706, "step": 24898 }, { "epoch": 49.798, "grad_norm": 0.9536605477333069, "learning_rate": 2e-05, "loss": 0.03206507, "step": 24899 }, { "epoch": 49.8, "grad_norm": 1.2573645114898682, "learning_rate": 2e-05, "loss": 0.04279046, "step": 24900 }, { "epoch": 49.802, "grad_norm": 1.3798284530639648, "learning_rate": 2e-05, "loss": 0.04954434, "step": 24901 }, { "epoch": 49.804, "grad_norm": 1.2849464416503906, "learning_rate": 2e-05, "loss": 0.03823982, "step": 24902 }, { "epoch": 49.806, "grad_norm": 1.146357774734497, "learning_rate": 2e-05, "loss": 0.04749518, "step": 24903 }, { "epoch": 49.808, "grad_norm": 1.7107923030853271, "learning_rate": 2e-05, "loss": 0.03398804, "step": 24904 }, { "epoch": 49.81, "grad_norm": 1.7503803968429565, "learning_rate": 2e-05, "loss": 0.03375882, "step": 24905 }, { "epoch": 49.812, "grad_norm": 1.0013306140899658, "learning_rate": 2e-05, "loss": 0.03456224, "step": 24906 }, { "epoch": 49.814, "grad_norm": 2.049954891204834, "learning_rate": 2e-05, "loss": 0.05196879, "step": 24907 }, { "epoch": 49.816, "grad_norm": 1.0595333576202393, "learning_rate": 2e-05, "loss": 0.03910065, "step": 24908 }, { "epoch": 49.818, "grad_norm": 1.4569969177246094, "learning_rate": 2e-05, "loss": 0.04358368, "step": 24909 }, { "epoch": 49.82, "grad_norm": 1.036618947982788, "learning_rate": 2e-05, "loss": 0.04219685, "step": 24910 }, { "epoch": 49.822, "grad_norm": 1.2029200792312622, "learning_rate": 2e-05, "loss": 0.04156885, "step": 24911 }, { "epoch": 49.824, "grad_norm": 2.3981430530548096, "learning_rate": 2e-05, "loss": 0.05088883, "step": 24912 }, { "epoch": 49.826, "grad_norm": 1.0911908149719238, "learning_rate": 2e-05, "loss": 0.0423134, "step": 24913 }, { "epoch": 49.828, "grad_norm": 1.795661211013794, "learning_rate": 2e-05, "loss": 0.05145224, "step": 24914 }, { "epoch": 49.83, "grad_norm": 1.1347631216049194, "learning_rate": 2e-05, "loss": 0.04425269, "step": 24915 }, { "epoch": 49.832, "grad_norm": 0.996590256690979, "learning_rate": 2e-05, "loss": 0.03622637, "step": 24916 }, { "epoch": 49.834, "grad_norm": 1.114018201828003, "learning_rate": 2e-05, "loss": 0.03644144, "step": 24917 }, { "epoch": 49.836, "grad_norm": 1.7798055410385132, "learning_rate": 2e-05, "loss": 0.02921627, "step": 24918 }, { "epoch": 49.838, "grad_norm": 6.049989700317383, "learning_rate": 2e-05, "loss": 0.0376968, "step": 24919 }, { "epoch": 49.84, "grad_norm": 2.1038734912872314, "learning_rate": 2e-05, "loss": 0.05064761, "step": 24920 }, { "epoch": 49.842, "grad_norm": 1.0031530857086182, "learning_rate": 2e-05, "loss": 0.03724699, "step": 24921 }, { "epoch": 49.844, "grad_norm": 0.9289708733558655, "learning_rate": 2e-05, "loss": 0.03552995, "step": 24922 }, { "epoch": 49.846, "grad_norm": 1.2577115297317505, "learning_rate": 2e-05, "loss": 0.04416243, "step": 24923 }, { "epoch": 49.848, "grad_norm": 2.0983121395111084, "learning_rate": 2e-05, "loss": 0.06318004, "step": 24924 }, { "epoch": 49.85, "grad_norm": 1.1208101511001587, "learning_rate": 2e-05, "loss": 0.04421171, "step": 24925 }, { "epoch": 49.852, "grad_norm": 6.197086811065674, "learning_rate": 2e-05, "loss": 0.04424567, "step": 24926 }, { "epoch": 49.854, "grad_norm": 1.4194402694702148, "learning_rate": 2e-05, "loss": 0.05405318, "step": 24927 }, { "epoch": 49.856, "grad_norm": 1.089532494544983, "learning_rate": 2e-05, "loss": 0.04562683, "step": 24928 }, { "epoch": 49.858, "grad_norm": 0.9106767773628235, "learning_rate": 2e-05, "loss": 0.03047303, "step": 24929 }, { "epoch": 49.86, "grad_norm": 0.8433771133422852, "learning_rate": 2e-05, "loss": 0.02209272, "step": 24930 }, { "epoch": 49.862, "grad_norm": 1.3784946203231812, "learning_rate": 2e-05, "loss": 0.06297345, "step": 24931 }, { "epoch": 49.864, "grad_norm": 1.1481796503067017, "learning_rate": 2e-05, "loss": 0.0448839, "step": 24932 }, { "epoch": 49.866, "grad_norm": 0.9092108607292175, "learning_rate": 2e-05, "loss": 0.02775331, "step": 24933 }, { "epoch": 49.868, "grad_norm": 1.31647527217865, "learning_rate": 2e-05, "loss": 0.04741878, "step": 24934 }, { "epoch": 49.87, "grad_norm": 1.3921095132827759, "learning_rate": 2e-05, "loss": 0.04816588, "step": 24935 }, { "epoch": 49.872, "grad_norm": 0.8842795491218567, "learning_rate": 2e-05, "loss": 0.0295011, "step": 24936 }, { "epoch": 49.874, "grad_norm": 1.1025551557540894, "learning_rate": 2e-05, "loss": 0.04078747, "step": 24937 }, { "epoch": 49.876, "grad_norm": 1.2609764337539673, "learning_rate": 2e-05, "loss": 0.03431833, "step": 24938 }, { "epoch": 49.878, "grad_norm": 0.9778035283088684, "learning_rate": 2e-05, "loss": 0.03750598, "step": 24939 }, { "epoch": 49.88, "grad_norm": 0.8919627070426941, "learning_rate": 2e-05, "loss": 0.03527178, "step": 24940 }, { "epoch": 49.882, "grad_norm": 1.1053169965744019, "learning_rate": 2e-05, "loss": 0.03513969, "step": 24941 }, { "epoch": 49.884, "grad_norm": 1.1159098148345947, "learning_rate": 2e-05, "loss": 0.03108471, "step": 24942 }, { "epoch": 49.886, "grad_norm": 1.4785064458847046, "learning_rate": 2e-05, "loss": 0.04755219, "step": 24943 }, { "epoch": 49.888, "grad_norm": 1.4567912817001343, "learning_rate": 2e-05, "loss": 0.04809107, "step": 24944 }, { "epoch": 49.89, "grad_norm": 1.0119338035583496, "learning_rate": 2e-05, "loss": 0.03600883, "step": 24945 }, { "epoch": 49.892, "grad_norm": 1.190835952758789, "learning_rate": 2e-05, "loss": 0.04753434, "step": 24946 }, { "epoch": 49.894, "grad_norm": 1.0839767456054688, "learning_rate": 2e-05, "loss": 0.03664957, "step": 24947 }, { "epoch": 49.896, "grad_norm": 1.0693492889404297, "learning_rate": 2e-05, "loss": 0.04752456, "step": 24948 }, { "epoch": 49.898, "grad_norm": 0.9856106042861938, "learning_rate": 2e-05, "loss": 0.03069158, "step": 24949 }, { "epoch": 49.9, "grad_norm": 1.1164964437484741, "learning_rate": 2e-05, "loss": 0.04414185, "step": 24950 }, { "epoch": 49.902, "grad_norm": 1.226431131362915, "learning_rate": 2e-05, "loss": 0.04662591, "step": 24951 }, { "epoch": 49.904, "grad_norm": 1.009730339050293, "learning_rate": 2e-05, "loss": 0.03885, "step": 24952 }, { "epoch": 49.906, "grad_norm": 1.103999376296997, "learning_rate": 2e-05, "loss": 0.03785316, "step": 24953 }, { "epoch": 49.908, "grad_norm": 1.0808565616607666, "learning_rate": 2e-05, "loss": 0.04197159, "step": 24954 }, { "epoch": 49.91, "grad_norm": 1.548228144645691, "learning_rate": 2e-05, "loss": 0.06571939, "step": 24955 }, { "epoch": 49.912, "grad_norm": 0.9375112652778625, "learning_rate": 2e-05, "loss": 0.0334466, "step": 24956 }, { "epoch": 49.914, "grad_norm": 1.147757887840271, "learning_rate": 2e-05, "loss": 0.04146357, "step": 24957 }, { "epoch": 49.916, "grad_norm": 1.9307175874710083, "learning_rate": 2e-05, "loss": 0.04380386, "step": 24958 }, { "epoch": 49.918, "grad_norm": 1.2523711919784546, "learning_rate": 2e-05, "loss": 0.04250611, "step": 24959 }, { "epoch": 49.92, "grad_norm": 2.221480369567871, "learning_rate": 2e-05, "loss": 0.02776944, "step": 24960 }, { "epoch": 49.922, "grad_norm": 1.1378600597381592, "learning_rate": 2e-05, "loss": 0.04391511, "step": 24961 }, { "epoch": 49.924, "grad_norm": 0.9567440748214722, "learning_rate": 2e-05, "loss": 0.03287553, "step": 24962 }, { "epoch": 49.926, "grad_norm": 1.100652813911438, "learning_rate": 2e-05, "loss": 0.05480853, "step": 24963 }, { "epoch": 49.928, "grad_norm": 1.1033121347427368, "learning_rate": 2e-05, "loss": 0.042821, "step": 24964 }, { "epoch": 49.93, "grad_norm": 1.1322799921035767, "learning_rate": 2e-05, "loss": 0.04415174, "step": 24965 }, { "epoch": 49.932, "grad_norm": 1.2606244087219238, "learning_rate": 2e-05, "loss": 0.04892481, "step": 24966 }, { "epoch": 49.934, "grad_norm": 1.5338444709777832, "learning_rate": 2e-05, "loss": 0.0447277, "step": 24967 }, { "epoch": 49.936, "grad_norm": 1.379228115081787, "learning_rate": 2e-05, "loss": 0.04925364, "step": 24968 }, { "epoch": 49.938, "grad_norm": 1.0800189971923828, "learning_rate": 2e-05, "loss": 0.04567931, "step": 24969 }, { "epoch": 49.94, "grad_norm": 3.400176525115967, "learning_rate": 2e-05, "loss": 0.04380437, "step": 24970 }, { "epoch": 49.942, "grad_norm": 1.0015314817428589, "learning_rate": 2e-05, "loss": 0.0441324, "step": 24971 }, { "epoch": 49.944, "grad_norm": 1.0332520008087158, "learning_rate": 2e-05, "loss": 0.04444009, "step": 24972 }, { "epoch": 49.946, "grad_norm": 1.0894391536712646, "learning_rate": 2e-05, "loss": 0.03653394, "step": 24973 }, { "epoch": 49.948, "grad_norm": 3.30261492729187, "learning_rate": 2e-05, "loss": 0.05914664, "step": 24974 }, { "epoch": 49.95, "grad_norm": 1.0296601057052612, "learning_rate": 2e-05, "loss": 0.03509054, "step": 24975 }, { "epoch": 49.952, "grad_norm": 1.0186389684677124, "learning_rate": 2e-05, "loss": 0.03557619, "step": 24976 }, { "epoch": 49.954, "grad_norm": 4.498384475708008, "learning_rate": 2e-05, "loss": 0.04559414, "step": 24977 }, { "epoch": 49.956, "grad_norm": 1.5450221300125122, "learning_rate": 2e-05, "loss": 0.05883741, "step": 24978 }, { "epoch": 49.958, "grad_norm": 1.1557542085647583, "learning_rate": 2e-05, "loss": 0.04679997, "step": 24979 }, { "epoch": 49.96, "grad_norm": 1.3720020055770874, "learning_rate": 2e-05, "loss": 0.04060546, "step": 24980 }, { "epoch": 49.962, "grad_norm": 1.2938978672027588, "learning_rate": 2e-05, "loss": 0.04087094, "step": 24981 }, { "epoch": 49.964, "grad_norm": 0.9590157270431519, "learning_rate": 2e-05, "loss": 0.0313542, "step": 24982 }, { "epoch": 49.966, "grad_norm": 1.0000096559524536, "learning_rate": 2e-05, "loss": 0.04113472, "step": 24983 }, { "epoch": 49.968, "grad_norm": 3.820485830307007, "learning_rate": 2e-05, "loss": 0.04907839, "step": 24984 }, { "epoch": 49.97, "grad_norm": 0.924235999584198, "learning_rate": 2e-05, "loss": 0.03174813, "step": 24985 }, { "epoch": 49.972, "grad_norm": 1.0339453220367432, "learning_rate": 2e-05, "loss": 0.04087591, "step": 24986 }, { "epoch": 49.974, "grad_norm": 1.101836085319519, "learning_rate": 2e-05, "loss": 0.02996041, "step": 24987 }, { "epoch": 49.976, "grad_norm": 1.032610297203064, "learning_rate": 2e-05, "loss": 0.03689782, "step": 24988 }, { "epoch": 49.978, "grad_norm": 2.0414888858795166, "learning_rate": 2e-05, "loss": 0.0298646, "step": 24989 }, { "epoch": 49.98, "grad_norm": 1.0647419691085815, "learning_rate": 2e-05, "loss": 0.048884, "step": 24990 }, { "epoch": 49.982, "grad_norm": 1.6127547025680542, "learning_rate": 2e-05, "loss": 0.04098117, "step": 24991 }, { "epoch": 49.984, "grad_norm": 1.0571293830871582, "learning_rate": 2e-05, "loss": 0.03534008, "step": 24992 }, { "epoch": 49.986, "grad_norm": 1.1999707221984863, "learning_rate": 2e-05, "loss": 0.04195736, "step": 24993 }, { "epoch": 49.988, "grad_norm": 1.662569522857666, "learning_rate": 2e-05, "loss": 0.05736773, "step": 24994 }, { "epoch": 49.99, "grad_norm": 1.1176104545593262, "learning_rate": 2e-05, "loss": 0.04102027, "step": 24995 }, { "epoch": 49.992, "grad_norm": 1.0699552297592163, "learning_rate": 2e-05, "loss": 0.0367976, "step": 24996 }, { "epoch": 49.994, "grad_norm": 1.013917326927185, "learning_rate": 2e-05, "loss": 0.03190574, "step": 24997 }, { "epoch": 49.996, "grad_norm": 1.588291049003601, "learning_rate": 2e-05, "loss": 0.03622506, "step": 24998 }, { "epoch": 49.998, "grad_norm": 1.030822515487671, "learning_rate": 2e-05, "loss": 0.03857606, "step": 24999 }, { "epoch": 50.0, "grad_norm": 1.2565765380859375, "learning_rate": 2e-05, "loss": 0.0394761, "step": 25000 }, { "epoch": 50.0, "eval_performance": { "AngleClassification_1": 1.0, "AngleClassification_2": 1.0, "AngleClassification_3": 0.9860279441117764, "Equal_1": 0.998, "Equal_2": 0.9820359281437125, "Equal_3": 0.9880239520958084, "LineComparison_1": 1.0, "LineComparison_2": 1.0, "LineComparison_3": 1.0, "Parallel_1": 0.9879759519038076, "Parallel_2": 0.9959919839679359, "Parallel_3": 0.992, "Perpendicular_1": 1.0, "Perpendicular_2": 0.994, "Perpendicular_3": 0.8977955911823647, "PointLiesOnCircle_1": 1.0, "PointLiesOnCircle_2": 1.0, "PointLiesOnCircle_3": 0.994, "PointLiesOnLine_1": 0.9919839679358717, "PointLiesOnLine_2": 0.9959919839679359, "PointLiesOnLine_3": 0.9820359281437125 }, "eval_runtime": 224.0568, "eval_samples_per_second": 46.863, "eval_steps_per_second": 0.937, "step": 25000 }, { "epoch": 50.0, "step": 25000, "total_flos": 6.920741581557965e+17, "train_loss": 0.054082208942621945, "train_runtime": 73266.8116, "train_samples_per_second": 21.838, "train_steps_per_second": 0.341 } ], "logging_steps": 1, "max_steps": 25000, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 5000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.920741581557965e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }