{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999159027836179, "eval_steps": 500, "global_step": 5945, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00016819443276427552, "grad_norm": NaN, "learning_rate": 1.0000000000000002e-06, "loss": 1.2492, "step": 1 }, { "epoch": 0.00033638886552855103, "grad_norm": NaN, "learning_rate": 2.0000000000000003e-06, "loss": 0.0, "step": 2 }, { "epoch": 0.0005045832982928265, "grad_norm": NaN, "learning_rate": 3e-06, "loss": 0.0, "step": 3 }, { "epoch": 0.0006727777310571021, "grad_norm": NaN, "learning_rate": 4.000000000000001e-06, "loss": 0.0, "step": 4 }, { "epoch": 0.0008409721638213775, "grad_norm": NaN, "learning_rate": 5e-06, "loss": 0.0, "step": 5 }, { "epoch": 0.001009166596585653, "grad_norm": NaN, "learning_rate": 6e-06, "loss": 0.0, "step": 6 }, { "epoch": 0.0011773610293499286, "grad_norm": NaN, "learning_rate": 7.000000000000001e-06, "loss": 0.0, "step": 7 }, { "epoch": 0.0013455554621142041, "grad_norm": NaN, "learning_rate": 8.000000000000001e-06, "loss": 0.0, "step": 8 }, { "epoch": 0.0015137498948784795, "grad_norm": NaN, "learning_rate": 9e-06, "loss": 0.0, "step": 9 }, { "epoch": 0.001681944327642755, "grad_norm": NaN, "learning_rate": 1e-05, "loss": 0.0, "step": 10 }, { "epoch": 0.0018501387604070306, "grad_norm": NaN, "learning_rate": 1.1000000000000001e-05, "loss": 0.0, "step": 11 }, { "epoch": 0.002018333193171306, "grad_norm": NaN, "learning_rate": 1.2e-05, "loss": 0.0, "step": 12 }, { "epoch": 0.0021865276259355813, "grad_norm": NaN, "learning_rate": 1.3000000000000001e-05, "loss": 0.0, "step": 13 }, { "epoch": 0.002354722058699857, "grad_norm": NaN, "learning_rate": 1.4000000000000001e-05, "loss": 0.0, "step": 14 }, { "epoch": 0.0025229164914641325, "grad_norm": NaN, "learning_rate": 1.5e-05, "loss": 0.0, "step": 15 }, { "epoch": 0.0026911109242284082, "grad_norm": NaN, "learning_rate": 1.6000000000000003e-05, "loss": 0.0, "step": 16 }, { "epoch": 0.0028593053569926836, "grad_norm": NaN, "learning_rate": 1.7000000000000003e-05, "loss": 0.0, "step": 17 }, { "epoch": 0.003027499789756959, "grad_norm": NaN, "learning_rate": 1.8e-05, "loss": 0.0, "step": 18 }, { "epoch": 0.0031956942225212347, "grad_norm": NaN, "learning_rate": 1.9e-05, "loss": 0.0, "step": 19 }, { "epoch": 0.00336388865528551, "grad_norm": NaN, "learning_rate": 2e-05, "loss": 0.0, "step": 20 }, { "epoch": 0.0035320830880497854, "grad_norm": NaN, "learning_rate": 2.1e-05, "loss": 0.0, "step": 21 }, { "epoch": 0.0037002775208140612, "grad_norm": NaN, "learning_rate": 2.2000000000000003e-05, "loss": 0.0, "step": 22 }, { "epoch": 0.0038684719535783366, "grad_norm": NaN, "learning_rate": 2.3000000000000003e-05, "loss": 0.0, "step": 23 }, { "epoch": 0.004036666386342612, "grad_norm": NaN, "learning_rate": 2.4e-05, "loss": 0.0, "step": 24 }, { "epoch": 0.004204860819106888, "grad_norm": NaN, "learning_rate": 2.5e-05, "loss": 0.0, "step": 25 }, { "epoch": 0.004373055251871163, "grad_norm": NaN, "learning_rate": 2.6000000000000002e-05, "loss": 0.0, "step": 26 }, { "epoch": 0.004541249684635438, "grad_norm": NaN, "learning_rate": 2.7000000000000002e-05, "loss": 0.0, "step": 27 }, { "epoch": 0.004709444117399714, "grad_norm": NaN, "learning_rate": 2.8000000000000003e-05, "loss": 0.0, "step": 28 }, { "epoch": 0.00487763855016399, "grad_norm": NaN, "learning_rate": 2.9e-05, "loss": 0.0, "step": 29 }, { "epoch": 0.005045832982928265, "grad_norm": NaN, "learning_rate": 3e-05, "loss": 0.0, "step": 30 }, { "epoch": 0.005214027415692541, "grad_norm": NaN, "learning_rate": 3.1e-05, "loss": 0.0, "step": 31 }, { "epoch": 0.0053822218484568165, "grad_norm": NaN, "learning_rate": 3.2000000000000005e-05, "loss": 0.0, "step": 32 }, { "epoch": 0.005550416281221091, "grad_norm": NaN, "learning_rate": 3.3e-05, "loss": 0.0, "step": 33 }, { "epoch": 0.005718610713985367, "grad_norm": NaN, "learning_rate": 3.4000000000000007e-05, "loss": 0.0, "step": 34 }, { "epoch": 0.005886805146749643, "grad_norm": NaN, "learning_rate": 3.5e-05, "loss": 0.0, "step": 35 }, { "epoch": 0.006054999579513918, "grad_norm": NaN, "learning_rate": 3.6e-05, "loss": 0.0, "step": 36 }, { "epoch": 0.006223194012278194, "grad_norm": NaN, "learning_rate": 3.7e-05, "loss": 0.0, "step": 37 }, { "epoch": 0.0063913884450424695, "grad_norm": NaN, "learning_rate": 3.8e-05, "loss": 0.0, "step": 38 }, { "epoch": 0.006559582877806744, "grad_norm": NaN, "learning_rate": 3.9000000000000006e-05, "loss": 0.0, "step": 39 }, { "epoch": 0.00672777731057102, "grad_norm": NaN, "learning_rate": 4e-05, "loss": 0.0, "step": 40 }, { "epoch": 0.006895971743335296, "grad_norm": NaN, "learning_rate": 4.1e-05, "loss": 0.0, "step": 41 }, { "epoch": 0.007064166176099571, "grad_norm": NaN, "learning_rate": 4.2e-05, "loss": 0.0, "step": 42 }, { "epoch": 0.007232360608863847, "grad_norm": NaN, "learning_rate": 4.3e-05, "loss": 0.0, "step": 43 }, { "epoch": 0.0074005550416281225, "grad_norm": NaN, "learning_rate": 4.4000000000000006e-05, "loss": 0.0, "step": 44 }, { "epoch": 0.007568749474392397, "grad_norm": NaN, "learning_rate": 4.5e-05, "loss": 0.0, "step": 45 }, { "epoch": 0.007736943907156673, "grad_norm": NaN, "learning_rate": 4.600000000000001e-05, "loss": 0.0, "step": 46 }, { "epoch": 0.007905138339920948, "grad_norm": NaN, "learning_rate": 4.7e-05, "loss": 0.0, "step": 47 }, { "epoch": 0.008073332772685224, "grad_norm": NaN, "learning_rate": 4.8e-05, "loss": 0.0, "step": 48 }, { "epoch": 0.0082415272054495, "grad_norm": NaN, "learning_rate": 4.9e-05, "loss": 0.0, "step": 49 }, { "epoch": 0.008409721638213775, "grad_norm": NaN, "learning_rate": 5e-05, "loss": 0.0, "step": 50 }, { "epoch": 0.008577916070978051, "grad_norm": NaN, "learning_rate": 5.1000000000000006e-05, "loss": 0.0, "step": 51 }, { "epoch": 0.008746110503742325, "grad_norm": NaN, "learning_rate": 5.2000000000000004e-05, "loss": 0.0, "step": 52 }, { "epoch": 0.008914304936506601, "grad_norm": NaN, "learning_rate": 5.300000000000001e-05, "loss": 0.0, "step": 53 }, { "epoch": 0.009082499369270877, "grad_norm": NaN, "learning_rate": 5.4000000000000005e-05, "loss": 0.0, "step": 54 }, { "epoch": 0.009250693802035153, "grad_norm": NaN, "learning_rate": 5.500000000000001e-05, "loss": 0.0, "step": 55 }, { "epoch": 0.009418888234799428, "grad_norm": NaN, "learning_rate": 5.6000000000000006e-05, "loss": 0.0, "step": 56 }, { "epoch": 0.009587082667563704, "grad_norm": NaN, "learning_rate": 5.6999999999999996e-05, "loss": 0.0, "step": 57 }, { "epoch": 0.00975527710032798, "grad_norm": NaN, "learning_rate": 5.8e-05, "loss": 0.0, "step": 58 }, { "epoch": 0.009923471533092254, "grad_norm": NaN, "learning_rate": 5.9e-05, "loss": 0.0, "step": 59 }, { "epoch": 0.01009166596585653, "grad_norm": NaN, "learning_rate": 6e-05, "loss": 0.0, "step": 60 }, { "epoch": 0.010259860398620806, "grad_norm": NaN, "learning_rate": 6.1e-05, "loss": 0.0, "step": 61 }, { "epoch": 0.010428054831385081, "grad_norm": NaN, "learning_rate": 6.2e-05, "loss": 0.0, "step": 62 }, { "epoch": 0.010596249264149357, "grad_norm": NaN, "learning_rate": 6.3e-05, "loss": 0.0, "step": 63 }, { "epoch": 0.010764443696913633, "grad_norm": NaN, "learning_rate": 6.400000000000001e-05, "loss": 0.0, "step": 64 }, { "epoch": 0.010932638129677907, "grad_norm": NaN, "learning_rate": 6.500000000000001e-05, "loss": 0.0, "step": 65 }, { "epoch": 0.011100832562442183, "grad_norm": NaN, "learning_rate": 6.6e-05, "loss": 0.0, "step": 66 }, { "epoch": 0.011269026995206459, "grad_norm": NaN, "learning_rate": 6.7e-05, "loss": 0.0, "step": 67 }, { "epoch": 0.011437221427970734, "grad_norm": NaN, "learning_rate": 6.800000000000001e-05, "loss": 0.0, "step": 68 }, { "epoch": 0.01160541586073501, "grad_norm": NaN, "learning_rate": 6.9e-05, "loss": 0.0, "step": 69 }, { "epoch": 0.011773610293499286, "grad_norm": NaN, "learning_rate": 7e-05, "loss": 0.0, "step": 70 }, { "epoch": 0.01194180472626356, "grad_norm": NaN, "learning_rate": 7.1e-05, "loss": 0.0, "step": 71 }, { "epoch": 0.012109999159027836, "grad_norm": NaN, "learning_rate": 7.2e-05, "loss": 0.0, "step": 72 }, { "epoch": 0.012278193591792112, "grad_norm": NaN, "learning_rate": 7.3e-05, "loss": 0.0, "step": 73 }, { "epoch": 0.012446388024556387, "grad_norm": NaN, "learning_rate": 7.4e-05, "loss": 0.0, "step": 74 }, { "epoch": 0.012614582457320663, "grad_norm": NaN, "learning_rate": 7.500000000000001e-05, "loss": 0.0, "step": 75 }, { "epoch": 0.012782776890084939, "grad_norm": NaN, "learning_rate": 7.6e-05, "loss": 0.0, "step": 76 }, { "epoch": 0.012950971322849213, "grad_norm": NaN, "learning_rate": 7.7e-05, "loss": 0.0, "step": 77 }, { "epoch": 0.013119165755613489, "grad_norm": NaN, "learning_rate": 7.800000000000001e-05, "loss": 0.0, "step": 78 }, { "epoch": 0.013287360188377765, "grad_norm": NaN, "learning_rate": 7.900000000000001e-05, "loss": 0.0, "step": 79 }, { "epoch": 0.01345555462114204, "grad_norm": NaN, "learning_rate": 8e-05, "loss": 0.0, "step": 80 }, { "epoch": 0.013623749053906316, "grad_norm": NaN, "learning_rate": 8.1e-05, "loss": 0.0, "step": 81 }, { "epoch": 0.013791943486670592, "grad_norm": NaN, "learning_rate": 8.2e-05, "loss": 0.0, "step": 82 }, { "epoch": 0.013960137919434866, "grad_norm": NaN, "learning_rate": 8.3e-05, "loss": 0.0, "step": 83 }, { "epoch": 0.014128332352199142, "grad_norm": NaN, "learning_rate": 8.4e-05, "loss": 0.0, "step": 84 }, { "epoch": 0.014296526784963418, "grad_norm": NaN, "learning_rate": 8.5e-05, "loss": 0.0, "step": 85 }, { "epoch": 0.014464721217727693, "grad_norm": NaN, "learning_rate": 8.6e-05, "loss": 0.0, "step": 86 }, { "epoch": 0.014632915650491969, "grad_norm": NaN, "learning_rate": 8.7e-05, "loss": 0.0, "step": 87 }, { "epoch": 0.014801110083256245, "grad_norm": NaN, "learning_rate": 8.800000000000001e-05, "loss": 0.0, "step": 88 }, { "epoch": 0.014969304516020519, "grad_norm": NaN, "learning_rate": 8.900000000000001e-05, "loss": 0.0, "step": 89 }, { "epoch": 0.015137498948784795, "grad_norm": NaN, "learning_rate": 9e-05, "loss": 0.0, "step": 90 }, { "epoch": 0.01530569338154907, "grad_norm": NaN, "learning_rate": 9.1e-05, "loss": 0.0, "step": 91 }, { "epoch": 0.015473887814313346, "grad_norm": NaN, "learning_rate": 9.200000000000001e-05, "loss": 0.0, "step": 92 }, { "epoch": 0.01564208224707762, "grad_norm": NaN, "learning_rate": 9.300000000000001e-05, "loss": 0.0, "step": 93 }, { "epoch": 0.015810276679841896, "grad_norm": NaN, "learning_rate": 9.4e-05, "loss": 0.0, "step": 94 }, { "epoch": 0.015978471112606172, "grad_norm": NaN, "learning_rate": 9.5e-05, "loss": 0.0, "step": 95 }, { "epoch": 0.016146665545370448, "grad_norm": NaN, "learning_rate": 9.6e-05, "loss": 0.0, "step": 96 }, { "epoch": 0.016314859978134724, "grad_norm": NaN, "learning_rate": 9.7e-05, "loss": 0.0, "step": 97 }, { "epoch": 0.016483054410899, "grad_norm": NaN, "learning_rate": 9.8e-05, "loss": 0.0, "step": 98 }, { "epoch": 0.016651248843663275, "grad_norm": NaN, "learning_rate": 9.900000000000001e-05, "loss": 0.0, "step": 99 }, { "epoch": 0.01681944327642755, "grad_norm": NaN, "learning_rate": 0.0001, "loss": 0.0, "step": 100 }, { "epoch": 0.016987637709191827, "grad_norm": NaN, "learning_rate": 9.999999277778003e-05, "loss": 0.0, "step": 101 }, { "epoch": 0.017155832141956102, "grad_norm": NaN, "learning_rate": 9.999997111112216e-05, "loss": 0.0, "step": 102 }, { "epoch": 0.017324026574720378, "grad_norm": NaN, "learning_rate": 9.999993500003267e-05, "loss": 0.0, "step": 103 }, { "epoch": 0.01749222100748465, "grad_norm": NaN, "learning_rate": 9.999988444452199e-05, "loss": 0.0, "step": 104 }, { "epoch": 0.017660415440248926, "grad_norm": NaN, "learning_rate": 9.999981944460473e-05, "loss": 0.0, "step": 105 }, { "epoch": 0.017828609873013202, "grad_norm": NaN, "learning_rate": 9.999974000029966e-05, "loss": 0.0, "step": 106 }, { "epoch": 0.017996804305777478, "grad_norm": NaN, "learning_rate": 9.999964611162974e-05, "loss": 0.0, "step": 107 }, { "epoch": 0.018164998738541754, "grad_norm": NaN, "learning_rate": 9.999953777862207e-05, "loss": 0.0, "step": 108 }, { "epoch": 0.01833319317130603, "grad_norm": NaN, "learning_rate": 9.999941500130797e-05, "loss": 0.0, "step": 109 }, { "epoch": 0.018501387604070305, "grad_norm": NaN, "learning_rate": 9.99992777797229e-05, "loss": 0.0, "step": 110 }, { "epoch": 0.01866958203683458, "grad_norm": NaN, "learning_rate": 9.999912611390651e-05, "loss": 0.0, "step": 111 }, { "epoch": 0.018837776469598857, "grad_norm": NaN, "learning_rate": 9.999896000390261e-05, "loss": 0.0, "step": 112 }, { "epoch": 0.019005970902363133, "grad_norm": NaN, "learning_rate": 9.999877944975917e-05, "loss": 0.0, "step": 113 }, { "epoch": 0.01917416533512741, "grad_norm": NaN, "learning_rate": 9.999858445152839e-05, "loss": 0.0, "step": 114 }, { "epoch": 0.019342359767891684, "grad_norm": NaN, "learning_rate": 9.999837500926656e-05, "loss": 0.0, "step": 115 }, { "epoch": 0.01951055420065596, "grad_norm": NaN, "learning_rate": 9.99981511230342e-05, "loss": 0.0, "step": 116 }, { "epoch": 0.019678748633420232, "grad_norm": NaN, "learning_rate": 9.999791279289601e-05, "loss": 0.0, "step": 117 }, { "epoch": 0.019846943066184508, "grad_norm": NaN, "learning_rate": 9.999766001892081e-05, "loss": 0.0, "step": 118 }, { "epoch": 0.020015137498948784, "grad_norm": NaN, "learning_rate": 9.999739280118163e-05, "loss": 0.0, "step": 119 }, { "epoch": 0.02018333193171306, "grad_norm": NaN, "learning_rate": 9.999711113975568e-05, "loss": 0.0, "step": 120 }, { "epoch": 0.020351526364477335, "grad_norm": NaN, "learning_rate": 9.999681503472433e-05, "loss": 0.0, "step": 121 }, { "epoch": 0.02051972079724161, "grad_norm": NaN, "learning_rate": 9.99965044861731e-05, "loss": 0.0, "step": 122 }, { "epoch": 0.020687915230005887, "grad_norm": NaN, "learning_rate": 9.999617949419174e-05, "loss": 0.0, "step": 123 }, { "epoch": 0.020856109662770163, "grad_norm": NaN, "learning_rate": 9.999584005887407e-05, "loss": 0.0, "step": 124 }, { "epoch": 0.02102430409553444, "grad_norm": NaN, "learning_rate": 9.999548618031823e-05, "loss": 0.0, "step": 125 }, { "epoch": 0.021192498528298714, "grad_norm": NaN, "learning_rate": 9.99951178586264e-05, "loss": 0.0, "step": 126 }, { "epoch": 0.02136069296106299, "grad_norm": NaN, "learning_rate": 9.9994735093905e-05, "loss": 0.0, "step": 127 }, { "epoch": 0.021528887393827266, "grad_norm": NaN, "learning_rate": 9.999433788626461e-05, "loss": 0.0, "step": 128 }, { "epoch": 0.02169708182659154, "grad_norm": NaN, "learning_rate": 9.999392623581997e-05, "loss": 0.0, "step": 129 }, { "epoch": 0.021865276259355814, "grad_norm": NaN, "learning_rate": 9.999350014269e-05, "loss": 0.0, "step": 130 }, { "epoch": 0.02203347069212009, "grad_norm": NaN, "learning_rate": 9.999305960699781e-05, "loss": 0.0, "step": 131 }, { "epoch": 0.022201665124884366, "grad_norm": NaN, "learning_rate": 9.999260462887064e-05, "loss": 0.0, "step": 132 }, { "epoch": 0.02236985955764864, "grad_norm": NaN, "learning_rate": 9.999213520843994e-05, "loss": 0.0, "step": 133 }, { "epoch": 0.022538053990412917, "grad_norm": NaN, "learning_rate": 9.999165134584133e-05, "loss": 0.0, "step": 134 }, { "epoch": 0.022706248423177193, "grad_norm": NaN, "learning_rate": 9.999115304121457e-05, "loss": 0.0, "step": 135 }, { "epoch": 0.02287444285594147, "grad_norm": NaN, "learning_rate": 9.999064029470366e-05, "loss": 0.0, "step": 136 }, { "epoch": 0.023042637288705745, "grad_norm": NaN, "learning_rate": 9.999011310645668e-05, "loss": 0.0, "step": 137 }, { "epoch": 0.02321083172147002, "grad_norm": NaN, "learning_rate": 9.998957147662594e-05, "loss": 0.0, "step": 138 }, { "epoch": 0.023379026154234296, "grad_norm": NaN, "learning_rate": 9.998901540536792e-05, "loss": 0.0, "step": 139 }, { "epoch": 0.023547220586998572, "grad_norm": NaN, "learning_rate": 9.998844489284327e-05, "loss": 0.0, "step": 140 }, { "epoch": 0.023715415019762844, "grad_norm": NaN, "learning_rate": 9.998785993921678e-05, "loss": 0.0, "step": 141 }, { "epoch": 0.02388360945252712, "grad_norm": NaN, "learning_rate": 9.998726054465744e-05, "loss": 0.0, "step": 142 }, { "epoch": 0.024051803885291396, "grad_norm": NaN, "learning_rate": 9.998664670933844e-05, "loss": 0.0, "step": 143 }, { "epoch": 0.02421999831805567, "grad_norm": NaN, "learning_rate": 9.998601843343707e-05, "loss": 0.0, "step": 144 }, { "epoch": 0.024388192750819947, "grad_norm": NaN, "learning_rate": 9.998537571713487e-05, "loss": 0.0, "step": 145 }, { "epoch": 0.024556387183584223, "grad_norm": NaN, "learning_rate": 9.998471856061747e-05, "loss": 0.0, "step": 146 }, { "epoch": 0.0247245816163485, "grad_norm": NaN, "learning_rate": 9.998404696407476e-05, "loss": 0.0, "step": 147 }, { "epoch": 0.024892776049112775, "grad_norm": NaN, "learning_rate": 9.998336092770073e-05, "loss": 0.0, "step": 148 }, { "epoch": 0.02506097048187705, "grad_norm": NaN, "learning_rate": 9.998266045169356e-05, "loss": 0.0, "step": 149 }, { "epoch": 0.025229164914641326, "grad_norm": NaN, "learning_rate": 9.998194553625563e-05, "loss": 0.0, "step": 150 }, { "epoch": 0.025397359347405602, "grad_norm": NaN, "learning_rate": 9.998121618159346e-05, "loss": 0.0, "step": 151 }, { "epoch": 0.025565553780169878, "grad_norm": NaN, "learning_rate": 9.998047238791777e-05, "loss": 0.0, "step": 152 }, { "epoch": 0.02573374821293415, "grad_norm": NaN, "learning_rate": 9.997971415544341e-05, "loss": 0.0, "step": 153 }, { "epoch": 0.025901942645698426, "grad_norm": NaN, "learning_rate": 9.997894148438944e-05, "loss": 0.0, "step": 154 }, { "epoch": 0.026070137078462702, "grad_norm": NaN, "learning_rate": 9.997815437497908e-05, "loss": 0.0, "step": 155 }, { "epoch": 0.026238331511226978, "grad_norm": NaN, "learning_rate": 9.997735282743969e-05, "loss": 0.0, "step": 156 }, { "epoch": 0.026406525943991253, "grad_norm": NaN, "learning_rate": 9.997653684200286e-05, "loss": 0.0, "step": 157 }, { "epoch": 0.02657472037675553, "grad_norm": NaN, "learning_rate": 9.99757064189043e-05, "loss": 0.0, "step": 158 }, { "epoch": 0.026742914809519805, "grad_norm": NaN, "learning_rate": 9.997486155838392e-05, "loss": 0.0, "step": 159 }, { "epoch": 0.02691110924228408, "grad_norm": NaN, "learning_rate": 9.997400226068578e-05, "loss": 0.0, "step": 160 }, { "epoch": 0.027079303675048357, "grad_norm": NaN, "learning_rate": 9.997312852605814e-05, "loss": 0.0, "step": 161 }, { "epoch": 0.027247498107812632, "grad_norm": NaN, "learning_rate": 9.997224035475339e-05, "loss": 0.0, "step": 162 }, { "epoch": 0.027415692540576908, "grad_norm": NaN, "learning_rate": 9.997133774702812e-05, "loss": 0.0, "step": 163 }, { "epoch": 0.027583886973341184, "grad_norm": NaN, "learning_rate": 9.997042070314309e-05, "loss": 0.0, "step": 164 }, { "epoch": 0.027752081406105456, "grad_norm": NaN, "learning_rate": 9.996948922336323e-05, "loss": 0.0, "step": 165 }, { "epoch": 0.027920275838869732, "grad_norm": NaN, "learning_rate": 9.996854330795761e-05, "loss": 0.0, "step": 166 }, { "epoch": 0.028088470271634008, "grad_norm": NaN, "learning_rate": 9.996758295719951e-05, "loss": 0.0, "step": 167 }, { "epoch": 0.028256664704398284, "grad_norm": NaN, "learning_rate": 9.996660817136636e-05, "loss": 0.0, "step": 168 }, { "epoch": 0.02842485913716256, "grad_norm": NaN, "learning_rate": 9.996561895073976e-05, "loss": 0.0, "step": 169 }, { "epoch": 0.028593053569926835, "grad_norm": NaN, "learning_rate": 9.996461529560553e-05, "loss": 0.0, "step": 170 }, { "epoch": 0.02876124800269111, "grad_norm": NaN, "learning_rate": 9.996359720625354e-05, "loss": 0.0, "step": 171 }, { "epoch": 0.028929442435455387, "grad_norm": NaN, "learning_rate": 9.996256468297795e-05, "loss": 0.0, "step": 172 }, { "epoch": 0.029097636868219662, "grad_norm": NaN, "learning_rate": 9.996151772607704e-05, "loss": 0.0, "step": 173 }, { "epoch": 0.029265831300983938, "grad_norm": NaN, "learning_rate": 9.996045633585326e-05, "loss": 0.0, "step": 174 }, { "epoch": 0.029434025733748214, "grad_norm": NaN, "learning_rate": 9.995938051261324e-05, "loss": 0.0, "step": 175 }, { "epoch": 0.02960222016651249, "grad_norm": NaN, "learning_rate": 9.995829025666775e-05, "loss": 0.0, "step": 176 }, { "epoch": 0.029770414599276766, "grad_norm": NaN, "learning_rate": 9.995718556833178e-05, "loss": 0.0, "step": 177 }, { "epoch": 0.029938609032041038, "grad_norm": NaN, "learning_rate": 9.995606644792446e-05, "loss": 0.0, "step": 178 }, { "epoch": 0.030106803464805314, "grad_norm": NaN, "learning_rate": 9.995493289576907e-05, "loss": 0.0, "step": 179 }, { "epoch": 0.03027499789756959, "grad_norm": NaN, "learning_rate": 9.99537849121931e-05, "loss": 0.0, "step": 180 }, { "epoch": 0.030443192330333865, "grad_norm": NaN, "learning_rate": 9.995262249752817e-05, "loss": 0.0, "step": 181 }, { "epoch": 0.03061138676309814, "grad_norm": NaN, "learning_rate": 9.995144565211012e-05, "loss": 0.0, "step": 182 }, { "epoch": 0.030779581195862417, "grad_norm": NaN, "learning_rate": 9.99502543762789e-05, "loss": 0.0, "step": 183 }, { "epoch": 0.030947775628626693, "grad_norm": NaN, "learning_rate": 9.994904867037867e-05, "loss": 0.0, "step": 184 }, { "epoch": 0.03111597006139097, "grad_norm": NaN, "learning_rate": 9.994782853475774e-05, "loss": 0.0, "step": 185 }, { "epoch": 0.03128416449415524, "grad_norm": NaN, "learning_rate": 9.994659396976859e-05, "loss": 0.0, "step": 186 }, { "epoch": 0.03145235892691952, "grad_norm": NaN, "learning_rate": 9.994534497576787e-05, "loss": 0.0, "step": 187 }, { "epoch": 0.03162055335968379, "grad_norm": NaN, "learning_rate": 9.994408155311642e-05, "loss": 0.0, "step": 188 }, { "epoch": 0.03178874779244807, "grad_norm": NaN, "learning_rate": 9.994280370217922e-05, "loss": 0.0, "step": 189 }, { "epoch": 0.031956942225212344, "grad_norm": NaN, "learning_rate": 9.99415114233254e-05, "loss": 0.0, "step": 190 }, { "epoch": 0.03212513665797662, "grad_norm": NaN, "learning_rate": 9.994020471692833e-05, "loss": 0.0, "step": 191 }, { "epoch": 0.032293331090740895, "grad_norm": NaN, "learning_rate": 9.993888358336545e-05, "loss": 0.0, "step": 192 }, { "epoch": 0.032461525523505175, "grad_norm": NaN, "learning_rate": 9.993754802301847e-05, "loss": 0.0, "step": 193 }, { "epoch": 0.03262971995626945, "grad_norm": NaN, "learning_rate": 9.993619803627321e-05, "loss": 0.0, "step": 194 }, { "epoch": 0.032797914389033726, "grad_norm": NaN, "learning_rate": 9.993483362351963e-05, "loss": 0.0, "step": 195 }, { "epoch": 0.032966108821798, "grad_norm": NaN, "learning_rate": 9.993345478515194e-05, "loss": 0.0, "step": 196 }, { "epoch": 0.03313430325456227, "grad_norm": NaN, "learning_rate": 9.993206152156844e-05, "loss": 0.0, "step": 197 }, { "epoch": 0.03330249768732655, "grad_norm": NaN, "learning_rate": 9.993065383317163e-05, "loss": 0.0, "step": 198 }, { "epoch": 0.03347069212009082, "grad_norm": NaN, "learning_rate": 9.992923172036819e-05, "loss": 0.0, "step": 199 }, { "epoch": 0.0336388865528551, "grad_norm": NaN, "learning_rate": 9.992779518356896e-05, "loss": 0.0, "step": 200 }, { "epoch": 0.033807080985619374, "grad_norm": NaN, "learning_rate": 9.99263442231889e-05, "loss": 0.0, "step": 201 }, { "epoch": 0.03397527541838365, "grad_norm": NaN, "learning_rate": 9.99248788396472e-05, "loss": 0.0, "step": 202 }, { "epoch": 0.034143469851147926, "grad_norm": NaN, "learning_rate": 9.992339903336722e-05, "loss": 0.0, "step": 203 }, { "epoch": 0.034311664283912205, "grad_norm": NaN, "learning_rate": 9.992190480477641e-05, "loss": 0.0, "step": 204 }, { "epoch": 0.03447985871667648, "grad_norm": NaN, "learning_rate": 9.992039615430648e-05, "loss": 0.0, "step": 205 }, { "epoch": 0.034648053149440756, "grad_norm": NaN, "learning_rate": 9.991887308239322e-05, "loss": 0.0, "step": 206 }, { "epoch": 0.03481624758220503, "grad_norm": NaN, "learning_rate": 9.991733558947667e-05, "loss": 0.0, "step": 207 }, { "epoch": 0.0349844420149693, "grad_norm": NaN, "learning_rate": 9.991578367600096e-05, "loss": 0.0, "step": 208 }, { "epoch": 0.03515263644773358, "grad_norm": NaN, "learning_rate": 9.991421734241444e-05, "loss": 0.0, "step": 209 }, { "epoch": 0.03532083088049785, "grad_norm": NaN, "learning_rate": 9.99126365891696e-05, "loss": 0.0, "step": 210 }, { "epoch": 0.03548902531326213, "grad_norm": NaN, "learning_rate": 9.991104141672309e-05, "loss": 0.0, "step": 211 }, { "epoch": 0.035657219746026404, "grad_norm": NaN, "learning_rate": 9.990943182553579e-05, "loss": 0.0, "step": 212 }, { "epoch": 0.035825414178790684, "grad_norm": NaN, "learning_rate": 9.990780781607261e-05, "loss": 0.0, "step": 213 }, { "epoch": 0.035993608611554956, "grad_norm": NaN, "learning_rate": 9.990616938880278e-05, "loss": 0.0, "step": 214 }, { "epoch": 0.036161803044319235, "grad_norm": NaN, "learning_rate": 9.990451654419957e-05, "loss": 0.0, "step": 215 }, { "epoch": 0.03632999747708351, "grad_norm": NaN, "learning_rate": 9.990284928274051e-05, "loss": 0.0, "step": 216 }, { "epoch": 0.03649819190984779, "grad_norm": NaN, "learning_rate": 9.990116760490723e-05, "loss": 0.0, "step": 217 }, { "epoch": 0.03666638634261206, "grad_norm": NaN, "learning_rate": 9.989947151118555e-05, "loss": 0.0, "step": 218 }, { "epoch": 0.03683458077537634, "grad_norm": NaN, "learning_rate": 9.989776100206548e-05, "loss": 0.0, "step": 219 }, { "epoch": 0.03700277520814061, "grad_norm": NaN, "learning_rate": 9.989603607804112e-05, "loss": 0.0, "step": 220 }, { "epoch": 0.03717096964090488, "grad_norm": NaN, "learning_rate": 9.98942967396108e-05, "loss": 0.0, "step": 221 }, { "epoch": 0.03733916407366916, "grad_norm": NaN, "learning_rate": 9.989254298727702e-05, "loss": 0.0, "step": 222 }, { "epoch": 0.037507358506433434, "grad_norm": NaN, "learning_rate": 9.989077482154638e-05, "loss": 0.0, "step": 223 }, { "epoch": 0.037675552939197714, "grad_norm": NaN, "learning_rate": 9.988899224292971e-05, "loss": 0.0, "step": 224 }, { "epoch": 0.037843747371961986, "grad_norm": NaN, "learning_rate": 9.988719525194198e-05, "loss": 0.0, "step": 225 }, { "epoch": 0.038011941804726265, "grad_norm": NaN, "learning_rate": 9.988538384910231e-05, "loss": 0.0, "step": 226 }, { "epoch": 0.03818013623749054, "grad_norm": NaN, "learning_rate": 9.988355803493398e-05, "loss": 0.0, "step": 227 }, { "epoch": 0.03834833067025482, "grad_norm": NaN, "learning_rate": 9.988171780996446e-05, "loss": 0.0, "step": 228 }, { "epoch": 0.03851652510301909, "grad_norm": NaN, "learning_rate": 9.987986317472539e-05, "loss": 0.0, "step": 229 }, { "epoch": 0.03868471953578337, "grad_norm": NaN, "learning_rate": 9.987799412975252e-05, "loss": 0.0, "step": 230 }, { "epoch": 0.03885291396854764, "grad_norm": NaN, "learning_rate": 9.987611067558582e-05, "loss": 0.0, "step": 231 }, { "epoch": 0.03902110840131192, "grad_norm": NaN, "learning_rate": 9.987421281276939e-05, "loss": 0.0, "step": 232 }, { "epoch": 0.03918930283407619, "grad_norm": NaN, "learning_rate": 9.98723005418515e-05, "loss": 0.0, "step": 233 }, { "epoch": 0.039357497266840465, "grad_norm": NaN, "learning_rate": 9.987037386338458e-05, "loss": 0.0, "step": 234 }, { "epoch": 0.039525691699604744, "grad_norm": NaN, "learning_rate": 9.986843277792523e-05, "loss": 0.0, "step": 235 }, { "epoch": 0.039693886132369016, "grad_norm": NaN, "learning_rate": 9.986647728603422e-05, "loss": 0.0, "step": 236 }, { "epoch": 0.039862080565133295, "grad_norm": NaN, "learning_rate": 9.986450738827646e-05, "loss": 0.0, "step": 237 }, { "epoch": 0.04003027499789757, "grad_norm": NaN, "learning_rate": 9.986252308522101e-05, "loss": 0.0, "step": 238 }, { "epoch": 0.04019846943066185, "grad_norm": NaN, "learning_rate": 9.986052437744115e-05, "loss": 0.0, "step": 239 }, { "epoch": 0.04036666386342612, "grad_norm": NaN, "learning_rate": 9.985851126551428e-05, "loss": 0.0, "step": 240 }, { "epoch": 0.0405348582961904, "grad_norm": NaN, "learning_rate": 9.985648375002192e-05, "loss": 0.0, "step": 241 }, { "epoch": 0.04070305272895467, "grad_norm": NaN, "learning_rate": 9.985444183154986e-05, "loss": 0.0, "step": 242 }, { "epoch": 0.04087124716171895, "grad_norm": NaN, "learning_rate": 9.985238551068794e-05, "loss": 0.0, "step": 243 }, { "epoch": 0.04103944159448322, "grad_norm": NaN, "learning_rate": 9.985031478803022e-05, "loss": 0.0, "step": 244 }, { "epoch": 0.041207636027247495, "grad_norm": NaN, "learning_rate": 9.984822966417494e-05, "loss": 0.0, "step": 245 }, { "epoch": 0.041375830460011774, "grad_norm": NaN, "learning_rate": 9.984613013972442e-05, "loss": 0.0, "step": 246 }, { "epoch": 0.041544024892776046, "grad_norm": NaN, "learning_rate": 9.98440162152852e-05, "loss": 0.0, "step": 247 }, { "epoch": 0.041712219325540326, "grad_norm": NaN, "learning_rate": 9.9841887891468e-05, "loss": 0.0, "step": 248 }, { "epoch": 0.0418804137583046, "grad_norm": NaN, "learning_rate": 9.983974516888763e-05, "loss": 0.0, "step": 249 }, { "epoch": 0.04204860819106888, "grad_norm": NaN, "learning_rate": 9.983758804816314e-05, "loss": 0.0, "step": 250 }, { "epoch": 0.04221680262383315, "grad_norm": NaN, "learning_rate": 9.983541652991766e-05, "loss": 0.0, "step": 251 }, { "epoch": 0.04238499705659743, "grad_norm": NaN, "learning_rate": 9.983323061477854e-05, "loss": 0.0, "step": 252 }, { "epoch": 0.0425531914893617, "grad_norm": NaN, "learning_rate": 9.983103030337727e-05, "loss": 0.0, "step": 253 }, { "epoch": 0.04272138592212598, "grad_norm": NaN, "learning_rate": 9.982881559634947e-05, "loss": 0.0, "step": 254 }, { "epoch": 0.04288958035489025, "grad_norm": NaN, "learning_rate": 9.982658649433497e-05, "loss": 0.0, "step": 255 }, { "epoch": 0.04305777478765453, "grad_norm": NaN, "learning_rate": 9.982434299797772e-05, "loss": 0.0, "step": 256 }, { "epoch": 0.043225969220418804, "grad_norm": NaN, "learning_rate": 9.982208510792583e-05, "loss": 0.0, "step": 257 }, { "epoch": 0.04339416365318308, "grad_norm": NaN, "learning_rate": 9.98198128248316e-05, "loss": 0.0, "step": 258 }, { "epoch": 0.043562358085947356, "grad_norm": NaN, "learning_rate": 9.981752614935145e-05, "loss": 0.0, "step": 259 }, { "epoch": 0.04373055251871163, "grad_norm": NaN, "learning_rate": 9.9815225082146e-05, "loss": 0.0, "step": 260 }, { "epoch": 0.04389874695147591, "grad_norm": NaN, "learning_rate": 9.981290962387998e-05, "loss": 0.0, "step": 261 }, { "epoch": 0.04406694138424018, "grad_norm": NaN, "learning_rate": 9.98105797752223e-05, "loss": 0.0, "step": 262 }, { "epoch": 0.04423513581700446, "grad_norm": NaN, "learning_rate": 9.980823553684604e-05, "loss": 0.0, "step": 263 }, { "epoch": 0.04440333024976873, "grad_norm": NaN, "learning_rate": 9.980587690942841e-05, "loss": 0.0, "step": 264 }, { "epoch": 0.04457152468253301, "grad_norm": NaN, "learning_rate": 9.980350389365081e-05, "loss": 0.0, "step": 265 }, { "epoch": 0.04473971911529728, "grad_norm": NaN, "learning_rate": 9.980111649019876e-05, "loss": 0.0, "step": 266 }, { "epoch": 0.04490791354806156, "grad_norm": NaN, "learning_rate": 9.979871469976196e-05, "loss": 0.0, "step": 267 }, { "epoch": 0.045076107980825834, "grad_norm": NaN, "learning_rate": 9.979629852303426e-05, "loss": 0.0, "step": 268 }, { "epoch": 0.04524430241359011, "grad_norm": NaN, "learning_rate": 9.979386796071366e-05, "loss": 0.0, "step": 269 }, { "epoch": 0.045412496846354386, "grad_norm": NaN, "learning_rate": 9.979142301350235e-05, "loss": 0.0, "step": 270 }, { "epoch": 0.04558069127911866, "grad_norm": NaN, "learning_rate": 9.978896368210662e-05, "loss": 0.0, "step": 271 }, { "epoch": 0.04574888571188294, "grad_norm": NaN, "learning_rate": 9.978648996723695e-05, "loss": 0.0, "step": 272 }, { "epoch": 0.04591708014464721, "grad_norm": NaN, "learning_rate": 9.978400186960795e-05, "loss": 0.0, "step": 273 }, { "epoch": 0.04608527457741149, "grad_norm": NaN, "learning_rate": 9.978149938993843e-05, "loss": 0.0, "step": 274 }, { "epoch": 0.04625346901017576, "grad_norm": NaN, "learning_rate": 9.977898252895134e-05, "loss": 0.0, "step": 275 }, { "epoch": 0.04642166344294004, "grad_norm": NaN, "learning_rate": 9.977645128737374e-05, "loss": 0.0, "step": 276 }, { "epoch": 0.04658985787570431, "grad_norm": NaN, "learning_rate": 9.97739056659369e-05, "loss": 0.0, "step": 277 }, { "epoch": 0.04675805230846859, "grad_norm": NaN, "learning_rate": 9.977134566537621e-05, "loss": 0.0, "step": 278 }, { "epoch": 0.046926246741232865, "grad_norm": NaN, "learning_rate": 9.976877128643122e-05, "loss": 0.0, "step": 279 }, { "epoch": 0.047094441173997144, "grad_norm": NaN, "learning_rate": 9.976618252984564e-05, "loss": 0.0, "step": 280 }, { "epoch": 0.047262635606761416, "grad_norm": NaN, "learning_rate": 9.976357939636735e-05, "loss": 0.0, "step": 281 }, { "epoch": 0.04743083003952569, "grad_norm": NaN, "learning_rate": 9.976096188674837e-05, "loss": 0.0, "step": 282 }, { "epoch": 0.04759902447228997, "grad_norm": NaN, "learning_rate": 9.975833000174485e-05, "loss": 0.0, "step": 283 }, { "epoch": 0.04776721890505424, "grad_norm": NaN, "learning_rate": 9.97556837421171e-05, "loss": 0.0, "step": 284 }, { "epoch": 0.04793541333781852, "grad_norm": NaN, "learning_rate": 9.975302310862964e-05, "loss": 0.0, "step": 285 }, { "epoch": 0.04810360777058279, "grad_norm": NaN, "learning_rate": 9.975034810205105e-05, "loss": 0.0, "step": 286 }, { "epoch": 0.04827180220334707, "grad_norm": NaN, "learning_rate": 9.974765872315414e-05, "loss": 0.0, "step": 287 }, { "epoch": 0.04843999663611134, "grad_norm": NaN, "learning_rate": 9.974495497271583e-05, "loss": 0.0, "step": 288 }, { "epoch": 0.04860819106887562, "grad_norm": NaN, "learning_rate": 9.97422368515172e-05, "loss": 0.0, "step": 289 }, { "epoch": 0.048776385501639895, "grad_norm": NaN, "learning_rate": 9.973950436034348e-05, "loss": 0.0, "step": 290 }, { "epoch": 0.048944579934404174, "grad_norm": NaN, "learning_rate": 9.973675749998407e-05, "loss": 0.0, "step": 291 }, { "epoch": 0.049112774367168446, "grad_norm": NaN, "learning_rate": 9.97339962712325e-05, "loss": 0.0, "step": 292 }, { "epoch": 0.049280968799932726, "grad_norm": NaN, "learning_rate": 9.973122067488647e-05, "loss": 0.0, "step": 293 }, { "epoch": 0.049449163232697, "grad_norm": NaN, "learning_rate": 9.97284307117478e-05, "loss": 0.0, "step": 294 }, { "epoch": 0.04961735766546127, "grad_norm": NaN, "learning_rate": 9.972562638262249e-05, "loss": 0.0, "step": 295 }, { "epoch": 0.04978555209822555, "grad_norm": NaN, "learning_rate": 9.972280768832068e-05, "loss": 0.0, "step": 296 }, { "epoch": 0.04995374653098982, "grad_norm": NaN, "learning_rate": 9.971997462965666e-05, "loss": 0.0, "step": 297 }, { "epoch": 0.0501219409637541, "grad_norm": NaN, "learning_rate": 9.971712720744885e-05, "loss": 0.0, "step": 298 }, { "epoch": 0.05029013539651837, "grad_norm": NaN, "learning_rate": 9.971426542251986e-05, "loss": 0.0, "step": 299 }, { "epoch": 0.05045832982928265, "grad_norm": NaN, "learning_rate": 9.971138927569642e-05, "loss": 0.0, "step": 300 }, { "epoch": 0.050626524262046925, "grad_norm": NaN, "learning_rate": 9.970849876780942e-05, "loss": 0.0, "step": 301 }, { "epoch": 0.050794718694811204, "grad_norm": NaN, "learning_rate": 9.970559389969389e-05, "loss": 0.0, "step": 302 }, { "epoch": 0.05096291312757548, "grad_norm": NaN, "learning_rate": 9.970267467218904e-05, "loss": 0.0, "step": 303 }, { "epoch": 0.051131107560339756, "grad_norm": NaN, "learning_rate": 9.969974108613816e-05, "loss": 0.0, "step": 304 }, { "epoch": 0.05129930199310403, "grad_norm": NaN, "learning_rate": 9.969679314238874e-05, "loss": 0.0, "step": 305 }, { "epoch": 0.0514674964258683, "grad_norm": NaN, "learning_rate": 9.969383084179243e-05, "loss": 0.0, "step": 306 }, { "epoch": 0.05163569085863258, "grad_norm": NaN, "learning_rate": 9.969085418520498e-05, "loss": 0.0, "step": 307 }, { "epoch": 0.05180388529139685, "grad_norm": NaN, "learning_rate": 9.968786317348632e-05, "loss": 0.0, "step": 308 }, { "epoch": 0.05197207972416113, "grad_norm": NaN, "learning_rate": 9.968485780750056e-05, "loss": 0.0, "step": 309 }, { "epoch": 0.052140274156925404, "grad_norm": NaN, "learning_rate": 9.968183808811586e-05, "loss": 0.0, "step": 310 }, { "epoch": 0.05230846858968968, "grad_norm": NaN, "learning_rate": 9.96788040162046e-05, "loss": 0.0, "step": 311 }, { "epoch": 0.052476663022453955, "grad_norm": NaN, "learning_rate": 9.96757555926433e-05, "loss": 0.0, "step": 312 }, { "epoch": 0.052644857455218234, "grad_norm": NaN, "learning_rate": 9.967269281831262e-05, "loss": 0.0, "step": 313 }, { "epoch": 0.05281305188798251, "grad_norm": NaN, "learning_rate": 9.966961569409735e-05, "loss": 0.0, "step": 314 }, { "epoch": 0.052981246320746786, "grad_norm": NaN, "learning_rate": 9.96665242208864e-05, "loss": 0.0, "step": 315 }, { "epoch": 0.05314944075351106, "grad_norm": NaN, "learning_rate": 9.966341839957293e-05, "loss": 0.0, "step": 316 }, { "epoch": 0.05331763518627534, "grad_norm": NaN, "learning_rate": 9.966029823105416e-05, "loss": 0.0, "step": 317 }, { "epoch": 0.05348582961903961, "grad_norm": NaN, "learning_rate": 9.965716371623143e-05, "loss": 0.0, "step": 318 }, { "epoch": 0.05365402405180388, "grad_norm": NaN, "learning_rate": 9.965401485601031e-05, "loss": 0.0, "step": 319 }, { "epoch": 0.05382221848456816, "grad_norm": NaN, "learning_rate": 9.965085165130046e-05, "loss": 0.0, "step": 320 }, { "epoch": 0.053990412917332434, "grad_norm": NaN, "learning_rate": 9.964767410301568e-05, "loss": 0.0, "step": 321 }, { "epoch": 0.05415860735009671, "grad_norm": NaN, "learning_rate": 9.964448221207392e-05, "loss": 0.0, "step": 322 }, { "epoch": 0.054326801782860985, "grad_norm": NaN, "learning_rate": 9.964127597939732e-05, "loss": 0.0, "step": 323 }, { "epoch": 0.054494996215625265, "grad_norm": NaN, "learning_rate": 9.963805540591211e-05, "loss": 0.0, "step": 324 }, { "epoch": 0.05466319064838954, "grad_norm": NaN, "learning_rate": 9.963482049254866e-05, "loss": 0.0, "step": 325 }, { "epoch": 0.054831385081153816, "grad_norm": NaN, "learning_rate": 9.963157124024152e-05, "loss": 0.0, "step": 326 }, { "epoch": 0.05499957951391809, "grad_norm": NaN, "learning_rate": 9.962830764992934e-05, "loss": 0.0, "step": 327 }, { "epoch": 0.05516777394668237, "grad_norm": NaN, "learning_rate": 9.962502972255497e-05, "loss": 0.0, "step": 328 }, { "epoch": 0.05533596837944664, "grad_norm": NaN, "learning_rate": 9.962173745906533e-05, "loss": 0.0, "step": 329 }, { "epoch": 0.05550416281221091, "grad_norm": NaN, "learning_rate": 9.961843086041154e-05, "loss": 0.0, "step": 330 }, { "epoch": 0.05567235724497519, "grad_norm": NaN, "learning_rate": 9.961510992754882e-05, "loss": 0.0, "step": 331 }, { "epoch": 0.055840551677739464, "grad_norm": NaN, "learning_rate": 9.961177466143658e-05, "loss": 0.0, "step": 332 }, { "epoch": 0.05600874611050374, "grad_norm": NaN, "learning_rate": 9.960842506303831e-05, "loss": 0.0, "step": 333 }, { "epoch": 0.056176940543268016, "grad_norm": NaN, "learning_rate": 9.960506113332169e-05, "loss": 0.0, "step": 334 }, { "epoch": 0.056345134976032295, "grad_norm": NaN, "learning_rate": 9.960168287325852e-05, "loss": 0.0, "step": 335 }, { "epoch": 0.05651332940879657, "grad_norm": NaN, "learning_rate": 9.959829028382473e-05, "loss": 0.0, "step": 336 }, { "epoch": 0.056681523841560846, "grad_norm": NaN, "learning_rate": 9.959488336600042e-05, "loss": 0.0, "step": 337 }, { "epoch": 0.05684971827432512, "grad_norm": NaN, "learning_rate": 9.95914621207698e-05, "loss": 0.0, "step": 338 }, { "epoch": 0.0570179127070894, "grad_norm": NaN, "learning_rate": 9.958802654912123e-05, "loss": 0.0, "step": 339 }, { "epoch": 0.05718610713985367, "grad_norm": NaN, "learning_rate": 9.95845766520472e-05, "loss": 0.0, "step": 340 }, { "epoch": 0.05735430157261795, "grad_norm": NaN, "learning_rate": 9.958111243054436e-05, "loss": 0.0, "step": 341 }, { "epoch": 0.05752249600538222, "grad_norm": NaN, "learning_rate": 9.957763388561347e-05, "loss": 0.0, "step": 342 }, { "epoch": 0.057690690438146494, "grad_norm": NaN, "learning_rate": 9.957414101825945e-05, "loss": 0.0, "step": 343 }, { "epoch": 0.05785888487091077, "grad_norm": NaN, "learning_rate": 9.957063382949137e-05, "loss": 0.0, "step": 344 }, { "epoch": 0.058027079303675046, "grad_norm": NaN, "learning_rate": 9.95671123203224e-05, "loss": 0.0, "step": 345 }, { "epoch": 0.058195273736439325, "grad_norm": NaN, "learning_rate": 9.956357649176984e-05, "loss": 0.0, "step": 346 }, { "epoch": 0.0583634681692036, "grad_norm": NaN, "learning_rate": 9.95600263448552e-05, "loss": 0.0, "step": 347 }, { "epoch": 0.058531662601967877, "grad_norm": NaN, "learning_rate": 9.955646188060404e-05, "loss": 0.0, "step": 348 }, { "epoch": 0.05869985703473215, "grad_norm": NaN, "learning_rate": 9.955288310004612e-05, "loss": 0.0, "step": 349 }, { "epoch": 0.05886805146749643, "grad_norm": NaN, "learning_rate": 9.95492900042153e-05, "loss": 0.0, "step": 350 }, { "epoch": 0.0590362459002607, "grad_norm": NaN, "learning_rate": 9.954568259414956e-05, "loss": 0.0, "step": 351 }, { "epoch": 0.05920444033302498, "grad_norm": NaN, "learning_rate": 9.954206087089107e-05, "loss": 0.0, "step": 352 }, { "epoch": 0.05937263476578925, "grad_norm": NaN, "learning_rate": 9.95384248354861e-05, "loss": 0.0, "step": 353 }, { "epoch": 0.05954082919855353, "grad_norm": NaN, "learning_rate": 9.953477448898506e-05, "loss": 0.0, "step": 354 }, { "epoch": 0.059709023631317804, "grad_norm": NaN, "learning_rate": 9.953110983244248e-05, "loss": 0.0, "step": 355 }, { "epoch": 0.059877218064082076, "grad_norm": NaN, "learning_rate": 9.952743086691705e-05, "loss": 0.0, "step": 356 }, { "epoch": 0.060045412496846355, "grad_norm": NaN, "learning_rate": 9.952373759347157e-05, "loss": 0.0, "step": 357 }, { "epoch": 0.06021360692961063, "grad_norm": NaN, "learning_rate": 9.952003001317303e-05, "loss": 0.0, "step": 358 }, { "epoch": 0.06038180136237491, "grad_norm": NaN, "learning_rate": 9.951630812709245e-05, "loss": 0.0, "step": 359 }, { "epoch": 0.06054999579513918, "grad_norm": NaN, "learning_rate": 9.951257193630507e-05, "loss": 0.0, "step": 360 }, { "epoch": 0.06071819022790346, "grad_norm": NaN, "learning_rate": 9.950882144189024e-05, "loss": 0.0, "step": 361 }, { "epoch": 0.06088638466066773, "grad_norm": NaN, "learning_rate": 9.950505664493144e-05, "loss": 0.0, "step": 362 }, { "epoch": 0.06105457909343201, "grad_norm": NaN, "learning_rate": 9.950127754651624e-05, "loss": 0.0, "step": 363 }, { "epoch": 0.06122277352619628, "grad_norm": NaN, "learning_rate": 9.949748414773641e-05, "loss": 0.0, "step": 364 }, { "epoch": 0.06139096795896056, "grad_norm": NaN, "learning_rate": 9.949367644968781e-05, "loss": 0.0, "step": 365 }, { "epoch": 0.061559162391724834, "grad_norm": NaN, "learning_rate": 9.948985445347046e-05, "loss": 0.0, "step": 366 }, { "epoch": 0.061727356824489106, "grad_norm": NaN, "learning_rate": 9.948601816018846e-05, "loss": 0.0, "step": 367 }, { "epoch": 0.061895551257253385, "grad_norm": NaN, "learning_rate": 9.948216757095008e-05, "loss": 0.0, "step": 368 }, { "epoch": 0.06206374569001766, "grad_norm": NaN, "learning_rate": 9.947830268686773e-05, "loss": 0.0, "step": 369 }, { "epoch": 0.06223194012278194, "grad_norm": NaN, "learning_rate": 9.947442350905793e-05, "loss": 0.0, "step": 370 }, { "epoch": 0.06240013455554621, "grad_norm": NaN, "learning_rate": 9.947053003864134e-05, "loss": 0.0, "step": 371 }, { "epoch": 0.06256832898831048, "grad_norm": NaN, "learning_rate": 9.946662227674268e-05, "loss": 0.0, "step": 372 }, { "epoch": 0.06273652342107476, "grad_norm": NaN, "learning_rate": 9.946270022449093e-05, "loss": 0.0, "step": 373 }, { "epoch": 0.06290471785383904, "grad_norm": NaN, "learning_rate": 9.945876388301909e-05, "loss": 0.0, "step": 374 }, { "epoch": 0.06307291228660332, "grad_norm": NaN, "learning_rate": 9.945481325346434e-05, "loss": 0.0, "step": 375 }, { "epoch": 0.06324110671936758, "grad_norm": NaN, "learning_rate": 9.945084833696797e-05, "loss": 0.0, "step": 376 }, { "epoch": 0.06340930115213186, "grad_norm": NaN, "learning_rate": 9.944686913467539e-05, "loss": 0.0, "step": 377 }, { "epoch": 0.06357749558489614, "grad_norm": NaN, "learning_rate": 9.944287564773615e-05, "loss": 0.0, "step": 378 }, { "epoch": 0.06374569001766041, "grad_norm": NaN, "learning_rate": 9.943886787730394e-05, "loss": 0.0, "step": 379 }, { "epoch": 0.06391388445042469, "grad_norm": NaN, "learning_rate": 9.943484582453653e-05, "loss": 0.0, "step": 380 }, { "epoch": 0.06408207888318897, "grad_norm": NaN, "learning_rate": 9.943080949059588e-05, "loss": 0.0, "step": 381 }, { "epoch": 0.06425027331595325, "grad_norm": NaN, "learning_rate": 9.942675887664802e-05, "loss": 0.0, "step": 382 }, { "epoch": 0.06441846774871751, "grad_norm": NaN, "learning_rate": 9.942269398386313e-05, "loss": 0.0, "step": 383 }, { "epoch": 0.06458666218148179, "grad_norm": NaN, "learning_rate": 9.941861481341551e-05, "loss": 0.0, "step": 384 }, { "epoch": 0.06475485661424607, "grad_norm": NaN, "learning_rate": 9.941452136648359e-05, "loss": 0.0, "step": 385 }, { "epoch": 0.06492305104701035, "grad_norm": NaN, "learning_rate": 9.941041364424993e-05, "loss": 0.0, "step": 386 }, { "epoch": 0.06509124547977461, "grad_norm": NaN, "learning_rate": 9.94062916479012e-05, "loss": 0.0, "step": 387 }, { "epoch": 0.0652594399125389, "grad_norm": NaN, "learning_rate": 9.940215537862818e-05, "loss": 0.0, "step": 388 }, { "epoch": 0.06542763434530317, "grad_norm": NaN, "learning_rate": 9.939800483762581e-05, "loss": 0.0, "step": 389 }, { "epoch": 0.06559582877806745, "grad_norm": NaN, "learning_rate": 9.939384002609315e-05, "loss": 0.0, "step": 390 }, { "epoch": 0.06576402321083172, "grad_norm": NaN, "learning_rate": 9.938966094523332e-05, "loss": 0.0, "step": 391 }, { "epoch": 0.065932217643596, "grad_norm": NaN, "learning_rate": 9.938546759625367e-05, "loss": 0.0, "step": 392 }, { "epoch": 0.06610041207636028, "grad_norm": NaN, "learning_rate": 9.938125998036555e-05, "loss": 0.0, "step": 393 }, { "epoch": 0.06626860650912454, "grad_norm": NaN, "learning_rate": 9.937703809878455e-05, "loss": 0.0, "step": 394 }, { "epoch": 0.06643680094188882, "grad_norm": NaN, "learning_rate": 9.93728019527303e-05, "loss": 0.0, "step": 395 }, { "epoch": 0.0666049953746531, "grad_norm": NaN, "learning_rate": 9.936855154342655e-05, "loss": 0.0, "step": 396 }, { "epoch": 0.06677318980741738, "grad_norm": NaN, "learning_rate": 9.936428687210122e-05, "loss": 0.0, "step": 397 }, { "epoch": 0.06694138424018165, "grad_norm": NaN, "learning_rate": 9.936000793998634e-05, "loss": 0.0, "step": 398 }, { "epoch": 0.06710957867294592, "grad_norm": NaN, "learning_rate": 9.935571474831801e-05, "loss": 0.0, "step": 399 }, { "epoch": 0.0672777731057102, "grad_norm": NaN, "learning_rate": 9.935140729833651e-05, "loss": 0.0, "step": 400 }, { "epoch": 0.06744596753847448, "grad_norm": NaN, "learning_rate": 9.934708559128622e-05, "loss": 0.0, "step": 401 }, { "epoch": 0.06761416197123875, "grad_norm": NaN, "learning_rate": 9.934274962841561e-05, "loss": 0.0, "step": 402 }, { "epoch": 0.06778235640400303, "grad_norm": NaN, "learning_rate": 9.933839941097732e-05, "loss": 0.0, "step": 403 }, { "epoch": 0.0679505508367673, "grad_norm": NaN, "learning_rate": 9.933403494022806e-05, "loss": 0.0, "step": 404 }, { "epoch": 0.06811874526953157, "grad_norm": NaN, "learning_rate": 9.932965621742867e-05, "loss": 0.0, "step": 405 }, { "epoch": 0.06828693970229585, "grad_norm": NaN, "learning_rate": 9.932526324384412e-05, "loss": 0.0, "step": 406 }, { "epoch": 0.06845513413506013, "grad_norm": NaN, "learning_rate": 9.93208560207435e-05, "loss": 0.0, "step": 407 }, { "epoch": 0.06862332856782441, "grad_norm": NaN, "learning_rate": 9.93164345494e-05, "loss": 0.0, "step": 408 }, { "epoch": 0.06879152300058868, "grad_norm": NaN, "learning_rate": 9.931199883109093e-05, "loss": 0.0, "step": 409 }, { "epoch": 0.06895971743335295, "grad_norm": NaN, "learning_rate": 9.930754886709772e-05, "loss": 0.0, "step": 410 }, { "epoch": 0.06912791186611723, "grad_norm": NaN, "learning_rate": 9.930308465870593e-05, "loss": 0.0, "step": 411 }, { "epoch": 0.06929610629888151, "grad_norm": NaN, "learning_rate": 9.92986062072052e-05, "loss": 0.0, "step": 412 }, { "epoch": 0.06946430073164578, "grad_norm": NaN, "learning_rate": 9.92941135138893e-05, "loss": 0.0, "step": 413 }, { "epoch": 0.06963249516441006, "grad_norm": NaN, "learning_rate": 9.928960658005617e-05, "loss": 0.0, "step": 414 }, { "epoch": 0.06980068959717434, "grad_norm": NaN, "learning_rate": 9.928508540700774e-05, "loss": 0.0, "step": 415 }, { "epoch": 0.0699688840299386, "grad_norm": NaN, "learning_rate": 9.928054999605018e-05, "loss": 0.0, "step": 416 }, { "epoch": 0.07013707846270288, "grad_norm": NaN, "learning_rate": 9.92760003484937e-05, "loss": 0.0, "step": 417 }, { "epoch": 0.07030527289546716, "grad_norm": NaN, "learning_rate": 9.927143646565262e-05, "loss": 0.0, "step": 418 }, { "epoch": 0.07047346732823144, "grad_norm": NaN, "learning_rate": 9.926685834884543e-05, "loss": 0.0, "step": 419 }, { "epoch": 0.0706416617609957, "grad_norm": NaN, "learning_rate": 9.926226599939468e-05, "loss": 0.0, "step": 420 }, { "epoch": 0.07080985619375998, "grad_norm": NaN, "learning_rate": 9.925765941862706e-05, "loss": 0.0, "step": 421 }, { "epoch": 0.07097805062652426, "grad_norm": NaN, "learning_rate": 9.925303860787335e-05, "loss": 0.0, "step": 422 }, { "epoch": 0.07114624505928854, "grad_norm": NaN, "learning_rate": 9.924840356846845e-05, "loss": 0.0, "step": 423 }, { "epoch": 0.07131443949205281, "grad_norm": NaN, "learning_rate": 9.924375430175136e-05, "loss": 0.0, "step": 424 }, { "epoch": 0.07148263392481709, "grad_norm": NaN, "learning_rate": 9.923909080906524e-05, "loss": 0.0, "step": 425 }, { "epoch": 0.07165082835758137, "grad_norm": NaN, "learning_rate": 9.923441309175727e-05, "loss": 0.0, "step": 426 }, { "epoch": 0.07181902279034565, "grad_norm": NaN, "learning_rate": 9.922972115117883e-05, "loss": 0.0, "step": 427 }, { "epoch": 0.07198721722310991, "grad_norm": NaN, "learning_rate": 9.922501498868535e-05, "loss": 0.0, "step": 428 }, { "epoch": 0.07215541165587419, "grad_norm": NaN, "learning_rate": 9.92202946056364e-05, "loss": 0.0, "step": 429 }, { "epoch": 0.07232360608863847, "grad_norm": NaN, "learning_rate": 9.921556000339563e-05, "loss": 0.0, "step": 430 }, { "epoch": 0.07249180052140274, "grad_norm": NaN, "learning_rate": 9.921081118333081e-05, "loss": 0.0, "step": 431 }, { "epoch": 0.07265999495416701, "grad_norm": NaN, "learning_rate": 9.920604814681385e-05, "loss": 0.0, "step": 432 }, { "epoch": 0.0728281893869313, "grad_norm": NaN, "learning_rate": 9.920127089522071e-05, "loss": 0.0, "step": 433 }, { "epoch": 0.07299638381969557, "grad_norm": NaN, "learning_rate": 9.919647942993148e-05, "loss": 0.0, "step": 434 }, { "epoch": 0.07316457825245984, "grad_norm": NaN, "learning_rate": 9.919167375233041e-05, "loss": 0.0, "step": 435 }, { "epoch": 0.07333277268522412, "grad_norm": NaN, "learning_rate": 9.918685386380573e-05, "loss": 0.0, "step": 436 }, { "epoch": 0.0735009671179884, "grad_norm": NaN, "learning_rate": 9.918201976574992e-05, "loss": 0.0, "step": 437 }, { "epoch": 0.07366916155075268, "grad_norm": NaN, "learning_rate": 9.917717145955947e-05, "loss": 0.0, "step": 438 }, { "epoch": 0.07383735598351694, "grad_norm": NaN, "learning_rate": 9.917230894663499e-05, "loss": 0.0, "step": 439 }, { "epoch": 0.07400555041628122, "grad_norm": NaN, "learning_rate": 9.916743222838123e-05, "loss": 0.0, "step": 440 }, { "epoch": 0.0741737448490455, "grad_norm": NaN, "learning_rate": 9.9162541306207e-05, "loss": 0.0, "step": 441 }, { "epoch": 0.07434193928180977, "grad_norm": NaN, "learning_rate": 9.915763618152522e-05, "loss": 0.0, "step": 442 }, { "epoch": 0.07451013371457404, "grad_norm": NaN, "learning_rate": 9.915271685575297e-05, "loss": 0.0, "step": 443 }, { "epoch": 0.07467832814733832, "grad_norm": NaN, "learning_rate": 9.914778333031135e-05, "loss": 0.0, "step": 444 }, { "epoch": 0.0748465225801026, "grad_norm": NaN, "learning_rate": 9.914283560662562e-05, "loss": 0.0, "step": 445 }, { "epoch": 0.07501471701286687, "grad_norm": NaN, "learning_rate": 9.91378736861251e-05, "loss": 0.0, "step": 446 }, { "epoch": 0.07518291144563115, "grad_norm": NaN, "learning_rate": 9.913289757024327e-05, "loss": 0.0, "step": 447 }, { "epoch": 0.07535110587839543, "grad_norm": NaN, "learning_rate": 9.912790726041763e-05, "loss": 0.0, "step": 448 }, { "epoch": 0.0755193003111597, "grad_norm": NaN, "learning_rate": 9.912290275808986e-05, "loss": 0.0, "step": 449 }, { "epoch": 0.07568749474392397, "grad_norm": NaN, "learning_rate": 9.911788406470569e-05, "loss": 0.0, "step": 450 }, { "epoch": 0.07585568917668825, "grad_norm": NaN, "learning_rate": 9.911285118171496e-05, "loss": 0.0, "step": 451 }, { "epoch": 0.07602388360945253, "grad_norm": NaN, "learning_rate": 9.910780411057164e-05, "loss": 0.0, "step": 452 }, { "epoch": 0.0761920780422168, "grad_norm": NaN, "learning_rate": 9.910274285273373e-05, "loss": 0.0, "step": 453 }, { "epoch": 0.07636027247498108, "grad_norm": NaN, "learning_rate": 9.909766740966339e-05, "loss": 0.0, "step": 454 }, { "epoch": 0.07652846690774535, "grad_norm": NaN, "learning_rate": 9.909257778282685e-05, "loss": 0.0, "step": 455 }, { "epoch": 0.07669666134050963, "grad_norm": NaN, "learning_rate": 9.908747397369449e-05, "loss": 0.0, "step": 456 }, { "epoch": 0.0768648557732739, "grad_norm": NaN, "learning_rate": 9.908235598374069e-05, "loss": 0.0, "step": 457 }, { "epoch": 0.07703305020603818, "grad_norm": NaN, "learning_rate": 9.9077223814444e-05, "loss": 0.0, "step": 458 }, { "epoch": 0.07720124463880246, "grad_norm": NaN, "learning_rate": 9.907207746728706e-05, "loss": 0.0, "step": 459 }, { "epoch": 0.07736943907156674, "grad_norm": NaN, "learning_rate": 9.906691694375656e-05, "loss": 0.0, "step": 460 }, { "epoch": 0.077537633504331, "grad_norm": NaN, "learning_rate": 9.906174224534335e-05, "loss": 0.0, "step": 461 }, { "epoch": 0.07770582793709528, "grad_norm": NaN, "learning_rate": 9.905655337354231e-05, "loss": 0.0, "step": 462 }, { "epoch": 0.07787402236985956, "grad_norm": NaN, "learning_rate": 9.905135032985249e-05, "loss": 0.0, "step": 463 }, { "epoch": 0.07804221680262384, "grad_norm": NaN, "learning_rate": 9.904613311577695e-05, "loss": 0.0, "step": 464 }, { "epoch": 0.0782104112353881, "grad_norm": NaN, "learning_rate": 9.90409017328229e-05, "loss": 0.0, "step": 465 }, { "epoch": 0.07837860566815238, "grad_norm": NaN, "learning_rate": 9.903565618250165e-05, "loss": 0.0, "step": 466 }, { "epoch": 0.07854680010091666, "grad_norm": NaN, "learning_rate": 9.903039646632855e-05, "loss": 0.0, "step": 467 }, { "epoch": 0.07871499453368093, "grad_norm": NaN, "learning_rate": 9.902512258582307e-05, "loss": 0.0, "step": 468 }, { "epoch": 0.07888318896644521, "grad_norm": NaN, "learning_rate": 9.901983454250878e-05, "loss": 0.0, "step": 469 }, { "epoch": 0.07905138339920949, "grad_norm": NaN, "learning_rate": 9.901453233791337e-05, "loss": 0.0, "step": 470 }, { "epoch": 0.07921957783197377, "grad_norm": NaN, "learning_rate": 9.900921597356856e-05, "loss": 0.0, "step": 471 }, { "epoch": 0.07938777226473803, "grad_norm": NaN, "learning_rate": 9.900388545101018e-05, "loss": 0.0, "step": 472 }, { "epoch": 0.07955596669750231, "grad_norm": NaN, "learning_rate": 9.899854077177816e-05, "loss": 0.0, "step": 473 }, { "epoch": 0.07972416113026659, "grad_norm": NaN, "learning_rate": 9.899318193741654e-05, "loss": 0.0, "step": 474 }, { "epoch": 0.07989235556303087, "grad_norm": NaN, "learning_rate": 9.89878089494734e-05, "loss": 0.0, "step": 475 }, { "epoch": 0.08006054999579514, "grad_norm": NaN, "learning_rate": 9.898242180950097e-05, "loss": 0.0, "step": 476 }, { "epoch": 0.08022874442855941, "grad_norm": NaN, "learning_rate": 9.89770205190555e-05, "loss": 0.0, "step": 477 }, { "epoch": 0.0803969388613237, "grad_norm": NaN, "learning_rate": 9.897160507969738e-05, "loss": 0.0, "step": 478 }, { "epoch": 0.08056513329408796, "grad_norm": NaN, "learning_rate": 9.896617549299106e-05, "loss": 0.0, "step": 479 }, { "epoch": 0.08073332772685224, "grad_norm": NaN, "learning_rate": 9.896073176050508e-05, "loss": 0.0, "step": 480 }, { "epoch": 0.08090152215961652, "grad_norm": NaN, "learning_rate": 9.895527388381212e-05, "loss": 0.0, "step": 481 }, { "epoch": 0.0810697165923808, "grad_norm": NaN, "learning_rate": 9.894980186448885e-05, "loss": 0.0, "step": 482 }, { "epoch": 0.08123791102514506, "grad_norm": NaN, "learning_rate": 9.894431570411608e-05, "loss": 0.0, "step": 483 }, { "epoch": 0.08140610545790934, "grad_norm": NaN, "learning_rate": 9.893881540427872e-05, "loss": 0.0, "step": 484 }, { "epoch": 0.08157429989067362, "grad_norm": NaN, "learning_rate": 9.893330096656574e-05, "loss": 0.0, "step": 485 }, { "epoch": 0.0817424943234379, "grad_norm": NaN, "learning_rate": 9.892777239257019e-05, "loss": 0.0, "step": 486 }, { "epoch": 0.08191068875620217, "grad_norm": NaN, "learning_rate": 9.892222968388922e-05, "loss": 0.0, "step": 487 }, { "epoch": 0.08207888318896644, "grad_norm": NaN, "learning_rate": 9.891667284212404e-05, "loss": 0.0, "step": 488 }, { "epoch": 0.08224707762173072, "grad_norm": NaN, "learning_rate": 9.891110186887999e-05, "loss": 0.0, "step": 489 }, { "epoch": 0.08241527205449499, "grad_norm": NaN, "learning_rate": 9.890551676576645e-05, "loss": 0.0, "step": 490 }, { "epoch": 0.08258346648725927, "grad_norm": NaN, "learning_rate": 9.889991753439689e-05, "loss": 0.0, "step": 491 }, { "epoch": 0.08275166092002355, "grad_norm": NaN, "learning_rate": 9.889430417638885e-05, "loss": 0.0, "step": 492 }, { "epoch": 0.08291985535278783, "grad_norm": NaN, "learning_rate": 9.888867669336399e-05, "loss": 0.0, "step": 493 }, { "epoch": 0.08308804978555209, "grad_norm": NaN, "learning_rate": 9.888303508694802e-05, "loss": 0.0, "step": 494 }, { "epoch": 0.08325624421831637, "grad_norm": NaN, "learning_rate": 9.887737935877072e-05, "loss": 0.0, "step": 495 }, { "epoch": 0.08342443865108065, "grad_norm": NaN, "learning_rate": 9.8871709510466e-05, "loss": 0.0, "step": 496 }, { "epoch": 0.08359263308384493, "grad_norm": NaN, "learning_rate": 9.886602554367179e-05, "loss": 0.0, "step": 497 }, { "epoch": 0.0837608275166092, "grad_norm": NaN, "learning_rate": 9.886032746003013e-05, "loss": 0.0, "step": 498 }, { "epoch": 0.08392902194937348, "grad_norm": NaN, "learning_rate": 9.885461526118713e-05, "loss": 0.0, "step": 499 }, { "epoch": 0.08409721638213775, "grad_norm": NaN, "learning_rate": 9.8848888948793e-05, "loss": 0.0, "step": 500 }, { "epoch": 0.08426541081490203, "grad_norm": NaN, "learning_rate": 9.884314852450198e-05, "loss": 0.0, "step": 501 }, { "epoch": 0.0844336052476663, "grad_norm": NaN, "learning_rate": 9.883739398997243e-05, "loss": 0.0, "step": 502 }, { "epoch": 0.08460179968043058, "grad_norm": NaN, "learning_rate": 9.883162534686675e-05, "loss": 0.0, "step": 503 }, { "epoch": 0.08476999411319486, "grad_norm": NaN, "learning_rate": 9.882584259685147e-05, "loss": 0.0, "step": 504 }, { "epoch": 0.08493818854595912, "grad_norm": NaN, "learning_rate": 9.882004574159713e-05, "loss": 0.0, "step": 505 }, { "epoch": 0.0851063829787234, "grad_norm": NaN, "learning_rate": 9.88142347827784e-05, "loss": 0.0, "step": 506 }, { "epoch": 0.08527457741148768, "grad_norm": NaN, "learning_rate": 9.8808409722074e-05, "loss": 0.0, "step": 507 }, { "epoch": 0.08544277184425196, "grad_norm": NaN, "learning_rate": 9.88025705611667e-05, "loss": 0.0, "step": 508 }, { "epoch": 0.08561096627701623, "grad_norm": NaN, "learning_rate": 9.87967173017434e-05, "loss": 0.0, "step": 509 }, { "epoch": 0.0857791607097805, "grad_norm": NaN, "learning_rate": 9.879084994549503e-05, "loss": 0.0, "step": 510 }, { "epoch": 0.08594735514254478, "grad_norm": NaN, "learning_rate": 9.878496849411659e-05, "loss": 0.0, "step": 511 }, { "epoch": 0.08611554957530906, "grad_norm": NaN, "learning_rate": 9.877907294930717e-05, "loss": 0.0, "step": 512 }, { "epoch": 0.08628374400807333, "grad_norm": NaN, "learning_rate": 9.877316331276995e-05, "loss": 0.0, "step": 513 }, { "epoch": 0.08645193844083761, "grad_norm": NaN, "learning_rate": 9.876723958621213e-05, "loss": 0.0, "step": 514 }, { "epoch": 0.08662013287360189, "grad_norm": NaN, "learning_rate": 9.8761301771345e-05, "loss": 0.0, "step": 515 }, { "epoch": 0.08678832730636615, "grad_norm": NaN, "learning_rate": 9.875534986988397e-05, "loss": 0.0, "step": 516 }, { "epoch": 0.08695652173913043, "grad_norm": NaN, "learning_rate": 9.874938388354844e-05, "loss": 0.0, "step": 517 }, { "epoch": 0.08712471617189471, "grad_norm": NaN, "learning_rate": 9.874340381406194e-05, "loss": 0.0, "step": 518 }, { "epoch": 0.08729291060465899, "grad_norm": NaN, "learning_rate": 9.873740966315203e-05, "loss": 0.0, "step": 519 }, { "epoch": 0.08746110503742326, "grad_norm": NaN, "learning_rate": 9.873140143255036e-05, "loss": 0.0, "step": 520 }, { "epoch": 0.08762929947018754, "grad_norm": NaN, "learning_rate": 9.872537912399264e-05, "loss": 0.0, "step": 521 }, { "epoch": 0.08779749390295181, "grad_norm": NaN, "learning_rate": 9.871934273921864e-05, "loss": 0.0, "step": 522 }, { "epoch": 0.0879656883357161, "grad_norm": NaN, "learning_rate": 9.871329227997221e-05, "loss": 0.0, "step": 523 }, { "epoch": 0.08813388276848036, "grad_norm": NaN, "learning_rate": 9.870722774800127e-05, "loss": 0.0, "step": 524 }, { "epoch": 0.08830207720124464, "grad_norm": NaN, "learning_rate": 9.87011491450578e-05, "loss": 0.0, "step": 525 }, { "epoch": 0.08847027163400892, "grad_norm": NaN, "learning_rate": 9.86950564728978e-05, "loss": 0.0, "step": 526 }, { "epoch": 0.08863846606677318, "grad_norm": NaN, "learning_rate": 9.86889497332814e-05, "loss": 0.0, "step": 527 }, { "epoch": 0.08880666049953746, "grad_norm": NaN, "learning_rate": 9.868282892797279e-05, "loss": 0.0, "step": 528 }, { "epoch": 0.08897485493230174, "grad_norm": NaN, "learning_rate": 9.867669405874016e-05, "loss": 0.0, "step": 529 }, { "epoch": 0.08914304936506602, "grad_norm": NaN, "learning_rate": 9.867054512735584e-05, "loss": 0.0, "step": 530 }, { "epoch": 0.08931124379783029, "grad_norm": NaN, "learning_rate": 9.866438213559617e-05, "loss": 0.0, "step": 531 }, { "epoch": 0.08947943823059457, "grad_norm": NaN, "learning_rate": 9.865820508524157e-05, "loss": 0.0, "step": 532 }, { "epoch": 0.08964763266335884, "grad_norm": NaN, "learning_rate": 9.865201397807653e-05, "loss": 0.0, "step": 533 }, { "epoch": 0.08981582709612312, "grad_norm": NaN, "learning_rate": 9.864580881588959e-05, "loss": 0.0, "step": 534 }, { "epoch": 0.08998402152888739, "grad_norm": NaN, "learning_rate": 9.863958960047335e-05, "loss": 0.0, "step": 535 }, { "epoch": 0.09015221596165167, "grad_norm": NaN, "learning_rate": 9.863335633362445e-05, "loss": 0.0, "step": 536 }, { "epoch": 0.09032041039441595, "grad_norm": NaN, "learning_rate": 9.862710901714365e-05, "loss": 0.0, "step": 537 }, { "epoch": 0.09048860482718021, "grad_norm": NaN, "learning_rate": 9.86208476528357e-05, "loss": 0.0, "step": 538 }, { "epoch": 0.09065679925994449, "grad_norm": NaN, "learning_rate": 9.861457224250944e-05, "loss": 0.0, "step": 539 }, { "epoch": 0.09082499369270877, "grad_norm": NaN, "learning_rate": 9.860828278797778e-05, "loss": 0.0, "step": 540 }, { "epoch": 0.09099318812547305, "grad_norm": NaN, "learning_rate": 9.860197929105768e-05, "loss": 0.0, "step": 541 }, { "epoch": 0.09116138255823732, "grad_norm": NaN, "learning_rate": 9.859566175357011e-05, "loss": 0.0, "step": 542 }, { "epoch": 0.0913295769910016, "grad_norm": NaN, "learning_rate": 9.858933017734018e-05, "loss": 0.0, "step": 543 }, { "epoch": 0.09149777142376588, "grad_norm": NaN, "learning_rate": 9.858298456419699e-05, "loss": 0.0, "step": 544 }, { "epoch": 0.09166596585653015, "grad_norm": NaN, "learning_rate": 9.857662491597373e-05, "loss": 0.0, "step": 545 }, { "epoch": 0.09183416028929442, "grad_norm": NaN, "learning_rate": 9.85702512345076e-05, "loss": 0.0, "step": 546 }, { "epoch": 0.0920023547220587, "grad_norm": NaN, "learning_rate": 9.856386352163992e-05, "loss": 0.0, "step": 547 }, { "epoch": 0.09217054915482298, "grad_norm": NaN, "learning_rate": 9.8557461779216e-05, "loss": 0.0, "step": 548 }, { "epoch": 0.09233874358758726, "grad_norm": NaN, "learning_rate": 9.855104600908527e-05, "loss": 0.0, "step": 549 }, { "epoch": 0.09250693802035152, "grad_norm": NaN, "learning_rate": 9.854461621310113e-05, "loss": 0.0, "step": 550 }, { "epoch": 0.0926751324531158, "grad_norm": NaN, "learning_rate": 9.853817239312112e-05, "loss": 0.0, "step": 551 }, { "epoch": 0.09284332688588008, "grad_norm": NaN, "learning_rate": 9.853171455100675e-05, "loss": 0.0, "step": 552 }, { "epoch": 0.09301152131864435, "grad_norm": NaN, "learning_rate": 9.852524268862363e-05, "loss": 0.0, "step": 553 }, { "epoch": 0.09317971575140863, "grad_norm": NaN, "learning_rate": 9.851875680784142e-05, "loss": 0.0, "step": 554 }, { "epoch": 0.0933479101841729, "grad_norm": NaN, "learning_rate": 9.85122569105338e-05, "loss": 0.0, "step": 555 }, { "epoch": 0.09351610461693718, "grad_norm": NaN, "learning_rate": 9.850574299857854e-05, "loss": 0.0, "step": 556 }, { "epoch": 0.09368429904970145, "grad_norm": NaN, "learning_rate": 9.849921507385741e-05, "loss": 0.0, "step": 557 }, { "epoch": 0.09385249348246573, "grad_norm": NaN, "learning_rate": 9.849267313825628e-05, "loss": 0.0, "step": 558 }, { "epoch": 0.09402068791523001, "grad_norm": NaN, "learning_rate": 9.8486117193665e-05, "loss": 0.0, "step": 559 }, { "epoch": 0.09418888234799429, "grad_norm": NaN, "learning_rate": 9.847954724197757e-05, "loss": 0.0, "step": 560 }, { "epoch": 0.09435707678075855, "grad_norm": NaN, "learning_rate": 9.847296328509194e-05, "loss": 0.0, "step": 561 }, { "epoch": 0.09452527121352283, "grad_norm": NaN, "learning_rate": 9.846636532491014e-05, "loss": 0.0, "step": 562 }, { "epoch": 0.09469346564628711, "grad_norm": NaN, "learning_rate": 9.845975336333825e-05, "loss": 0.0, "step": 563 }, { "epoch": 0.09486166007905138, "grad_norm": NaN, "learning_rate": 9.845312740228638e-05, "loss": 0.0, "step": 564 }, { "epoch": 0.09502985451181566, "grad_norm": NaN, "learning_rate": 9.844648744366873e-05, "loss": 0.0, "step": 565 }, { "epoch": 0.09519804894457994, "grad_norm": NaN, "learning_rate": 9.843983348940348e-05, "loss": 0.0, "step": 566 }, { "epoch": 0.09536624337734421, "grad_norm": NaN, "learning_rate": 9.84331655414129e-05, "loss": 0.0, "step": 567 }, { "epoch": 0.09553443781010848, "grad_norm": NaN, "learning_rate": 9.842648360162327e-05, "loss": 0.0, "step": 568 }, { "epoch": 0.09570263224287276, "grad_norm": NaN, "learning_rate": 9.841978767196495e-05, "loss": 0.0, "step": 569 }, { "epoch": 0.09587082667563704, "grad_norm": NaN, "learning_rate": 9.84130777543723e-05, "loss": 0.0, "step": 570 }, { "epoch": 0.09603902110840132, "grad_norm": NaN, "learning_rate": 9.840635385078372e-05, "loss": 0.0, "step": 571 }, { "epoch": 0.09620721554116558, "grad_norm": NaN, "learning_rate": 9.839961596314172e-05, "loss": 0.0, "step": 572 }, { "epoch": 0.09637540997392986, "grad_norm": NaN, "learning_rate": 9.839286409339276e-05, "loss": 0.0, "step": 573 }, { "epoch": 0.09654360440669414, "grad_norm": NaN, "learning_rate": 9.838609824348742e-05, "loss": 0.0, "step": 574 }, { "epoch": 0.09671179883945841, "grad_norm": NaN, "learning_rate": 9.837931841538022e-05, "loss": 0.0, "step": 575 }, { "epoch": 0.09687999327222269, "grad_norm": NaN, "learning_rate": 9.837252461102981e-05, "loss": 0.0, "step": 576 }, { "epoch": 0.09704818770498697, "grad_norm": NaN, "learning_rate": 9.836571683239887e-05, "loss": 0.0, "step": 577 }, { "epoch": 0.09721638213775124, "grad_norm": NaN, "learning_rate": 9.835889508145405e-05, "loss": 0.0, "step": 578 }, { "epoch": 0.09738457657051551, "grad_norm": NaN, "learning_rate": 9.83520593601661e-05, "loss": 0.0, "step": 579 }, { "epoch": 0.09755277100327979, "grad_norm": NaN, "learning_rate": 9.834520967050975e-05, "loss": 0.0, "step": 580 }, { "epoch": 0.09772096543604407, "grad_norm": NaN, "learning_rate": 9.833834601446386e-05, "loss": 0.0, "step": 581 }, { "epoch": 0.09788915986880835, "grad_norm": NaN, "learning_rate": 9.833146839401119e-05, "loss": 0.0, "step": 582 }, { "epoch": 0.09805735430157261, "grad_norm": NaN, "learning_rate": 9.832457681113866e-05, "loss": 0.0, "step": 583 }, { "epoch": 0.09822554873433689, "grad_norm": NaN, "learning_rate": 9.831767126783717e-05, "loss": 0.0, "step": 584 }, { "epoch": 0.09839374316710117, "grad_norm": NaN, "learning_rate": 9.831075176610163e-05, "loss": 0.0, "step": 585 }, { "epoch": 0.09856193759986545, "grad_norm": NaN, "learning_rate": 9.8303818307931e-05, "loss": 0.0, "step": 586 }, { "epoch": 0.09873013203262972, "grad_norm": NaN, "learning_rate": 9.829687089532831e-05, "loss": 0.0, "step": 587 }, { "epoch": 0.098898326465394, "grad_norm": NaN, "learning_rate": 9.828990953030058e-05, "loss": 0.0, "step": 588 }, { "epoch": 0.09906652089815828, "grad_norm": NaN, "learning_rate": 9.828293421485885e-05, "loss": 0.0, "step": 589 }, { "epoch": 0.09923471533092254, "grad_norm": NaN, "learning_rate": 9.827594495101823e-05, "loss": 0.0, "step": 590 }, { "epoch": 0.09940290976368682, "grad_norm": NaN, "learning_rate": 9.826894174079785e-05, "loss": 0.0, "step": 591 }, { "epoch": 0.0995711041964511, "grad_norm": NaN, "learning_rate": 9.826192458622083e-05, "loss": 0.0, "step": 592 }, { "epoch": 0.09973929862921538, "grad_norm": NaN, "learning_rate": 9.825489348931435e-05, "loss": 0.0, "step": 593 }, { "epoch": 0.09990749306197964, "grad_norm": NaN, "learning_rate": 9.824784845210966e-05, "loss": 0.0, "step": 594 }, { "epoch": 0.10007568749474392, "grad_norm": NaN, "learning_rate": 9.824078947664193e-05, "loss": 0.0, "step": 595 }, { "epoch": 0.1002438819275082, "grad_norm": NaN, "learning_rate": 9.823371656495046e-05, "loss": 0.0, "step": 596 }, { "epoch": 0.10041207636027248, "grad_norm": NaN, "learning_rate": 9.822662971907852e-05, "loss": 0.0, "step": 597 }, { "epoch": 0.10058027079303675, "grad_norm": NaN, "learning_rate": 9.821952894107343e-05, "loss": 0.0, "step": 598 }, { "epoch": 0.10074846522580103, "grad_norm": NaN, "learning_rate": 9.821241423298651e-05, "loss": 0.0, "step": 599 }, { "epoch": 0.1009166596585653, "grad_norm": NaN, "learning_rate": 9.820528559687312e-05, "loss": 0.0, "step": 600 }, { "epoch": 0.10108485409132957, "grad_norm": NaN, "learning_rate": 9.819814303479267e-05, "loss": 0.0, "step": 601 }, { "epoch": 0.10125304852409385, "grad_norm": NaN, "learning_rate": 9.819098654880854e-05, "loss": 0.0, "step": 602 }, { "epoch": 0.10142124295685813, "grad_norm": NaN, "learning_rate": 9.818381614098817e-05, "loss": 0.0, "step": 603 }, { "epoch": 0.10158943738962241, "grad_norm": NaN, "learning_rate": 9.817663181340299e-05, "loss": 0.0, "step": 604 }, { "epoch": 0.10175763182238667, "grad_norm": NaN, "learning_rate": 9.81694335681285e-05, "loss": 0.0, "step": 605 }, { "epoch": 0.10192582625515095, "grad_norm": NaN, "learning_rate": 9.816222140724418e-05, "loss": 0.0, "step": 606 }, { "epoch": 0.10209402068791523, "grad_norm": NaN, "learning_rate": 9.815499533283354e-05, "loss": 0.0, "step": 607 }, { "epoch": 0.10226221512067951, "grad_norm": NaN, "learning_rate": 9.814775534698413e-05, "loss": 0.0, "step": 608 }, { "epoch": 0.10243040955344378, "grad_norm": NaN, "learning_rate": 9.814050145178747e-05, "loss": 0.0, "step": 609 }, { "epoch": 0.10259860398620806, "grad_norm": NaN, "learning_rate": 9.813323364933915e-05, "loss": 0.0, "step": 610 }, { "epoch": 0.10276679841897234, "grad_norm": NaN, "learning_rate": 9.812595194173875e-05, "loss": 0.0, "step": 611 }, { "epoch": 0.1029349928517366, "grad_norm": NaN, "learning_rate": 9.811865633108987e-05, "loss": 0.0, "step": 612 }, { "epoch": 0.10310318728450088, "grad_norm": NaN, "learning_rate": 9.811134681950016e-05, "loss": 0.0, "step": 613 }, { "epoch": 0.10327138171726516, "grad_norm": NaN, "learning_rate": 9.810402340908121e-05, "loss": 0.0, "step": 614 }, { "epoch": 0.10343957615002944, "grad_norm": NaN, "learning_rate": 9.80966861019487e-05, "loss": 0.0, "step": 615 }, { "epoch": 0.1036077705827937, "grad_norm": NaN, "learning_rate": 9.808933490022229e-05, "loss": 0.0, "step": 616 }, { "epoch": 0.10377596501555798, "grad_norm": NaN, "learning_rate": 9.808196980602566e-05, "loss": 0.0, "step": 617 }, { "epoch": 0.10394415944832226, "grad_norm": NaN, "learning_rate": 9.807459082148648e-05, "loss": 0.0, "step": 618 }, { "epoch": 0.10411235388108654, "grad_norm": NaN, "learning_rate": 9.806719794873651e-05, "loss": 0.0, "step": 619 }, { "epoch": 0.10428054831385081, "grad_norm": NaN, "learning_rate": 9.805979118991142e-05, "loss": 0.0, "step": 620 }, { "epoch": 0.10444874274661509, "grad_norm": NaN, "learning_rate": 9.805237054715095e-05, "loss": 0.0, "step": 621 }, { "epoch": 0.10461693717937937, "grad_norm": NaN, "learning_rate": 9.804493602259885e-05, "loss": 0.0, "step": 622 }, { "epoch": 0.10478513161214364, "grad_norm": NaN, "learning_rate": 9.803748761840287e-05, "loss": 0.0, "step": 623 }, { "epoch": 0.10495332604490791, "grad_norm": NaN, "learning_rate": 9.803002533671478e-05, "loss": 0.0, "step": 624 }, { "epoch": 0.10512152047767219, "grad_norm": NaN, "learning_rate": 9.802254917969032e-05, "loss": 0.0, "step": 625 }, { "epoch": 0.10528971491043647, "grad_norm": NaN, "learning_rate": 9.801505914948929e-05, "loss": 0.0, "step": 626 }, { "epoch": 0.10545790934320073, "grad_norm": NaN, "learning_rate": 9.800755524827548e-05, "loss": 0.0, "step": 627 }, { "epoch": 0.10562610377596501, "grad_norm": NaN, "learning_rate": 9.800003747821667e-05, "loss": 0.0, "step": 628 }, { "epoch": 0.10579429820872929, "grad_norm": NaN, "learning_rate": 9.799250584148466e-05, "loss": 0.0, "step": 629 }, { "epoch": 0.10596249264149357, "grad_norm": NaN, "learning_rate": 9.798496034025526e-05, "loss": 0.0, "step": 630 }, { "epoch": 0.10613068707425784, "grad_norm": NaN, "learning_rate": 9.797740097670827e-05, "loss": 0.0, "step": 631 }, { "epoch": 0.10629888150702212, "grad_norm": NaN, "learning_rate": 9.796982775302755e-05, "loss": 0.0, "step": 632 }, { "epoch": 0.1064670759397864, "grad_norm": NaN, "learning_rate": 9.796224067140085e-05, "loss": 0.0, "step": 633 }, { "epoch": 0.10663527037255068, "grad_norm": NaN, "learning_rate": 9.795463973402003e-05, "loss": 0.0, "step": 634 }, { "epoch": 0.10680346480531494, "grad_norm": NaN, "learning_rate": 9.794702494308093e-05, "loss": 0.0, "step": 635 }, { "epoch": 0.10697165923807922, "grad_norm": NaN, "learning_rate": 9.793939630078335e-05, "loss": 0.0, "step": 636 }, { "epoch": 0.1071398536708435, "grad_norm": NaN, "learning_rate": 9.793175380933113e-05, "loss": 0.0, "step": 637 }, { "epoch": 0.10730804810360776, "grad_norm": NaN, "learning_rate": 9.792409747093212e-05, "loss": 0.0, "step": 638 }, { "epoch": 0.10747624253637204, "grad_norm": NaN, "learning_rate": 9.791642728779811e-05, "loss": 0.0, "step": 639 }, { "epoch": 0.10764443696913632, "grad_norm": NaN, "learning_rate": 9.790874326214496e-05, "loss": 0.0, "step": 640 }, { "epoch": 0.1078126314019006, "grad_norm": NaN, "learning_rate": 9.790104539619248e-05, "loss": 0.0, "step": 641 }, { "epoch": 0.10798082583466487, "grad_norm": NaN, "learning_rate": 9.789333369216451e-05, "loss": 0.0, "step": 642 }, { "epoch": 0.10814902026742915, "grad_norm": NaN, "learning_rate": 9.788560815228889e-05, "loss": 0.0, "step": 643 }, { "epoch": 0.10831721470019343, "grad_norm": NaN, "learning_rate": 9.787786877879741e-05, "loss": 0.0, "step": 644 }, { "epoch": 0.1084854091329577, "grad_norm": NaN, "learning_rate": 9.78701155739259e-05, "loss": 0.0, "step": 645 }, { "epoch": 0.10865360356572197, "grad_norm": NaN, "learning_rate": 9.786234853991418e-05, "loss": 0.0, "step": 646 }, { "epoch": 0.10882179799848625, "grad_norm": NaN, "learning_rate": 9.785456767900607e-05, "loss": 0.0, "step": 647 }, { "epoch": 0.10898999243125053, "grad_norm": NaN, "learning_rate": 9.784677299344934e-05, "loss": 0.0, "step": 648 }, { "epoch": 0.1091581868640148, "grad_norm": NaN, "learning_rate": 9.78389644854958e-05, "loss": 0.0, "step": 649 }, { "epoch": 0.10932638129677907, "grad_norm": NaN, "learning_rate": 9.783114215740128e-05, "loss": 0.0, "step": 650 }, { "epoch": 0.10949457572954335, "grad_norm": NaN, "learning_rate": 9.782330601142551e-05, "loss": 0.0, "step": 651 }, { "epoch": 0.10966277016230763, "grad_norm": NaN, "learning_rate": 9.781545604983228e-05, "loss": 0.0, "step": 652 }, { "epoch": 0.1098309645950719, "grad_norm": NaN, "learning_rate": 9.780759227488936e-05, "loss": 0.0, "step": 653 }, { "epoch": 0.10999915902783618, "grad_norm": NaN, "learning_rate": 9.779971468886853e-05, "loss": 0.0, "step": 654 }, { "epoch": 0.11016735346060046, "grad_norm": NaN, "learning_rate": 9.779182329404549e-05, "loss": 0.0, "step": 655 }, { "epoch": 0.11033554789336474, "grad_norm": NaN, "learning_rate": 9.778391809270002e-05, "loss": 0.0, "step": 656 }, { "epoch": 0.110503742326129, "grad_norm": NaN, "learning_rate": 9.77759990871158e-05, "loss": 0.0, "step": 657 }, { "epoch": 0.11067193675889328, "grad_norm": NaN, "learning_rate": 9.776806627958057e-05, "loss": 0.0, "step": 658 }, { "epoch": 0.11084013119165756, "grad_norm": NaN, "learning_rate": 9.776011967238604e-05, "loss": 0.0, "step": 659 }, { "epoch": 0.11100832562442182, "grad_norm": NaN, "learning_rate": 9.775215926782788e-05, "loss": 0.0, "step": 660 }, { "epoch": 0.1111765200571861, "grad_norm": NaN, "learning_rate": 9.774418506820575e-05, "loss": 0.0, "step": 661 }, { "epoch": 0.11134471448995038, "grad_norm": NaN, "learning_rate": 9.773619707582333e-05, "loss": 0.0, "step": 662 }, { "epoch": 0.11151290892271466, "grad_norm": NaN, "learning_rate": 9.772819529298824e-05, "loss": 0.0, "step": 663 }, { "epoch": 0.11168110335547893, "grad_norm": NaN, "learning_rate": 9.772017972201213e-05, "loss": 0.0, "step": 664 }, { "epoch": 0.11184929778824321, "grad_norm": NaN, "learning_rate": 9.771215036521057e-05, "loss": 0.0, "step": 665 }, { "epoch": 0.11201749222100749, "grad_norm": NaN, "learning_rate": 9.770410722490319e-05, "loss": 0.0, "step": 666 }, { "epoch": 0.11218568665377177, "grad_norm": NaN, "learning_rate": 9.769605030341357e-05, "loss": 0.0, "step": 667 }, { "epoch": 0.11235388108653603, "grad_norm": NaN, "learning_rate": 9.768797960306921e-05, "loss": 0.0, "step": 668 }, { "epoch": 0.11252207551930031, "grad_norm": NaN, "learning_rate": 9.76798951262017e-05, "loss": 0.0, "step": 669 }, { "epoch": 0.11269026995206459, "grad_norm": NaN, "learning_rate": 9.767179687514654e-05, "loss": 0.0, "step": 670 }, { "epoch": 0.11285846438482887, "grad_norm": NaN, "learning_rate": 9.76636848522432e-05, "loss": 0.0, "step": 671 }, { "epoch": 0.11302665881759313, "grad_norm": NaN, "learning_rate": 9.765555905983517e-05, "loss": 0.0, "step": 672 }, { "epoch": 0.11319485325035741, "grad_norm": NaN, "learning_rate": 9.764741950026991e-05, "loss": 0.0, "step": 673 }, { "epoch": 0.11336304768312169, "grad_norm": NaN, "learning_rate": 9.763926617589883e-05, "loss": 0.0, "step": 674 }, { "epoch": 0.11353124211588596, "grad_norm": NaN, "learning_rate": 9.763109908907734e-05, "loss": 0.0, "step": 675 }, { "epoch": 0.11369943654865024, "grad_norm": NaN, "learning_rate": 9.762291824216484e-05, "loss": 0.0, "step": 676 }, { "epoch": 0.11386763098141452, "grad_norm": NaN, "learning_rate": 9.761472363752465e-05, "loss": 0.0, "step": 677 }, { "epoch": 0.1140358254141788, "grad_norm": NaN, "learning_rate": 9.760651527752413e-05, "loss": 0.0, "step": 678 }, { "epoch": 0.11420401984694306, "grad_norm": NaN, "learning_rate": 9.759829316453456e-05, "loss": 0.0, "step": 679 }, { "epoch": 0.11437221427970734, "grad_norm": NaN, "learning_rate": 9.759005730093123e-05, "loss": 0.0, "step": 680 }, { "epoch": 0.11454040871247162, "grad_norm": NaN, "learning_rate": 9.758180768909337e-05, "loss": 0.0, "step": 681 }, { "epoch": 0.1147086031452359, "grad_norm": NaN, "learning_rate": 9.757354433140425e-05, "loss": 0.0, "step": 682 }, { "epoch": 0.11487679757800016, "grad_norm": NaN, "learning_rate": 9.7565267230251e-05, "loss": 0.0, "step": 683 }, { "epoch": 0.11504499201076444, "grad_norm": NaN, "learning_rate": 9.75569763880248e-05, "loss": 0.0, "step": 684 }, { "epoch": 0.11521318644352872, "grad_norm": NaN, "learning_rate": 9.754867180712082e-05, "loss": 0.0, "step": 685 }, { "epoch": 0.11538138087629299, "grad_norm": NaN, "learning_rate": 9.75403534899381e-05, "loss": 0.0, "step": 686 }, { "epoch": 0.11554957530905727, "grad_norm": NaN, "learning_rate": 9.753202143887977e-05, "loss": 0.0, "step": 687 }, { "epoch": 0.11571776974182155, "grad_norm": NaN, "learning_rate": 9.752367565635281e-05, "loss": 0.0, "step": 688 }, { "epoch": 0.11588596417458583, "grad_norm": NaN, "learning_rate": 9.751531614476826e-05, "loss": 0.0, "step": 689 }, { "epoch": 0.11605415860735009, "grad_norm": NaN, "learning_rate": 9.750694290654108e-05, "loss": 0.0, "step": 690 }, { "epoch": 0.11622235304011437, "grad_norm": NaN, "learning_rate": 9.749855594409018e-05, "loss": 0.0, "step": 691 }, { "epoch": 0.11639054747287865, "grad_norm": NaN, "learning_rate": 9.749015525983852e-05, "loss": 0.0, "step": 692 }, { "epoch": 0.11655874190564293, "grad_norm": NaN, "learning_rate": 9.74817408562129e-05, "loss": 0.0, "step": 693 }, { "epoch": 0.1167269363384072, "grad_norm": NaN, "learning_rate": 9.747331273564418e-05, "loss": 0.0, "step": 694 }, { "epoch": 0.11689513077117147, "grad_norm": NaN, "learning_rate": 9.746487090056713e-05, "loss": 0.0, "step": 695 }, { "epoch": 0.11706332520393575, "grad_norm": NaN, "learning_rate": 9.745641535342053e-05, "loss": 0.0, "step": 696 }, { "epoch": 0.11723151963670002, "grad_norm": NaN, "learning_rate": 9.744794609664707e-05, "loss": 0.0, "step": 697 }, { "epoch": 0.1173997140694643, "grad_norm": NaN, "learning_rate": 9.743946313269344e-05, "loss": 0.0, "step": 698 }, { "epoch": 0.11756790850222858, "grad_norm": NaN, "learning_rate": 9.743096646401024e-05, "loss": 0.0, "step": 699 }, { "epoch": 0.11773610293499286, "grad_norm": NaN, "learning_rate": 9.742245609305212e-05, "loss": 0.0, "step": 700 }, { "epoch": 0.11790429736775712, "grad_norm": NaN, "learning_rate": 9.741393202227758e-05, "loss": 0.0, "step": 701 }, { "epoch": 0.1180724918005214, "grad_norm": NaN, "learning_rate": 9.740539425414912e-05, "loss": 0.0, "step": 702 }, { "epoch": 0.11824068623328568, "grad_norm": NaN, "learning_rate": 9.739684279113326e-05, "loss": 0.0, "step": 703 }, { "epoch": 0.11840888066604996, "grad_norm": NaN, "learning_rate": 9.73882776357004e-05, "loss": 0.0, "step": 704 }, { "epoch": 0.11857707509881422, "grad_norm": NaN, "learning_rate": 9.73796987903249e-05, "loss": 0.0, "step": 705 }, { "epoch": 0.1187452695315785, "grad_norm": NaN, "learning_rate": 9.737110625748508e-05, "loss": 0.0, "step": 706 }, { "epoch": 0.11891346396434278, "grad_norm": NaN, "learning_rate": 9.736250003966327e-05, "loss": 0.0, "step": 707 }, { "epoch": 0.11908165839710706, "grad_norm": NaN, "learning_rate": 9.73538801393457e-05, "loss": 0.0, "step": 708 }, { "epoch": 0.11924985282987133, "grad_norm": NaN, "learning_rate": 9.734524655902253e-05, "loss": 0.0, "step": 709 }, { "epoch": 0.11941804726263561, "grad_norm": NaN, "learning_rate": 9.733659930118793e-05, "loss": 0.0, "step": 710 }, { "epoch": 0.11958624169539989, "grad_norm": NaN, "learning_rate": 9.732793836833999e-05, "loss": 0.0, "step": 711 }, { "epoch": 0.11975443612816415, "grad_norm": NaN, "learning_rate": 9.731926376298077e-05, "loss": 0.0, "step": 712 }, { "epoch": 0.11992263056092843, "grad_norm": NaN, "learning_rate": 9.731057548761626e-05, "loss": 0.0, "step": 713 }, { "epoch": 0.12009082499369271, "grad_norm": NaN, "learning_rate": 9.73018735447564e-05, "loss": 0.0, "step": 714 }, { "epoch": 0.12025901942645699, "grad_norm": NaN, "learning_rate": 9.729315793691507e-05, "loss": 0.0, "step": 715 }, { "epoch": 0.12042721385922125, "grad_norm": NaN, "learning_rate": 9.728442866661013e-05, "loss": 0.0, "step": 716 }, { "epoch": 0.12059540829198553, "grad_norm": NaN, "learning_rate": 9.727568573636337e-05, "loss": 0.0, "step": 717 }, { "epoch": 0.12076360272474981, "grad_norm": NaN, "learning_rate": 9.72669291487005e-05, "loss": 0.0, "step": 718 }, { "epoch": 0.12093179715751409, "grad_norm": NaN, "learning_rate": 9.725815890615124e-05, "loss": 0.0, "step": 719 }, { "epoch": 0.12109999159027836, "grad_norm": NaN, "learning_rate": 9.724937501124918e-05, "loss": 0.0, "step": 720 }, { "epoch": 0.12126818602304264, "grad_norm": NaN, "learning_rate": 9.72405774665319e-05, "loss": 0.0, "step": 721 }, { "epoch": 0.12143638045580692, "grad_norm": NaN, "learning_rate": 9.723176627454094e-05, "loss": 0.0, "step": 722 }, { "epoch": 0.12160457488857118, "grad_norm": NaN, "learning_rate": 9.722294143782171e-05, "loss": 0.0, "step": 723 }, { "epoch": 0.12177276932133546, "grad_norm": NaN, "learning_rate": 9.721410295892363e-05, "loss": 0.0, "step": 724 }, { "epoch": 0.12194096375409974, "grad_norm": NaN, "learning_rate": 9.720525084040005e-05, "loss": 0.0, "step": 725 }, { "epoch": 0.12210915818686402, "grad_norm": NaN, "learning_rate": 9.71963850848082e-05, "loss": 0.0, "step": 726 }, { "epoch": 0.12227735261962829, "grad_norm": NaN, "learning_rate": 9.718750569470934e-05, "loss": 0.0, "step": 727 }, { "epoch": 0.12244554705239256, "grad_norm": NaN, "learning_rate": 9.717861267266862e-05, "loss": 0.0, "step": 728 }, { "epoch": 0.12261374148515684, "grad_norm": NaN, "learning_rate": 9.716970602125512e-05, "loss": 0.0, "step": 729 }, { "epoch": 0.12278193591792112, "grad_norm": NaN, "learning_rate": 9.716078574304189e-05, "loss": 0.0, "step": 730 }, { "epoch": 0.12295013035068539, "grad_norm": NaN, "learning_rate": 9.715185184060588e-05, "loss": 0.0, "step": 731 }, { "epoch": 0.12311832478344967, "grad_norm": NaN, "learning_rate": 9.714290431652803e-05, "loss": 0.0, "step": 732 }, { "epoch": 0.12328651921621395, "grad_norm": NaN, "learning_rate": 9.713394317339313e-05, "loss": 0.0, "step": 733 }, { "epoch": 0.12345471364897821, "grad_norm": NaN, "learning_rate": 9.712496841379e-05, "loss": 0.0, "step": 734 }, { "epoch": 0.12362290808174249, "grad_norm": NaN, "learning_rate": 9.711598004031131e-05, "loss": 0.0, "step": 735 }, { "epoch": 0.12379110251450677, "grad_norm": NaN, "learning_rate": 9.710697805555372e-05, "loss": 0.0, "step": 736 }, { "epoch": 0.12395929694727105, "grad_norm": NaN, "learning_rate": 9.709796246211777e-05, "loss": 0.0, "step": 737 }, { "epoch": 0.12412749138003532, "grad_norm": NaN, "learning_rate": 9.708893326260801e-05, "loss": 0.0, "step": 738 }, { "epoch": 0.1242956858127996, "grad_norm": NaN, "learning_rate": 9.707989045963288e-05, "loss": 0.0, "step": 739 }, { "epoch": 0.12446388024556387, "grad_norm": NaN, "learning_rate": 9.70708340558047e-05, "loss": 0.0, "step": 740 }, { "epoch": 0.12463207467832815, "grad_norm": NaN, "learning_rate": 9.706176405373979e-05, "loss": 0.0, "step": 741 }, { "epoch": 0.12480026911109242, "grad_norm": NaN, "learning_rate": 9.705268045605837e-05, "loss": 0.0, "step": 742 }, { "epoch": 0.1249684635438567, "grad_norm": NaN, "learning_rate": 9.704358326538457e-05, "loss": 0.0, "step": 743 }, { "epoch": 0.12513665797662096, "grad_norm": NaN, "learning_rate": 9.70344724843465e-05, "loss": 0.0, "step": 744 }, { "epoch": 0.12530485240938524, "grad_norm": NaN, "learning_rate": 9.702534811557615e-05, "loss": 0.0, "step": 745 }, { "epoch": 0.12547304684214952, "grad_norm": NaN, "learning_rate": 9.701621016170943e-05, "loss": 0.0, "step": 746 }, { "epoch": 0.1256412412749138, "grad_norm": NaN, "learning_rate": 9.70070586253862e-05, "loss": 0.0, "step": 747 }, { "epoch": 0.12580943570767808, "grad_norm": NaN, "learning_rate": 9.699789350925026e-05, "loss": 0.0, "step": 748 }, { "epoch": 0.12597763014044236, "grad_norm": NaN, "learning_rate": 9.698871481594927e-05, "loss": 0.0, "step": 749 }, { "epoch": 0.12614582457320664, "grad_norm": NaN, "learning_rate": 9.69795225481349e-05, "loss": 0.0, "step": 750 }, { "epoch": 0.1263140190059709, "grad_norm": NaN, "learning_rate": 9.697031670846265e-05, "loss": 0.0, "step": 751 }, { "epoch": 0.12648221343873517, "grad_norm": NaN, "learning_rate": 9.6961097299592e-05, "loss": 0.0, "step": 752 }, { "epoch": 0.12665040787149945, "grad_norm": NaN, "learning_rate": 9.695186432418635e-05, "loss": 0.0, "step": 753 }, { "epoch": 0.12681860230426373, "grad_norm": NaN, "learning_rate": 9.694261778491297e-05, "loss": 0.0, "step": 754 }, { "epoch": 0.126986796737028, "grad_norm": NaN, "learning_rate": 9.693335768444311e-05, "loss": 0.0, "step": 755 }, { "epoch": 0.1271549911697923, "grad_norm": NaN, "learning_rate": 9.69240840254519e-05, "loss": 0.0, "step": 756 }, { "epoch": 0.12732318560255657, "grad_norm": NaN, "learning_rate": 9.69147968106184e-05, "loss": 0.0, "step": 757 }, { "epoch": 0.12749138003532082, "grad_norm": NaN, "learning_rate": 9.690549604262555e-05, "loss": 0.0, "step": 758 }, { "epoch": 0.1276595744680851, "grad_norm": NaN, "learning_rate": 9.689618172416028e-05, "loss": 0.0, "step": 759 }, { "epoch": 0.12782776890084938, "grad_norm": NaN, "learning_rate": 9.688685385791339e-05, "loss": 0.0, "step": 760 }, { "epoch": 0.12799596333361365, "grad_norm": NaN, "learning_rate": 9.687751244657957e-05, "loss": 0.0, "step": 761 }, { "epoch": 0.12816415776637793, "grad_norm": NaN, "learning_rate": 9.686815749285745e-05, "loss": 0.0, "step": 762 }, { "epoch": 0.1283323521991422, "grad_norm": NaN, "learning_rate": 9.68587889994496e-05, "loss": 0.0, "step": 763 }, { "epoch": 0.1285005466319065, "grad_norm": NaN, "learning_rate": 9.684940696906246e-05, "loss": 0.0, "step": 764 }, { "epoch": 0.12866874106467077, "grad_norm": NaN, "learning_rate": 9.684001140440639e-05, "loss": 0.0, "step": 765 }, { "epoch": 0.12883693549743502, "grad_norm": NaN, "learning_rate": 9.683060230819565e-05, "loss": 0.0, "step": 766 }, { "epoch": 0.1290051299301993, "grad_norm": NaN, "learning_rate": 9.682117968314846e-05, "loss": 0.0, "step": 767 }, { "epoch": 0.12917332436296358, "grad_norm": NaN, "learning_rate": 9.681174353198687e-05, "loss": 0.0, "step": 768 }, { "epoch": 0.12934151879572786, "grad_norm": NaN, "learning_rate": 9.680229385743689e-05, "loss": 0.0, "step": 769 }, { "epoch": 0.12950971322849214, "grad_norm": NaN, "learning_rate": 9.679283066222845e-05, "loss": 0.0, "step": 770 }, { "epoch": 0.12967790766125642, "grad_norm": NaN, "learning_rate": 9.678335394909532e-05, "loss": 0.0, "step": 771 }, { "epoch": 0.1298461020940207, "grad_norm": NaN, "learning_rate": 9.677386372077524e-05, "loss": 0.0, "step": 772 }, { "epoch": 0.13001429652678495, "grad_norm": NaN, "learning_rate": 9.676435998000983e-05, "loss": 0.0, "step": 773 }, { "epoch": 0.13018249095954923, "grad_norm": NaN, "learning_rate": 9.675484272954462e-05, "loss": 0.0, "step": 774 }, { "epoch": 0.1303506853923135, "grad_norm": NaN, "learning_rate": 9.674531197212903e-05, "loss": 0.0, "step": 775 }, { "epoch": 0.1305188798250778, "grad_norm": NaN, "learning_rate": 9.673576771051639e-05, "loss": 0.0, "step": 776 }, { "epoch": 0.13068707425784207, "grad_norm": NaN, "learning_rate": 9.672620994746394e-05, "loss": 0.0, "step": 777 }, { "epoch": 0.13085526869060635, "grad_norm": NaN, "learning_rate": 9.671663868573277e-05, "loss": 0.0, "step": 778 }, { "epoch": 0.13102346312337063, "grad_norm": NaN, "learning_rate": 9.670705392808796e-05, "loss": 0.0, "step": 779 }, { "epoch": 0.1311916575561349, "grad_norm": NaN, "learning_rate": 9.669745567729841e-05, "loss": 0.0, "step": 780 }, { "epoch": 0.13135985198889916, "grad_norm": NaN, "learning_rate": 9.668784393613696e-05, "loss": 0.0, "step": 781 }, { "epoch": 0.13152804642166344, "grad_norm": NaN, "learning_rate": 9.667821870738033e-05, "loss": 0.0, "step": 782 }, { "epoch": 0.13169624085442772, "grad_norm": NaN, "learning_rate": 9.666857999380914e-05, "loss": 0.0, "step": 783 }, { "epoch": 0.131864435287192, "grad_norm": NaN, "learning_rate": 9.665892779820791e-05, "loss": 0.0, "step": 784 }, { "epoch": 0.13203262971995627, "grad_norm": NaN, "learning_rate": 9.664926212336505e-05, "loss": 0.0, "step": 785 }, { "epoch": 0.13220082415272055, "grad_norm": NaN, "learning_rate": 9.663958297207286e-05, "loss": 0.0, "step": 786 }, { "epoch": 0.13236901858548483, "grad_norm": NaN, "learning_rate": 9.662989034712755e-05, "loss": 0.0, "step": 787 }, { "epoch": 0.13253721301824908, "grad_norm": NaN, "learning_rate": 9.66201842513292e-05, "loss": 0.0, "step": 788 }, { "epoch": 0.13270540745101336, "grad_norm": NaN, "learning_rate": 9.66104646874818e-05, "loss": 0.0, "step": 789 }, { "epoch": 0.13287360188377764, "grad_norm": NaN, "learning_rate": 9.66007316583932e-05, "loss": 0.0, "step": 790 }, { "epoch": 0.13304179631654192, "grad_norm": NaN, "learning_rate": 9.65909851668752e-05, "loss": 0.0, "step": 791 }, { "epoch": 0.1332099907493062, "grad_norm": NaN, "learning_rate": 9.658122521574344e-05, "loss": 0.0, "step": 792 }, { "epoch": 0.13337818518207048, "grad_norm": NaN, "learning_rate": 9.657145180781743e-05, "loss": 0.0, "step": 793 }, { "epoch": 0.13354637961483476, "grad_norm": NaN, "learning_rate": 9.656166494592064e-05, "loss": 0.0, "step": 794 }, { "epoch": 0.133714574047599, "grad_norm": NaN, "learning_rate": 9.655186463288038e-05, "loss": 0.0, "step": 795 }, { "epoch": 0.1338827684803633, "grad_norm": NaN, "learning_rate": 9.654205087152781e-05, "loss": 0.0, "step": 796 }, { "epoch": 0.13405096291312757, "grad_norm": NaN, "learning_rate": 9.653222366469808e-05, "loss": 0.0, "step": 797 }, { "epoch": 0.13421915734589185, "grad_norm": NaN, "learning_rate": 9.65223830152301e-05, "loss": 0.0, "step": 798 }, { "epoch": 0.13438735177865613, "grad_norm": NaN, "learning_rate": 9.651252892596673e-05, "loss": 0.0, "step": 799 }, { "epoch": 0.1345555462114204, "grad_norm": NaN, "learning_rate": 9.650266139975474e-05, "loss": 0.0, "step": 800 }, { "epoch": 0.1347237406441847, "grad_norm": NaN, "learning_rate": 9.649278043944474e-05, "loss": 0.0, "step": 801 }, { "epoch": 0.13489193507694897, "grad_norm": NaN, "learning_rate": 9.648288604789121e-05, "loss": 0.0, "step": 802 }, { "epoch": 0.13506012950971322, "grad_norm": NaN, "learning_rate": 9.647297822795256e-05, "loss": 0.0, "step": 803 }, { "epoch": 0.1352283239424775, "grad_norm": NaN, "learning_rate": 9.6463056982491e-05, "loss": 0.0, "step": 804 }, { "epoch": 0.13539651837524178, "grad_norm": NaN, "learning_rate": 9.645312231437271e-05, "loss": 0.0, "step": 805 }, { "epoch": 0.13556471280800605, "grad_norm": NaN, "learning_rate": 9.644317422646768e-05, "loss": 0.0, "step": 806 }, { "epoch": 0.13573290724077033, "grad_norm": NaN, "learning_rate": 9.643321272164981e-05, "loss": 0.0, "step": 807 }, { "epoch": 0.1359011016735346, "grad_norm": NaN, "learning_rate": 9.642323780279687e-05, "loss": 0.0, "step": 808 }, { "epoch": 0.1360692961062989, "grad_norm": NaN, "learning_rate": 9.641324947279049e-05, "loss": 0.0, "step": 809 }, { "epoch": 0.13623749053906314, "grad_norm": NaN, "learning_rate": 9.640324773451622e-05, "loss": 0.0, "step": 810 }, { "epoch": 0.13640568497182742, "grad_norm": NaN, "learning_rate": 9.63932325908634e-05, "loss": 0.0, "step": 811 }, { "epoch": 0.1365738794045917, "grad_norm": NaN, "learning_rate": 9.638320404472533e-05, "loss": 0.0, "step": 812 }, { "epoch": 0.13674207383735598, "grad_norm": NaN, "learning_rate": 9.637316209899912e-05, "loss": 0.0, "step": 813 }, { "epoch": 0.13691026827012026, "grad_norm": NaN, "learning_rate": 9.63631067565858e-05, "loss": 0.0, "step": 814 }, { "epoch": 0.13707846270288454, "grad_norm": NaN, "learning_rate": 9.635303802039024e-05, "loss": 0.0, "step": 815 }, { "epoch": 0.13724665713564882, "grad_norm": NaN, "learning_rate": 9.634295589332117e-05, "loss": 0.0, "step": 816 }, { "epoch": 0.1374148515684131, "grad_norm": NaN, "learning_rate": 9.633286037829121e-05, "loss": 0.0, "step": 817 }, { "epoch": 0.13758304600117735, "grad_norm": NaN, "learning_rate": 9.632275147821686e-05, "loss": 0.0, "step": 818 }, { "epoch": 0.13775124043394163, "grad_norm": NaN, "learning_rate": 9.631262919601844e-05, "loss": 0.0, "step": 819 }, { "epoch": 0.1379194348667059, "grad_norm": NaN, "learning_rate": 9.630249353462018e-05, "loss": 0.0, "step": 820 }, { "epoch": 0.1380876292994702, "grad_norm": NaN, "learning_rate": 9.629234449695015e-05, "loss": 0.0, "step": 821 }, { "epoch": 0.13825582373223447, "grad_norm": NaN, "learning_rate": 9.62821820859403e-05, "loss": 0.0, "step": 822 }, { "epoch": 0.13842401816499875, "grad_norm": NaN, "learning_rate": 9.627200630452644e-05, "loss": 0.0, "step": 823 }, { "epoch": 0.13859221259776303, "grad_norm": NaN, "learning_rate": 9.626181715564823e-05, "loss": 0.0, "step": 824 }, { "epoch": 0.13876040703052728, "grad_norm": NaN, "learning_rate": 9.625161464224922e-05, "loss": 0.0, "step": 825 }, { "epoch": 0.13892860146329156, "grad_norm": NaN, "learning_rate": 9.624139876727678e-05, "loss": 0.0, "step": 826 }, { "epoch": 0.13909679589605584, "grad_norm": NaN, "learning_rate": 9.623116953368216e-05, "loss": 0.0, "step": 827 }, { "epoch": 0.13926499032882012, "grad_norm": NaN, "learning_rate": 9.62209269444205e-05, "loss": 0.0, "step": 828 }, { "epoch": 0.1394331847615844, "grad_norm": NaN, "learning_rate": 9.621067100245073e-05, "loss": 0.0, "step": 829 }, { "epoch": 0.13960137919434867, "grad_norm": NaN, "learning_rate": 9.62004017107357e-05, "loss": 0.0, "step": 830 }, { "epoch": 0.13976957362711295, "grad_norm": NaN, "learning_rate": 9.61901190722421e-05, "loss": 0.0, "step": 831 }, { "epoch": 0.1399377680598772, "grad_norm": NaN, "learning_rate": 9.617982308994045e-05, "loss": 0.0, "step": 832 }, { "epoch": 0.14010596249264148, "grad_norm": NaN, "learning_rate": 9.616951376680516e-05, "loss": 0.0, "step": 833 }, { "epoch": 0.14027415692540576, "grad_norm": NaN, "learning_rate": 9.615919110581447e-05, "loss": 0.0, "step": 834 }, { "epoch": 0.14044235135817004, "grad_norm": NaN, "learning_rate": 9.614885510995047e-05, "loss": 0.0, "step": 835 }, { "epoch": 0.14061054579093432, "grad_norm": NaN, "learning_rate": 9.613850578219914e-05, "loss": 0.0, "step": 836 }, { "epoch": 0.1407787402236986, "grad_norm": NaN, "learning_rate": 9.612814312555027e-05, "loss": 0.0, "step": 837 }, { "epoch": 0.14094693465646288, "grad_norm": NaN, "learning_rate": 9.611776714299751e-05, "loss": 0.0, "step": 838 }, { "epoch": 0.14111512908922716, "grad_norm": NaN, "learning_rate": 9.610737783753838e-05, "loss": 0.0, "step": 839 }, { "epoch": 0.1412833235219914, "grad_norm": NaN, "learning_rate": 9.60969752121742e-05, "loss": 0.0, "step": 840 }, { "epoch": 0.1414515179547557, "grad_norm": NaN, "learning_rate": 9.608655926991021e-05, "loss": 0.0, "step": 841 }, { "epoch": 0.14161971238751997, "grad_norm": NaN, "learning_rate": 9.607613001375546e-05, "loss": 0.0, "step": 842 }, { "epoch": 0.14178790682028425, "grad_norm": NaN, "learning_rate": 9.606568744672282e-05, "loss": 0.0, "step": 843 }, { "epoch": 0.14195610125304853, "grad_norm": NaN, "learning_rate": 9.605523157182904e-05, "loss": 0.0, "step": 844 }, { "epoch": 0.1421242956858128, "grad_norm": NaN, "learning_rate": 9.604476239209472e-05, "loss": 0.0, "step": 845 }, { "epoch": 0.1422924901185771, "grad_norm": NaN, "learning_rate": 9.603427991054426e-05, "loss": 0.0, "step": 846 }, { "epoch": 0.14246068455134134, "grad_norm": NaN, "learning_rate": 9.602378413020596e-05, "loss": 0.0, "step": 847 }, { "epoch": 0.14262887898410562, "grad_norm": NaN, "learning_rate": 9.601327505411191e-05, "loss": 0.0, "step": 848 }, { "epoch": 0.1427970734168699, "grad_norm": NaN, "learning_rate": 9.600275268529807e-05, "loss": 0.0, "step": 849 }, { "epoch": 0.14296526784963418, "grad_norm": NaN, "learning_rate": 9.599221702680425e-05, "loss": 0.0, "step": 850 }, { "epoch": 0.14313346228239845, "grad_norm": NaN, "learning_rate": 9.598166808167408e-05, "loss": 0.0, "step": 851 }, { "epoch": 0.14330165671516273, "grad_norm": NaN, "learning_rate": 9.597110585295502e-05, "loss": 0.0, "step": 852 }, { "epoch": 0.143469851147927, "grad_norm": NaN, "learning_rate": 9.596053034369837e-05, "loss": 0.0, "step": 853 }, { "epoch": 0.1436380455806913, "grad_norm": NaN, "learning_rate": 9.594994155695931e-05, "loss": 0.0, "step": 854 }, { "epoch": 0.14380624001345554, "grad_norm": NaN, "learning_rate": 9.593933949579678e-05, "loss": 0.0, "step": 855 }, { "epoch": 0.14397443444621982, "grad_norm": NaN, "learning_rate": 9.592872416327365e-05, "loss": 0.0, "step": 856 }, { "epoch": 0.1441426288789841, "grad_norm": NaN, "learning_rate": 9.591809556245652e-05, "loss": 0.0, "step": 857 }, { "epoch": 0.14431082331174838, "grad_norm": NaN, "learning_rate": 9.59074536964159e-05, "loss": 0.0, "step": 858 }, { "epoch": 0.14447901774451266, "grad_norm": NaN, "learning_rate": 9.58967985682261e-05, "loss": 0.0, "step": 859 }, { "epoch": 0.14464721217727694, "grad_norm": NaN, "learning_rate": 9.588613018096529e-05, "loss": 0.0, "step": 860 }, { "epoch": 0.14481540661004122, "grad_norm": NaN, "learning_rate": 9.587544853771539e-05, "loss": 0.0, "step": 861 }, { "epoch": 0.14498360104280547, "grad_norm": NaN, "learning_rate": 9.586475364156226e-05, "loss": 0.0, "step": 862 }, { "epoch": 0.14515179547556975, "grad_norm": NaN, "learning_rate": 9.585404549559551e-05, "loss": 0.0, "step": 863 }, { "epoch": 0.14531998990833403, "grad_norm": NaN, "learning_rate": 9.584332410290861e-05, "loss": 0.0, "step": 864 }, { "epoch": 0.1454881843410983, "grad_norm": NaN, "learning_rate": 9.583258946659885e-05, "loss": 0.0, "step": 865 }, { "epoch": 0.1456563787738626, "grad_norm": NaN, "learning_rate": 9.582184158976736e-05, "loss": 0.0, "step": 866 }, { "epoch": 0.14582457320662687, "grad_norm": NaN, "learning_rate": 9.581108047551905e-05, "loss": 0.0, "step": 867 }, { "epoch": 0.14599276763939115, "grad_norm": NaN, "learning_rate": 9.580030612696273e-05, "loss": 0.0, "step": 868 }, { "epoch": 0.1461609620721554, "grad_norm": NaN, "learning_rate": 9.578951854721095e-05, "loss": 0.0, "step": 869 }, { "epoch": 0.14632915650491968, "grad_norm": NaN, "learning_rate": 9.577871773938011e-05, "loss": 0.0, "step": 870 }, { "epoch": 0.14649735093768396, "grad_norm": NaN, "learning_rate": 9.57679037065905e-05, "loss": 0.0, "step": 871 }, { "epoch": 0.14666554537044824, "grad_norm": NaN, "learning_rate": 9.575707645196612e-05, "loss": 0.0, "step": 872 }, { "epoch": 0.14683373980321252, "grad_norm": NaN, "learning_rate": 9.574623597863488e-05, "loss": 0.0, "step": 873 }, { "epoch": 0.1470019342359768, "grad_norm": NaN, "learning_rate": 9.573538228972844e-05, "loss": 0.0, "step": 874 }, { "epoch": 0.14717012866874107, "grad_norm": NaN, "learning_rate": 9.572451538838232e-05, "loss": 0.0, "step": 875 }, { "epoch": 0.14733832310150535, "grad_norm": NaN, "learning_rate": 9.571363527773585e-05, "loss": 0.0, "step": 876 }, { "epoch": 0.1475065175342696, "grad_norm": NaN, "learning_rate": 9.570274196093217e-05, "loss": 0.0, "step": 877 }, { "epoch": 0.14767471196703388, "grad_norm": NaN, "learning_rate": 9.569183544111824e-05, "loss": 0.0, "step": 878 }, { "epoch": 0.14784290639979816, "grad_norm": NaN, "learning_rate": 9.568091572144484e-05, "loss": 0.0, "step": 879 }, { "epoch": 0.14801110083256244, "grad_norm": NaN, "learning_rate": 9.566998280506653e-05, "loss": 0.0, "step": 880 }, { "epoch": 0.14817929526532672, "grad_norm": NaN, "learning_rate": 9.565903669514172e-05, "loss": 0.0, "step": 881 }, { "epoch": 0.148347489698091, "grad_norm": NaN, "learning_rate": 9.564807739483262e-05, "loss": 0.0, "step": 882 }, { "epoch": 0.14851568413085528, "grad_norm": NaN, "learning_rate": 9.563710490730525e-05, "loss": 0.0, "step": 883 }, { "epoch": 0.14868387856361953, "grad_norm": NaN, "learning_rate": 9.562611923572944e-05, "loss": 0.0, "step": 884 }, { "epoch": 0.1488520729963838, "grad_norm": NaN, "learning_rate": 9.561512038327882e-05, "loss": 0.0, "step": 885 }, { "epoch": 0.1490202674291481, "grad_norm": NaN, "learning_rate": 9.560410835313084e-05, "loss": 0.0, "step": 886 }, { "epoch": 0.14918846186191237, "grad_norm": NaN, "learning_rate": 9.559308314846675e-05, "loss": 0.0, "step": 887 }, { "epoch": 0.14935665629467665, "grad_norm": NaN, "learning_rate": 9.558204477247163e-05, "loss": 0.0, "step": 888 }, { "epoch": 0.14952485072744093, "grad_norm": NaN, "learning_rate": 9.55709932283343e-05, "loss": 0.0, "step": 889 }, { "epoch": 0.1496930451602052, "grad_norm": NaN, "learning_rate": 9.555992851924746e-05, "loss": 0.0, "step": 890 }, { "epoch": 0.1498612395929695, "grad_norm": NaN, "learning_rate": 9.554885064840758e-05, "loss": 0.0, "step": 891 }, { "epoch": 0.15002943402573374, "grad_norm": NaN, "learning_rate": 9.55377596190149e-05, "loss": 0.0, "step": 892 }, { "epoch": 0.15019762845849802, "grad_norm": NaN, "learning_rate": 9.552665543427354e-05, "loss": 0.0, "step": 893 }, { "epoch": 0.1503658228912623, "grad_norm": NaN, "learning_rate": 9.551553809739135e-05, "loss": 0.0, "step": 894 }, { "epoch": 0.15053401732402658, "grad_norm": NaN, "learning_rate": 9.550440761158e-05, "loss": 0.0, "step": 895 }, { "epoch": 0.15070221175679085, "grad_norm": NaN, "learning_rate": 9.549326398005495e-05, "loss": 0.0, "step": 896 }, { "epoch": 0.15087040618955513, "grad_norm": NaN, "learning_rate": 9.548210720603552e-05, "loss": 0.0, "step": 897 }, { "epoch": 0.1510386006223194, "grad_norm": NaN, "learning_rate": 9.547093729274474e-05, "loss": 0.0, "step": 898 }, { "epoch": 0.15120679505508366, "grad_norm": NaN, "learning_rate": 9.545975424340948e-05, "loss": 0.0, "step": 899 }, { "epoch": 0.15137498948784794, "grad_norm": NaN, "learning_rate": 9.544855806126041e-05, "loss": 0.0, "step": 900 }, { "epoch": 0.15154318392061222, "grad_norm": NaN, "learning_rate": 9.543734874953194e-05, "loss": 0.0, "step": 901 }, { "epoch": 0.1517113783533765, "grad_norm": NaN, "learning_rate": 9.542612631146237e-05, "loss": 0.0, "step": 902 }, { "epoch": 0.15187957278614078, "grad_norm": NaN, "learning_rate": 9.54148907502937e-05, "loss": 0.0, "step": 903 }, { "epoch": 0.15204776721890506, "grad_norm": NaN, "learning_rate": 9.540364206927175e-05, "loss": 0.0, "step": 904 }, { "epoch": 0.15221596165166934, "grad_norm": NaN, "learning_rate": 9.539238027164619e-05, "loss": 0.0, "step": 905 }, { "epoch": 0.1523841560844336, "grad_norm": NaN, "learning_rate": 9.538110536067037e-05, "loss": 0.0, "step": 906 }, { "epoch": 0.15255235051719787, "grad_norm": NaN, "learning_rate": 9.536981733960152e-05, "loss": 0.0, "step": 907 }, { "epoch": 0.15272054494996215, "grad_norm": NaN, "learning_rate": 9.53585162117006e-05, "loss": 0.0, "step": 908 }, { "epoch": 0.15288873938272643, "grad_norm": NaN, "learning_rate": 9.53472019802324e-05, "loss": 0.0, "step": 909 }, { "epoch": 0.1530569338154907, "grad_norm": NaN, "learning_rate": 9.533587464846548e-05, "loss": 0.0, "step": 910 }, { "epoch": 0.153225128248255, "grad_norm": NaN, "learning_rate": 9.532453421967213e-05, "loss": 0.0, "step": 911 }, { "epoch": 0.15339332268101927, "grad_norm": NaN, "learning_rate": 9.531318069712854e-05, "loss": 0.0, "step": 912 }, { "epoch": 0.15356151711378355, "grad_norm": NaN, "learning_rate": 9.530181408411456e-05, "loss": 0.0, "step": 913 }, { "epoch": 0.1537297115465478, "grad_norm": NaN, "learning_rate": 9.529043438391392e-05, "loss": 0.0, "step": 914 }, { "epoch": 0.15389790597931208, "grad_norm": NaN, "learning_rate": 9.527904159981404e-05, "loss": 0.0, "step": 915 }, { "epoch": 0.15406610041207636, "grad_norm": NaN, "learning_rate": 9.526763573510622e-05, "loss": 0.0, "step": 916 }, { "epoch": 0.15423429484484064, "grad_norm": NaN, "learning_rate": 9.525621679308546e-05, "loss": 0.0, "step": 917 }, { "epoch": 0.15440248927760492, "grad_norm": NaN, "learning_rate": 9.524478477705057e-05, "loss": 0.0, "step": 918 }, { "epoch": 0.1545706837103692, "grad_norm": NaN, "learning_rate": 9.523333969030413e-05, "loss": 0.0, "step": 919 }, { "epoch": 0.15473887814313347, "grad_norm": NaN, "learning_rate": 9.522188153615248e-05, "loss": 0.0, "step": 920 }, { "epoch": 0.15490707257589773, "grad_norm": NaN, "learning_rate": 9.521041031790578e-05, "loss": 0.0, "step": 921 }, { "epoch": 0.155075267008662, "grad_norm": NaN, "learning_rate": 9.519892603887792e-05, "loss": 0.0, "step": 922 }, { "epoch": 0.15524346144142628, "grad_norm": NaN, "learning_rate": 9.51874287023866e-05, "loss": 0.0, "step": 923 }, { "epoch": 0.15541165587419056, "grad_norm": NaN, "learning_rate": 9.517591831175323e-05, "loss": 0.0, "step": 924 }, { "epoch": 0.15557985030695484, "grad_norm": NaN, "learning_rate": 9.516439487030308e-05, "loss": 0.0, "step": 925 }, { "epoch": 0.15574804473971912, "grad_norm": NaN, "learning_rate": 9.51528583813651e-05, "loss": 0.0, "step": 926 }, { "epoch": 0.1559162391724834, "grad_norm": NaN, "learning_rate": 9.51413088482721e-05, "loss": 0.0, "step": 927 }, { "epoch": 0.15608443360524768, "grad_norm": NaN, "learning_rate": 9.512974627436058e-05, "loss": 0.0, "step": 928 }, { "epoch": 0.15625262803801193, "grad_norm": NaN, "learning_rate": 9.511817066297082e-05, "loss": 0.0, "step": 929 }, { "epoch": 0.1564208224707762, "grad_norm": NaN, "learning_rate": 9.510658201744693e-05, "loss": 0.0, "step": 930 }, { "epoch": 0.1565890169035405, "grad_norm": NaN, "learning_rate": 9.509498034113671e-05, "loss": 0.0, "step": 931 }, { "epoch": 0.15675721133630477, "grad_norm": NaN, "learning_rate": 9.508336563739178e-05, "loss": 0.0, "step": 932 }, { "epoch": 0.15692540576906905, "grad_norm": NaN, "learning_rate": 9.507173790956746e-05, "loss": 0.0, "step": 933 }, { "epoch": 0.15709360020183333, "grad_norm": NaN, "learning_rate": 9.506009716102288e-05, "loss": 0.0, "step": 934 }, { "epoch": 0.1572617946345976, "grad_norm": NaN, "learning_rate": 9.504844339512095e-05, "loss": 0.0, "step": 935 }, { "epoch": 0.15742998906736186, "grad_norm": NaN, "learning_rate": 9.50367766152283e-05, "loss": 0.0, "step": 936 }, { "epoch": 0.15759818350012614, "grad_norm": NaN, "learning_rate": 9.502509682471532e-05, "loss": 0.0, "step": 937 }, { "epoch": 0.15776637793289042, "grad_norm": NaN, "learning_rate": 9.501340402695617e-05, "loss": 0.0, "step": 938 }, { "epoch": 0.1579345723656547, "grad_norm": NaN, "learning_rate": 9.50016982253288e-05, "loss": 0.0, "step": 939 }, { "epoch": 0.15810276679841898, "grad_norm": NaN, "learning_rate": 9.498997942321483e-05, "loss": 0.0, "step": 940 }, { "epoch": 0.15827096123118325, "grad_norm": NaN, "learning_rate": 9.497824762399974e-05, "loss": 0.0, "step": 941 }, { "epoch": 0.15843915566394753, "grad_norm": NaN, "learning_rate": 9.496650283107269e-05, "loss": 0.0, "step": 942 }, { "epoch": 0.15860735009671179, "grad_norm": NaN, "learning_rate": 9.495474504782661e-05, "loss": 0.0, "step": 943 }, { "epoch": 0.15877554452947606, "grad_norm": NaN, "learning_rate": 9.494297427765824e-05, "loss": 0.0, "step": 944 }, { "epoch": 0.15894373896224034, "grad_norm": NaN, "learning_rate": 9.493119052396797e-05, "loss": 0.0, "step": 945 }, { "epoch": 0.15911193339500462, "grad_norm": NaN, "learning_rate": 9.491939379016003e-05, "loss": 0.0, "step": 946 }, { "epoch": 0.1592801278277689, "grad_norm": NaN, "learning_rate": 9.490758407964234e-05, "loss": 0.0, "step": 947 }, { "epoch": 0.15944832226053318, "grad_norm": NaN, "learning_rate": 9.489576139582661e-05, "loss": 0.0, "step": 948 }, { "epoch": 0.15961651669329746, "grad_norm": NaN, "learning_rate": 9.488392574212827e-05, "loss": 0.0, "step": 949 }, { "epoch": 0.15978471112606174, "grad_norm": NaN, "learning_rate": 9.487207712196651e-05, "loss": 0.0, "step": 950 }, { "epoch": 0.159952905558826, "grad_norm": NaN, "learning_rate": 9.486021553876426e-05, "loss": 0.0, "step": 951 }, { "epoch": 0.16012109999159027, "grad_norm": NaN, "learning_rate": 9.484834099594822e-05, "loss": 0.0, "step": 952 }, { "epoch": 0.16028929442435455, "grad_norm": NaN, "learning_rate": 9.483645349694879e-05, "loss": 0.0, "step": 953 }, { "epoch": 0.16045748885711883, "grad_norm": NaN, "learning_rate": 9.482455304520013e-05, "loss": 0.0, "step": 954 }, { "epoch": 0.1606256832898831, "grad_norm": NaN, "learning_rate": 9.481263964414016e-05, "loss": 0.0, "step": 955 }, { "epoch": 0.1607938777226474, "grad_norm": NaN, "learning_rate": 9.480071329721054e-05, "loss": 0.0, "step": 956 }, { "epoch": 0.16096207215541167, "grad_norm": NaN, "learning_rate": 9.478877400785664e-05, "loss": 0.0, "step": 957 }, { "epoch": 0.16113026658817592, "grad_norm": NaN, "learning_rate": 9.47768217795276e-05, "loss": 0.0, "step": 958 }, { "epoch": 0.1612984610209402, "grad_norm": NaN, "learning_rate": 9.476485661567626e-05, "loss": 0.0, "step": 959 }, { "epoch": 0.16146665545370448, "grad_norm": NaN, "learning_rate": 9.475287851975925e-05, "loss": 0.0, "step": 960 }, { "epoch": 0.16163484988646876, "grad_norm": NaN, "learning_rate": 9.474088749523689e-05, "loss": 0.0, "step": 961 }, { "epoch": 0.16180304431923304, "grad_norm": NaN, "learning_rate": 9.472888354557327e-05, "loss": 0.0, "step": 962 }, { "epoch": 0.16197123875199732, "grad_norm": NaN, "learning_rate": 9.471686667423618e-05, "loss": 0.0, "step": 963 }, { "epoch": 0.1621394331847616, "grad_norm": NaN, "learning_rate": 9.470483688469716e-05, "loss": 0.0, "step": 964 }, { "epoch": 0.16230762761752587, "grad_norm": NaN, "learning_rate": 9.469279418043148e-05, "loss": 0.0, "step": 965 }, { "epoch": 0.16247582205029013, "grad_norm": NaN, "learning_rate": 9.468073856491818e-05, "loss": 0.0, "step": 966 }, { "epoch": 0.1626440164830544, "grad_norm": NaN, "learning_rate": 9.466867004163992e-05, "loss": 0.0, "step": 967 }, { "epoch": 0.16281221091581868, "grad_norm": NaN, "learning_rate": 9.465658861408324e-05, "loss": 0.0, "step": 968 }, { "epoch": 0.16298040534858296, "grad_norm": NaN, "learning_rate": 9.464449428573827e-05, "loss": 0.0, "step": 969 }, { "epoch": 0.16314859978134724, "grad_norm": NaN, "learning_rate": 9.463238706009896e-05, "loss": 0.0, "step": 970 }, { "epoch": 0.16331679421411152, "grad_norm": NaN, "learning_rate": 9.462026694066293e-05, "loss": 0.0, "step": 971 }, { "epoch": 0.1634849886468758, "grad_norm": NaN, "learning_rate": 9.460813393093157e-05, "loss": 0.0, "step": 972 }, { "epoch": 0.16365318307964005, "grad_norm": NaN, "learning_rate": 9.459598803440994e-05, "loss": 0.0, "step": 973 }, { "epoch": 0.16382137751240433, "grad_norm": NaN, "learning_rate": 9.458382925460689e-05, "loss": 0.0, "step": 974 }, { "epoch": 0.1639895719451686, "grad_norm": NaN, "learning_rate": 9.457165759503493e-05, "loss": 0.0, "step": 975 }, { "epoch": 0.1641577663779329, "grad_norm": NaN, "learning_rate": 9.455947305921031e-05, "loss": 0.0, "step": 976 }, { "epoch": 0.16432596081069717, "grad_norm": NaN, "learning_rate": 9.454727565065302e-05, "loss": 0.0, "step": 977 }, { "epoch": 0.16449415524346145, "grad_norm": NaN, "learning_rate": 9.453506537288677e-05, "loss": 0.0, "step": 978 }, { "epoch": 0.16466234967622573, "grad_norm": NaN, "learning_rate": 9.452284222943894e-05, "loss": 0.0, "step": 979 }, { "epoch": 0.16483054410898998, "grad_norm": NaN, "learning_rate": 9.451060622384067e-05, "loss": 0.0, "step": 980 }, { "epoch": 0.16499873854175426, "grad_norm": NaN, "learning_rate": 9.449835735962682e-05, "loss": 0.0, "step": 981 }, { "epoch": 0.16516693297451854, "grad_norm": NaN, "learning_rate": 9.448609564033593e-05, "loss": 0.0, "step": 982 }, { "epoch": 0.16533512740728282, "grad_norm": NaN, "learning_rate": 9.447382106951029e-05, "loss": 0.0, "step": 983 }, { "epoch": 0.1655033218400471, "grad_norm": NaN, "learning_rate": 9.446153365069587e-05, "loss": 0.0, "step": 984 }, { "epoch": 0.16567151627281138, "grad_norm": NaN, "learning_rate": 9.444923338744239e-05, "loss": 0.0, "step": 985 }, { "epoch": 0.16583971070557565, "grad_norm": NaN, "learning_rate": 9.443692028330325e-05, "loss": 0.0, "step": 986 }, { "epoch": 0.16600790513833993, "grad_norm": NaN, "learning_rate": 9.442459434183554e-05, "loss": 0.0, "step": 987 }, { "epoch": 0.16617609957110419, "grad_norm": NaN, "learning_rate": 9.441225556660011e-05, "loss": 0.0, "step": 988 }, { "epoch": 0.16634429400386846, "grad_norm": NaN, "learning_rate": 9.43999039611615e-05, "loss": 0.0, "step": 989 }, { "epoch": 0.16651248843663274, "grad_norm": NaN, "learning_rate": 9.438753952908794e-05, "loss": 0.0, "step": 990 }, { "epoch": 0.16668068286939702, "grad_norm": NaN, "learning_rate": 9.437516227395139e-05, "loss": 0.0, "step": 991 }, { "epoch": 0.1668488773021613, "grad_norm": NaN, "learning_rate": 9.436277219932747e-05, "loss": 0.0, "step": 992 }, { "epoch": 0.16701707173492558, "grad_norm": NaN, "learning_rate": 9.435036930879557e-05, "loss": 0.0, "step": 993 }, { "epoch": 0.16718526616768986, "grad_norm": NaN, "learning_rate": 9.433795360593874e-05, "loss": 0.0, "step": 994 }, { "epoch": 0.1673534606004541, "grad_norm": NaN, "learning_rate": 9.432552509434369e-05, "loss": 0.0, "step": 995 }, { "epoch": 0.1675216550332184, "grad_norm": NaN, "learning_rate": 9.431308377760094e-05, "loss": 0.0, "step": 996 }, { "epoch": 0.16768984946598267, "grad_norm": NaN, "learning_rate": 9.430062965930462e-05, "loss": 0.0, "step": 997 }, { "epoch": 0.16785804389874695, "grad_norm": NaN, "learning_rate": 9.428816274305258e-05, "loss": 0.0, "step": 998 }, { "epoch": 0.16802623833151123, "grad_norm": NaN, "learning_rate": 9.427568303244639e-05, "loss": 0.0, "step": 999 }, { "epoch": 0.1681944327642755, "grad_norm": NaN, "learning_rate": 9.426319053109127e-05, "loss": 0.0, "step": 1000 }, { "epoch": 0.1683626271970398, "grad_norm": NaN, "learning_rate": 9.425068524259619e-05, "loss": 0.0, "step": 1001 }, { "epoch": 0.16853082162980407, "grad_norm": NaN, "learning_rate": 9.423816717057379e-05, "loss": 0.0, "step": 1002 }, { "epoch": 0.16869901606256832, "grad_norm": NaN, "learning_rate": 9.422563631864038e-05, "loss": 0.0, "step": 1003 }, { "epoch": 0.1688672104953326, "grad_norm": NaN, "learning_rate": 9.4213092690416e-05, "loss": 0.0, "step": 1004 }, { "epoch": 0.16903540492809688, "grad_norm": NaN, "learning_rate": 9.420053628952434e-05, "loss": 0.0, "step": 1005 }, { "epoch": 0.16920359936086116, "grad_norm": NaN, "learning_rate": 9.418796711959284e-05, "loss": 0.0, "step": 1006 }, { "epoch": 0.16937179379362544, "grad_norm": NaN, "learning_rate": 9.417538518425258e-05, "loss": 0.0, "step": 1007 }, { "epoch": 0.16953998822638972, "grad_norm": NaN, "learning_rate": 9.41627904871383e-05, "loss": 0.0, "step": 1008 }, { "epoch": 0.169708182659154, "grad_norm": NaN, "learning_rate": 9.415018303188851e-05, "loss": 0.0, "step": 1009 }, { "epoch": 0.16987637709191825, "grad_norm": NaN, "learning_rate": 9.413756282214537e-05, "loss": 0.0, "step": 1010 }, { "epoch": 0.17004457152468253, "grad_norm": NaN, "learning_rate": 9.412492986155471e-05, "loss": 0.0, "step": 1011 }, { "epoch": 0.1702127659574468, "grad_norm": NaN, "learning_rate": 9.411228415376601e-05, "loss": 0.0, "step": 1012 }, { "epoch": 0.17038096039021108, "grad_norm": NaN, "learning_rate": 9.409962570243252e-05, "loss": 0.0, "step": 1013 }, { "epoch": 0.17054915482297536, "grad_norm": NaN, "learning_rate": 9.408695451121109e-05, "loss": 0.0, "step": 1014 }, { "epoch": 0.17071734925573964, "grad_norm": NaN, "learning_rate": 9.407427058376233e-05, "loss": 0.0, "step": 1015 }, { "epoch": 0.17088554368850392, "grad_norm": NaN, "learning_rate": 9.406157392375044e-05, "loss": 0.0, "step": 1016 }, { "epoch": 0.17105373812126817, "grad_norm": NaN, "learning_rate": 9.404886453484336e-05, "loss": 0.0, "step": 1017 }, { "epoch": 0.17122193255403245, "grad_norm": NaN, "learning_rate": 9.403614242071271e-05, "loss": 0.0, "step": 1018 }, { "epoch": 0.17139012698679673, "grad_norm": NaN, "learning_rate": 9.402340758503373e-05, "loss": 0.0, "step": 1019 }, { "epoch": 0.171558321419561, "grad_norm": NaN, "learning_rate": 9.40106600314854e-05, "loss": 0.0, "step": 1020 }, { "epoch": 0.1717265158523253, "grad_norm": NaN, "learning_rate": 9.399789976375032e-05, "loss": 0.0, "step": 1021 }, { "epoch": 0.17189471028508957, "grad_norm": NaN, "learning_rate": 9.398512678551481e-05, "loss": 0.0, "step": 1022 }, { "epoch": 0.17206290471785385, "grad_norm": NaN, "learning_rate": 9.397234110046883e-05, "loss": 0.0, "step": 1023 }, { "epoch": 0.17223109915061813, "grad_norm": NaN, "learning_rate": 9.395954271230604e-05, "loss": 0.0, "step": 1024 }, { "epoch": 0.17239929358338238, "grad_norm": NaN, "learning_rate": 9.394673162472373e-05, "loss": 0.0, "step": 1025 }, { "epoch": 0.17256748801614666, "grad_norm": NaN, "learning_rate": 9.393390784142288e-05, "loss": 0.0, "step": 1026 }, { "epoch": 0.17273568244891094, "grad_norm": NaN, "learning_rate": 9.392107136610814e-05, "loss": 0.0, "step": 1027 }, { "epoch": 0.17290387688167522, "grad_norm": NaN, "learning_rate": 9.390822220248782e-05, "loss": 0.0, "step": 1028 }, { "epoch": 0.1730720713144395, "grad_norm": NaN, "learning_rate": 9.389536035427394e-05, "loss": 0.0, "step": 1029 }, { "epoch": 0.17324026574720378, "grad_norm": NaN, "learning_rate": 9.388248582518208e-05, "loss": 0.0, "step": 1030 }, { "epoch": 0.17340846017996805, "grad_norm": NaN, "learning_rate": 9.386959861893158e-05, "loss": 0.0, "step": 1031 }, { "epoch": 0.1735766546127323, "grad_norm": NaN, "learning_rate": 9.385669873924541e-05, "loss": 0.0, "step": 1032 }, { "epoch": 0.17374484904549659, "grad_norm": NaN, "learning_rate": 9.384378618985019e-05, "loss": 0.0, "step": 1033 }, { "epoch": 0.17391304347826086, "grad_norm": NaN, "learning_rate": 9.383086097447624e-05, "loss": 0.0, "step": 1034 }, { "epoch": 0.17408123791102514, "grad_norm": NaN, "learning_rate": 9.381792309685746e-05, "loss": 0.0, "step": 1035 }, { "epoch": 0.17424943234378942, "grad_norm": NaN, "learning_rate": 9.38049725607315e-05, "loss": 0.0, "step": 1036 }, { "epoch": 0.1744176267765537, "grad_norm": NaN, "learning_rate": 9.379200936983963e-05, "loss": 0.0, "step": 1037 }, { "epoch": 0.17458582120931798, "grad_norm": NaN, "learning_rate": 9.377903352792672e-05, "loss": 0.0, "step": 1038 }, { "epoch": 0.17475401564208223, "grad_norm": NaN, "learning_rate": 9.37660450387414e-05, "loss": 0.0, "step": 1039 }, { "epoch": 0.1749222100748465, "grad_norm": NaN, "learning_rate": 9.375304390603585e-05, "loss": 0.0, "step": 1040 }, { "epoch": 0.1750904045076108, "grad_norm": NaN, "learning_rate": 9.3740030133566e-05, "loss": 0.0, "step": 1041 }, { "epoch": 0.17525859894037507, "grad_norm": NaN, "learning_rate": 9.372700372509135e-05, "loss": 0.0, "step": 1042 }, { "epoch": 0.17542679337313935, "grad_norm": NaN, "learning_rate": 9.37139646843751e-05, "loss": 0.0, "step": 1043 }, { "epoch": 0.17559498780590363, "grad_norm": NaN, "learning_rate": 9.370091301518409e-05, "loss": 0.0, "step": 1044 }, { "epoch": 0.1757631822386679, "grad_norm": NaN, "learning_rate": 9.368784872128878e-05, "loss": 0.0, "step": 1045 }, { "epoch": 0.1759313766714322, "grad_norm": NaN, "learning_rate": 9.367477180646329e-05, "loss": 0.0, "step": 1046 }, { "epoch": 0.17609957110419644, "grad_norm": NaN, "learning_rate": 9.366168227448541e-05, "loss": 0.0, "step": 1047 }, { "epoch": 0.17626776553696072, "grad_norm": NaN, "learning_rate": 9.364858012913658e-05, "loss": 0.0, "step": 1048 }, { "epoch": 0.176435959969725, "grad_norm": NaN, "learning_rate": 9.363546537420184e-05, "loss": 0.0, "step": 1049 }, { "epoch": 0.17660415440248928, "grad_norm": NaN, "learning_rate": 9.36223380134699e-05, "loss": 0.0, "step": 1050 }, { "epoch": 0.17677234883525356, "grad_norm": NaN, "learning_rate": 9.36091980507331e-05, "loss": 0.0, "step": 1051 }, { "epoch": 0.17694054326801784, "grad_norm": NaN, "learning_rate": 9.359604548978742e-05, "loss": 0.0, "step": 1052 }, { "epoch": 0.17710873770078212, "grad_norm": NaN, "learning_rate": 9.358288033443251e-05, "loss": 0.0, "step": 1053 }, { "epoch": 0.17727693213354637, "grad_norm": NaN, "learning_rate": 9.356970258847164e-05, "loss": 0.0, "step": 1054 }, { "epoch": 0.17744512656631065, "grad_norm": NaN, "learning_rate": 9.35565122557117e-05, "loss": 0.0, "step": 1055 }, { "epoch": 0.17761332099907493, "grad_norm": NaN, "learning_rate": 9.354330933996321e-05, "loss": 0.0, "step": 1056 }, { "epoch": 0.1777815154318392, "grad_norm": NaN, "learning_rate": 9.353009384504038e-05, "loss": 0.0, "step": 1057 }, { "epoch": 0.17794970986460348, "grad_norm": NaN, "learning_rate": 9.3516865774761e-05, "loss": 0.0, "step": 1058 }, { "epoch": 0.17811790429736776, "grad_norm": NaN, "learning_rate": 9.350362513294651e-05, "loss": 0.0, "step": 1059 }, { "epoch": 0.17828609873013204, "grad_norm": NaN, "learning_rate": 9.3490371923422e-05, "loss": 0.0, "step": 1060 }, { "epoch": 0.17845429316289632, "grad_norm": NaN, "learning_rate": 9.347710615001614e-05, "loss": 0.0, "step": 1061 }, { "epoch": 0.17862248759566057, "grad_norm": NaN, "learning_rate": 9.346382781656131e-05, "loss": 0.0, "step": 1062 }, { "epoch": 0.17879068202842485, "grad_norm": NaN, "learning_rate": 9.345053692689344e-05, "loss": 0.0, "step": 1063 }, { "epoch": 0.17895887646118913, "grad_norm": NaN, "learning_rate": 9.343723348485212e-05, "loss": 0.0, "step": 1064 }, { "epoch": 0.1791270708939534, "grad_norm": NaN, "learning_rate": 9.342391749428057e-05, "loss": 0.0, "step": 1065 }, { "epoch": 0.1792952653267177, "grad_norm": NaN, "learning_rate": 9.341058895902563e-05, "loss": 0.0, "step": 1066 }, { "epoch": 0.17946345975948197, "grad_norm": NaN, "learning_rate": 9.339724788293778e-05, "loss": 0.0, "step": 1067 }, { "epoch": 0.17963165419224625, "grad_norm": NaN, "learning_rate": 9.338389426987108e-05, "loss": 0.0, "step": 1068 }, { "epoch": 0.1797998486250105, "grad_norm": NaN, "learning_rate": 9.337052812368325e-05, "loss": 0.0, "step": 1069 }, { "epoch": 0.17996804305777478, "grad_norm": NaN, "learning_rate": 9.335714944823564e-05, "loss": 0.0, "step": 1070 }, { "epoch": 0.18013623749053906, "grad_norm": NaN, "learning_rate": 9.334375824739317e-05, "loss": 0.0, "step": 1071 }, { "epoch": 0.18030443192330334, "grad_norm": NaN, "learning_rate": 9.333035452502444e-05, "loss": 0.0, "step": 1072 }, { "epoch": 0.18047262635606762, "grad_norm": NaN, "learning_rate": 9.331693828500159e-05, "loss": 0.0, "step": 1073 }, { "epoch": 0.1806408207888319, "grad_norm": NaN, "learning_rate": 9.330350953120045e-05, "loss": 0.0, "step": 1074 }, { "epoch": 0.18080901522159618, "grad_norm": NaN, "learning_rate": 9.329006826750044e-05, "loss": 0.0, "step": 1075 }, { "epoch": 0.18097720965436043, "grad_norm": NaN, "learning_rate": 9.327661449778457e-05, "loss": 0.0, "step": 1076 }, { "epoch": 0.1811454040871247, "grad_norm": NaN, "learning_rate": 9.326314822593953e-05, "loss": 0.0, "step": 1077 }, { "epoch": 0.18131359851988899, "grad_norm": NaN, "learning_rate": 9.324966945585551e-05, "loss": 0.0, "step": 1078 }, { "epoch": 0.18148179295265326, "grad_norm": NaN, "learning_rate": 9.323617819142643e-05, "loss": 0.0, "step": 1079 }, { "epoch": 0.18164998738541754, "grad_norm": NaN, "learning_rate": 9.322267443654972e-05, "loss": 0.0, "step": 1080 }, { "epoch": 0.18181818181818182, "grad_norm": NaN, "learning_rate": 9.320915819512651e-05, "loss": 0.0, "step": 1081 }, { "epoch": 0.1819863762509461, "grad_norm": NaN, "learning_rate": 9.319562947106146e-05, "loss": 0.0, "step": 1082 }, { "epoch": 0.18215457068371038, "grad_norm": NaN, "learning_rate": 9.318208826826287e-05, "loss": 0.0, "step": 1083 }, { "epoch": 0.18232276511647463, "grad_norm": NaN, "learning_rate": 9.316853459064265e-05, "loss": 0.0, "step": 1084 }, { "epoch": 0.1824909595492389, "grad_norm": NaN, "learning_rate": 9.31549684421163e-05, "loss": 0.0, "step": 1085 }, { "epoch": 0.1826591539820032, "grad_norm": NaN, "learning_rate": 9.314138982660295e-05, "loss": 0.0, "step": 1086 }, { "epoch": 0.18282734841476747, "grad_norm": NaN, "learning_rate": 9.312779874802526e-05, "loss": 0.0, "step": 1087 }, { "epoch": 0.18299554284753175, "grad_norm": NaN, "learning_rate": 9.311419521030959e-05, "loss": 0.0, "step": 1088 }, { "epoch": 0.18316373728029603, "grad_norm": NaN, "learning_rate": 9.310057921738582e-05, "loss": 0.0, "step": 1089 }, { "epoch": 0.1833319317130603, "grad_norm": NaN, "learning_rate": 9.308695077318747e-05, "loss": 0.0, "step": 1090 }, { "epoch": 0.18350012614582456, "grad_norm": NaN, "learning_rate": 9.307330988165165e-05, "loss": 0.0, "step": 1091 }, { "epoch": 0.18366832057858884, "grad_norm": NaN, "learning_rate": 9.305965654671904e-05, "loss": 0.0, "step": 1092 }, { "epoch": 0.18383651501135312, "grad_norm": NaN, "learning_rate": 9.304599077233395e-05, "loss": 0.0, "step": 1093 }, { "epoch": 0.1840047094441174, "grad_norm": NaN, "learning_rate": 9.30323125624443e-05, "loss": 0.0, "step": 1094 }, { "epoch": 0.18417290387688168, "grad_norm": NaN, "learning_rate": 9.30186219210015e-05, "loss": 0.0, "step": 1095 }, { "epoch": 0.18434109830964596, "grad_norm": NaN, "learning_rate": 9.30049188519607e-05, "loss": 0.0, "step": 1096 }, { "epoch": 0.18450929274241024, "grad_norm": NaN, "learning_rate": 9.29912033592805e-05, "loss": 0.0, "step": 1097 }, { "epoch": 0.18467748717517452, "grad_norm": NaN, "learning_rate": 9.297747544692319e-05, "loss": 0.0, "step": 1098 }, { "epoch": 0.18484568160793877, "grad_norm": NaN, "learning_rate": 9.29637351188546e-05, "loss": 0.0, "step": 1099 }, { "epoch": 0.18501387604070305, "grad_norm": NaN, "learning_rate": 9.294998237904414e-05, "loss": 0.0, "step": 1100 }, { "epoch": 0.18518207047346733, "grad_norm": NaN, "learning_rate": 9.293621723146485e-05, "loss": 0.0, "step": 1101 }, { "epoch": 0.1853502649062316, "grad_norm": NaN, "learning_rate": 9.292243968009331e-05, "loss": 0.0, "step": 1102 }, { "epoch": 0.18551845933899588, "grad_norm": NaN, "learning_rate": 9.290864972890971e-05, "loss": 0.0, "step": 1103 }, { "epoch": 0.18568665377176016, "grad_norm": NaN, "learning_rate": 9.28948473818978e-05, "loss": 0.0, "step": 1104 }, { "epoch": 0.18585484820452444, "grad_norm": NaN, "learning_rate": 9.288103264304496e-05, "loss": 0.0, "step": 1105 }, { "epoch": 0.1860230426372887, "grad_norm": NaN, "learning_rate": 9.286720551634205e-05, "loss": 0.0, "step": 1106 }, { "epoch": 0.18619123707005297, "grad_norm": NaN, "learning_rate": 9.285336600578361e-05, "loss": 0.0, "step": 1107 }, { "epoch": 0.18635943150281725, "grad_norm": NaN, "learning_rate": 9.283951411536773e-05, "loss": 0.0, "step": 1108 }, { "epoch": 0.18652762593558153, "grad_norm": NaN, "learning_rate": 9.282564984909604e-05, "loss": 0.0, "step": 1109 }, { "epoch": 0.1866958203683458, "grad_norm": NaN, "learning_rate": 9.28117732109738e-05, "loss": 0.0, "step": 1110 }, { "epoch": 0.1868640148011101, "grad_norm": NaN, "learning_rate": 9.279788420500979e-05, "loss": 0.0, "step": 1111 }, { "epoch": 0.18703220923387437, "grad_norm": NaN, "learning_rate": 9.278398283521639e-05, "loss": 0.0, "step": 1112 }, { "epoch": 0.18720040366663862, "grad_norm": NaN, "learning_rate": 9.277006910560958e-05, "loss": 0.0, "step": 1113 }, { "epoch": 0.1873685980994029, "grad_norm": NaN, "learning_rate": 9.275614302020883e-05, "loss": 0.0, "step": 1114 }, { "epoch": 0.18753679253216718, "grad_norm": NaN, "learning_rate": 9.274220458303727e-05, "loss": 0.0, "step": 1115 }, { "epoch": 0.18770498696493146, "grad_norm": NaN, "learning_rate": 9.272825379812153e-05, "loss": 0.0, "step": 1116 }, { "epoch": 0.18787318139769574, "grad_norm": NaN, "learning_rate": 9.271429066949186e-05, "loss": 0.0, "step": 1117 }, { "epoch": 0.18804137583046002, "grad_norm": NaN, "learning_rate": 9.270031520118205e-05, "loss": 0.0, "step": 1118 }, { "epoch": 0.1882095702632243, "grad_norm": NaN, "learning_rate": 9.268632739722947e-05, "loss": 0.0, "step": 1119 }, { "epoch": 0.18837776469598858, "grad_norm": NaN, "learning_rate": 9.267232726167499e-05, "loss": 0.0, "step": 1120 }, { "epoch": 0.18854595912875283, "grad_norm": NaN, "learning_rate": 9.265831479856313e-05, "loss": 0.0, "step": 1121 }, { "epoch": 0.1887141535615171, "grad_norm": NaN, "learning_rate": 9.264429001194193e-05, "loss": 0.0, "step": 1122 }, { "epoch": 0.18888234799428139, "grad_norm": NaN, "learning_rate": 9.263025290586299e-05, "loss": 0.0, "step": 1123 }, { "epoch": 0.18905054242704566, "grad_norm": NaN, "learning_rate": 9.261620348438145e-05, "loss": 0.0, "step": 1124 }, { "epoch": 0.18921873685980994, "grad_norm": NaN, "learning_rate": 9.260214175155606e-05, "loss": 0.0, "step": 1125 }, { "epoch": 0.18938693129257422, "grad_norm": NaN, "learning_rate": 9.258806771144911e-05, "loss": 0.0, "step": 1126 }, { "epoch": 0.1895551257253385, "grad_norm": NaN, "learning_rate": 9.257398136812638e-05, "loss": 0.0, "step": 1127 }, { "epoch": 0.18972332015810275, "grad_norm": NaN, "learning_rate": 9.255988272565732e-05, "loss": 0.0, "step": 1128 }, { "epoch": 0.18989151459086703, "grad_norm": NaN, "learning_rate": 9.254577178811482e-05, "loss": 0.0, "step": 1129 }, { "epoch": 0.1900597090236313, "grad_norm": NaN, "learning_rate": 9.25316485595754e-05, "loss": 0.0, "step": 1130 }, { "epoch": 0.1902279034563956, "grad_norm": NaN, "learning_rate": 9.251751304411909e-05, "loss": 0.0, "step": 1131 }, { "epoch": 0.19039609788915987, "grad_norm": NaN, "learning_rate": 9.250336524582948e-05, "loss": 0.0, "step": 1132 }, { "epoch": 0.19056429232192415, "grad_norm": NaN, "learning_rate": 9.248920516879371e-05, "loss": 0.0, "step": 1133 }, { "epoch": 0.19073248675468843, "grad_norm": NaN, "learning_rate": 9.24750328171025e-05, "loss": 0.0, "step": 1134 }, { "epoch": 0.1909006811874527, "grad_norm": NaN, "learning_rate": 9.246084819485004e-05, "loss": 0.0, "step": 1135 }, { "epoch": 0.19106887562021696, "grad_norm": NaN, "learning_rate": 9.244665130613411e-05, "loss": 0.0, "step": 1136 }, { "epoch": 0.19123707005298124, "grad_norm": NaN, "learning_rate": 9.243244215505607e-05, "loss": 0.0, "step": 1137 }, { "epoch": 0.19140526448574552, "grad_norm": NaN, "learning_rate": 9.241822074572076e-05, "loss": 0.0, "step": 1138 }, { "epoch": 0.1915734589185098, "grad_norm": NaN, "learning_rate": 9.240398708223656e-05, "loss": 0.0, "step": 1139 }, { "epoch": 0.19174165335127408, "grad_norm": NaN, "learning_rate": 9.238974116871548e-05, "loss": 0.0, "step": 1140 }, { "epoch": 0.19190984778403836, "grad_norm": NaN, "learning_rate": 9.237548300927294e-05, "loss": 0.0, "step": 1141 }, { "epoch": 0.19207804221680264, "grad_norm": NaN, "learning_rate": 9.236121260802802e-05, "loss": 0.0, "step": 1142 }, { "epoch": 0.1922462366495669, "grad_norm": NaN, "learning_rate": 9.234692996910323e-05, "loss": 0.0, "step": 1143 }, { "epoch": 0.19241443108233117, "grad_norm": NaN, "learning_rate": 9.233263509662469e-05, "loss": 0.0, "step": 1144 }, { "epoch": 0.19258262551509545, "grad_norm": NaN, "learning_rate": 9.231832799472202e-05, "loss": 0.0, "step": 1145 }, { "epoch": 0.19275081994785973, "grad_norm": NaN, "learning_rate": 9.23040086675284e-05, "loss": 0.0, "step": 1146 }, { "epoch": 0.192919014380624, "grad_norm": NaN, "learning_rate": 9.228967711918048e-05, "loss": 0.0, "step": 1147 }, { "epoch": 0.19308720881338828, "grad_norm": NaN, "learning_rate": 9.227533335381854e-05, "loss": 0.0, "step": 1148 }, { "epoch": 0.19325540324615256, "grad_norm": NaN, "learning_rate": 9.22609773755863e-05, "loss": 0.0, "step": 1149 }, { "epoch": 0.19342359767891681, "grad_norm": NaN, "learning_rate": 9.224660918863104e-05, "loss": 0.0, "step": 1150 }, { "epoch": 0.1935917921116811, "grad_norm": NaN, "learning_rate": 9.223222879710356e-05, "loss": 0.0, "step": 1151 }, { "epoch": 0.19375998654444537, "grad_norm": NaN, "learning_rate": 9.221783620515823e-05, "loss": 0.0, "step": 1152 }, { "epoch": 0.19392818097720965, "grad_norm": NaN, "learning_rate": 9.220343141695287e-05, "loss": 0.0, "step": 1153 }, { "epoch": 0.19409637540997393, "grad_norm": NaN, "learning_rate": 9.218901443664889e-05, "loss": 0.0, "step": 1154 }, { "epoch": 0.1942645698427382, "grad_norm": NaN, "learning_rate": 9.217458526841118e-05, "loss": 0.0, "step": 1155 }, { "epoch": 0.1944327642755025, "grad_norm": NaN, "learning_rate": 9.216014391640818e-05, "loss": 0.0, "step": 1156 }, { "epoch": 0.19460095870826677, "grad_norm": NaN, "learning_rate": 9.214569038481183e-05, "loss": 0.0, "step": 1157 }, { "epoch": 0.19476915314103102, "grad_norm": NaN, "learning_rate": 9.213122467779756e-05, "loss": 0.0, "step": 1158 }, { "epoch": 0.1949373475737953, "grad_norm": NaN, "learning_rate": 9.211674679954439e-05, "loss": 0.0, "step": 1159 }, { "epoch": 0.19510554200655958, "grad_norm": NaN, "learning_rate": 9.21022567542348e-05, "loss": 0.0, "step": 1160 }, { "epoch": 0.19527373643932386, "grad_norm": NaN, "learning_rate": 9.20877545460548e-05, "loss": 0.0, "step": 1161 }, { "epoch": 0.19544193087208814, "grad_norm": NaN, "learning_rate": 9.207324017919393e-05, "loss": 0.0, "step": 1162 }, { "epoch": 0.19561012530485242, "grad_norm": NaN, "learning_rate": 9.205871365784521e-05, "loss": 0.0, "step": 1163 }, { "epoch": 0.1957783197376167, "grad_norm": NaN, "learning_rate": 9.204417498620522e-05, "loss": 0.0, "step": 1164 }, { "epoch": 0.19594651417038095, "grad_norm": NaN, "learning_rate": 9.202962416847398e-05, "loss": 0.0, "step": 1165 }, { "epoch": 0.19611470860314523, "grad_norm": NaN, "learning_rate": 9.201506120885507e-05, "loss": 0.0, "step": 1166 }, { "epoch": 0.1962829030359095, "grad_norm": NaN, "learning_rate": 9.20004861115556e-05, "loss": 0.0, "step": 1167 }, { "epoch": 0.19645109746867379, "grad_norm": NaN, "learning_rate": 9.198589888078611e-05, "loss": 0.0, "step": 1168 }, { "epoch": 0.19661929190143806, "grad_norm": NaN, "learning_rate": 9.19712995207607e-05, "loss": 0.0, "step": 1169 }, { "epoch": 0.19678748633420234, "grad_norm": NaN, "learning_rate": 9.195668803569696e-05, "loss": 0.0, "step": 1170 }, { "epoch": 0.19695568076696662, "grad_norm": NaN, "learning_rate": 9.194206442981601e-05, "loss": 0.0, "step": 1171 }, { "epoch": 0.1971238751997309, "grad_norm": NaN, "learning_rate": 9.192742870734241e-05, "loss": 0.0, "step": 1172 }, { "epoch": 0.19729206963249515, "grad_norm": NaN, "learning_rate": 9.191278087250426e-05, "loss": 0.0, "step": 1173 }, { "epoch": 0.19746026406525943, "grad_norm": NaN, "learning_rate": 9.189812092953319e-05, "loss": 0.0, "step": 1174 }, { "epoch": 0.1976284584980237, "grad_norm": NaN, "learning_rate": 9.188344888266425e-05, "loss": 0.0, "step": 1175 }, { "epoch": 0.197796652930788, "grad_norm": NaN, "learning_rate": 9.186876473613605e-05, "loss": 0.0, "step": 1176 }, { "epoch": 0.19796484736355227, "grad_norm": NaN, "learning_rate": 9.185406849419067e-05, "loss": 0.0, "step": 1177 }, { "epoch": 0.19813304179631655, "grad_norm": NaN, "learning_rate": 9.183936016107371e-05, "loss": 0.0, "step": 1178 }, { "epoch": 0.19830123622908083, "grad_norm": NaN, "learning_rate": 9.182463974103422e-05, "loss": 0.0, "step": 1179 }, { "epoch": 0.19846943066184508, "grad_norm": NaN, "learning_rate": 9.180990723832478e-05, "loss": 0.0, "step": 1180 }, { "epoch": 0.19863762509460936, "grad_norm": NaN, "learning_rate": 9.179516265720143e-05, "loss": 0.0, "step": 1181 }, { "epoch": 0.19880581952737364, "grad_norm": NaN, "learning_rate": 9.178040600192371e-05, "loss": 0.0, "step": 1182 }, { "epoch": 0.19897401396013792, "grad_norm": NaN, "learning_rate": 9.176563727675467e-05, "loss": 0.0, "step": 1183 }, { "epoch": 0.1991422083929022, "grad_norm": NaN, "learning_rate": 9.175085648596083e-05, "loss": 0.0, "step": 1184 }, { "epoch": 0.19931040282566648, "grad_norm": NaN, "learning_rate": 9.173606363381219e-05, "loss": 0.0, "step": 1185 }, { "epoch": 0.19947859725843076, "grad_norm": NaN, "learning_rate": 9.172125872458223e-05, "loss": 0.0, "step": 1186 }, { "epoch": 0.199646791691195, "grad_norm": NaN, "learning_rate": 9.170644176254792e-05, "loss": 0.0, "step": 1187 }, { "epoch": 0.1998149861239593, "grad_norm": NaN, "learning_rate": 9.169161275198975e-05, "loss": 0.0, "step": 1188 }, { "epoch": 0.19998318055672357, "grad_norm": NaN, "learning_rate": 9.16767716971916e-05, "loss": 0.0, "step": 1189 }, { "epoch": 0.20015137498948785, "grad_norm": NaN, "learning_rate": 9.166191860244093e-05, "loss": 0.0, "step": 1190 }, { "epoch": 0.20031956942225213, "grad_norm": NaN, "learning_rate": 9.16470534720286e-05, "loss": 0.0, "step": 1191 }, { "epoch": 0.2004877638550164, "grad_norm": NaN, "learning_rate": 9.163217631024901e-05, "loss": 0.0, "step": 1192 }, { "epoch": 0.20065595828778068, "grad_norm": NaN, "learning_rate": 9.161728712139998e-05, "loss": 0.0, "step": 1193 }, { "epoch": 0.20082415272054496, "grad_norm": NaN, "learning_rate": 9.160238590978286e-05, "loss": 0.0, "step": 1194 }, { "epoch": 0.20099234715330921, "grad_norm": NaN, "learning_rate": 9.158747267970242e-05, "loss": 0.0, "step": 1195 }, { "epoch": 0.2011605415860735, "grad_norm": NaN, "learning_rate": 9.157254743546691e-05, "loss": 0.0, "step": 1196 }, { "epoch": 0.20132873601883777, "grad_norm": NaN, "learning_rate": 9.15576101813881e-05, "loss": 0.0, "step": 1197 }, { "epoch": 0.20149693045160205, "grad_norm": NaN, "learning_rate": 9.154266092178117e-05, "loss": 0.0, "step": 1198 }, { "epoch": 0.20166512488436633, "grad_norm": NaN, "learning_rate": 9.152769966096482e-05, "loss": 0.0, "step": 1199 }, { "epoch": 0.2018333193171306, "grad_norm": NaN, "learning_rate": 9.151272640326116e-05, "loss": 0.0, "step": 1200 }, { "epoch": 0.2020015137498949, "grad_norm": NaN, "learning_rate": 9.149774115299584e-05, "loss": 0.0, "step": 1201 }, { "epoch": 0.20216970818265914, "grad_norm": NaN, "learning_rate": 9.148274391449788e-05, "loss": 0.0, "step": 1202 }, { "epoch": 0.20233790261542342, "grad_norm": NaN, "learning_rate": 9.146773469209982e-05, "loss": 0.0, "step": 1203 }, { "epoch": 0.2025060970481877, "grad_norm": NaN, "learning_rate": 9.145271349013769e-05, "loss": 0.0, "step": 1204 }, { "epoch": 0.20267429148095198, "grad_norm": NaN, "learning_rate": 9.143768031295094e-05, "loss": 0.0, "step": 1205 }, { "epoch": 0.20284248591371626, "grad_norm": NaN, "learning_rate": 9.142263516488245e-05, "loss": 0.0, "step": 1206 }, { "epoch": 0.20301068034648054, "grad_norm": NaN, "learning_rate": 9.140757805027864e-05, "loss": 0.0, "step": 1207 }, { "epoch": 0.20317887477924482, "grad_norm": NaN, "learning_rate": 9.139250897348933e-05, "loss": 0.0, "step": 1208 }, { "epoch": 0.2033470692120091, "grad_norm": NaN, "learning_rate": 9.137742793886779e-05, "loss": 0.0, "step": 1209 }, { "epoch": 0.20351526364477335, "grad_norm": NaN, "learning_rate": 9.136233495077077e-05, "loss": 0.0, "step": 1210 }, { "epoch": 0.20368345807753763, "grad_norm": NaN, "learning_rate": 9.134723001355847e-05, "loss": 0.0, "step": 1211 }, { "epoch": 0.2038516525103019, "grad_norm": NaN, "learning_rate": 9.133211313159453e-05, "loss": 0.0, "step": 1212 }, { "epoch": 0.20401984694306619, "grad_norm": NaN, "learning_rate": 9.131698430924605e-05, "loss": 0.0, "step": 1213 }, { "epoch": 0.20418804137583046, "grad_norm": NaN, "learning_rate": 9.130184355088359e-05, "loss": 0.0, "step": 1214 }, { "epoch": 0.20435623580859474, "grad_norm": NaN, "learning_rate": 9.128669086088112e-05, "loss": 0.0, "step": 1215 }, { "epoch": 0.20452443024135902, "grad_norm": NaN, "learning_rate": 9.127152624361612e-05, "loss": 0.0, "step": 1216 }, { "epoch": 0.20469262467412327, "grad_norm": NaN, "learning_rate": 9.125634970346945e-05, "loss": 0.0, "step": 1217 }, { "epoch": 0.20486081910688755, "grad_norm": NaN, "learning_rate": 9.124116124482545e-05, "loss": 0.0, "step": 1218 }, { "epoch": 0.20502901353965183, "grad_norm": NaN, "learning_rate": 9.122596087207187e-05, "loss": 0.0, "step": 1219 }, { "epoch": 0.2051972079724161, "grad_norm": NaN, "learning_rate": 9.121074858959997e-05, "loss": 0.0, "step": 1220 }, { "epoch": 0.2053654024051804, "grad_norm": NaN, "learning_rate": 9.11955244018044e-05, "loss": 0.0, "step": 1221 }, { "epoch": 0.20553359683794467, "grad_norm": NaN, "learning_rate": 9.118028831308322e-05, "loss": 0.0, "step": 1222 }, { "epoch": 0.20570179127070895, "grad_norm": NaN, "learning_rate": 9.116504032783802e-05, "loss": 0.0, "step": 1223 }, { "epoch": 0.2058699857034732, "grad_norm": NaN, "learning_rate": 9.114978045047372e-05, "loss": 0.0, "step": 1224 }, { "epoch": 0.20603818013623748, "grad_norm": NaN, "learning_rate": 9.113450868539876e-05, "loss": 0.0, "step": 1225 }, { "epoch": 0.20620637456900176, "grad_norm": NaN, "learning_rate": 9.111922503702498e-05, "loss": 0.0, "step": 1226 }, { "epoch": 0.20637456900176604, "grad_norm": NaN, "learning_rate": 9.110392950976764e-05, "loss": 0.0, "step": 1227 }, { "epoch": 0.20654276343453032, "grad_norm": NaN, "learning_rate": 9.108862210804548e-05, "loss": 0.0, "step": 1228 }, { "epoch": 0.2067109578672946, "grad_norm": NaN, "learning_rate": 9.107330283628058e-05, "loss": 0.0, "step": 1229 }, { "epoch": 0.20687915230005888, "grad_norm": NaN, "learning_rate": 9.105797169889854e-05, "loss": 0.0, "step": 1230 }, { "epoch": 0.20704734673282316, "grad_norm": NaN, "learning_rate": 9.104262870032837e-05, "loss": 0.0, "step": 1231 }, { "epoch": 0.2072155411655874, "grad_norm": NaN, "learning_rate": 9.102727384500247e-05, "loss": 0.0, "step": 1232 }, { "epoch": 0.2073837355983517, "grad_norm": NaN, "learning_rate": 9.101190713735667e-05, "loss": 0.0, "step": 1233 }, { "epoch": 0.20755193003111597, "grad_norm": NaN, "learning_rate": 9.099652858183028e-05, "loss": 0.0, "step": 1234 }, { "epoch": 0.20772012446388025, "grad_norm": NaN, "learning_rate": 9.098113818286595e-05, "loss": 0.0, "step": 1235 }, { "epoch": 0.20788831889664453, "grad_norm": NaN, "learning_rate": 9.096573594490982e-05, "loss": 0.0, "step": 1236 }, { "epoch": 0.2080565133294088, "grad_norm": NaN, "learning_rate": 9.095032187241143e-05, "loss": 0.0, "step": 1237 }, { "epoch": 0.20822470776217308, "grad_norm": NaN, "learning_rate": 9.09348959698237e-05, "loss": 0.0, "step": 1238 }, { "epoch": 0.20839290219493734, "grad_norm": NaN, "learning_rate": 9.091945824160304e-05, "loss": 0.0, "step": 1239 }, { "epoch": 0.20856109662770161, "grad_norm": NaN, "learning_rate": 9.09040086922092e-05, "loss": 0.0, "step": 1240 }, { "epoch": 0.2087292910604659, "grad_norm": NaN, "learning_rate": 9.088854732610543e-05, "loss": 0.0, "step": 1241 }, { "epoch": 0.20889748549323017, "grad_norm": NaN, "learning_rate": 9.08730741477583e-05, "loss": 0.0, "step": 1242 }, { "epoch": 0.20906567992599445, "grad_norm": NaN, "learning_rate": 9.085758916163785e-05, "loss": 0.0, "step": 1243 }, { "epoch": 0.20923387435875873, "grad_norm": NaN, "learning_rate": 9.084209237221752e-05, "loss": 0.0, "step": 1244 }, { "epoch": 0.209402068791523, "grad_norm": NaN, "learning_rate": 9.082658378397417e-05, "loss": 0.0, "step": 1245 }, { "epoch": 0.2095702632242873, "grad_norm": NaN, "learning_rate": 9.081106340138803e-05, "loss": 0.0, "step": 1246 }, { "epoch": 0.20973845765705154, "grad_norm": NaN, "learning_rate": 9.07955312289428e-05, "loss": 0.0, "step": 1247 }, { "epoch": 0.20990665208981582, "grad_norm": NaN, "learning_rate": 9.077998727112554e-05, "loss": 0.0, "step": 1248 }, { "epoch": 0.2100748465225801, "grad_norm": NaN, "learning_rate": 9.076443153242671e-05, "loss": 0.0, "step": 1249 }, { "epoch": 0.21024304095534438, "grad_norm": NaN, "learning_rate": 9.07488640173402e-05, "loss": 0.0, "step": 1250 }, { "epoch": 0.21041123538810866, "grad_norm": NaN, "learning_rate": 9.073328473036328e-05, "loss": 0.0, "step": 1251 }, { "epoch": 0.21057942982087294, "grad_norm": NaN, "learning_rate": 9.071769367599665e-05, "loss": 0.0, "step": 1252 }, { "epoch": 0.21074762425363722, "grad_norm": NaN, "learning_rate": 9.070209085874437e-05, "loss": 0.0, "step": 1253 }, { "epoch": 0.21091581868640147, "grad_norm": NaN, "learning_rate": 9.068647628311395e-05, "loss": 0.0, "step": 1254 }, { "epoch": 0.21108401311916575, "grad_norm": NaN, "learning_rate": 9.067084995361623e-05, "loss": 0.0, "step": 1255 }, { "epoch": 0.21125220755193003, "grad_norm": NaN, "learning_rate": 9.065521187476551e-05, "loss": 0.0, "step": 1256 }, { "epoch": 0.2114204019846943, "grad_norm": NaN, "learning_rate": 9.063956205107944e-05, "loss": 0.0, "step": 1257 }, { "epoch": 0.21158859641745859, "grad_norm": NaN, "learning_rate": 9.062390048707907e-05, "loss": 0.0, "step": 1258 }, { "epoch": 0.21175679085022286, "grad_norm": NaN, "learning_rate": 9.060822718728888e-05, "loss": 0.0, "step": 1259 }, { "epoch": 0.21192498528298714, "grad_norm": NaN, "learning_rate": 9.059254215623668e-05, "loss": 0.0, "step": 1260 }, { "epoch": 0.2120931797157514, "grad_norm": NaN, "learning_rate": 9.057684539845372e-05, "loss": 0.0, "step": 1261 }, { "epoch": 0.21226137414851567, "grad_norm": NaN, "learning_rate": 9.056113691847461e-05, "loss": 0.0, "step": 1262 }, { "epoch": 0.21242956858127995, "grad_norm": NaN, "learning_rate": 9.054541672083736e-05, "loss": 0.0, "step": 1263 }, { "epoch": 0.21259776301404423, "grad_norm": NaN, "learning_rate": 9.052968481008334e-05, "loss": 0.0, "step": 1264 }, { "epoch": 0.2127659574468085, "grad_norm": NaN, "learning_rate": 9.051394119075736e-05, "loss": 0.0, "step": 1265 }, { "epoch": 0.2129341518795728, "grad_norm": NaN, "learning_rate": 9.049818586740755e-05, "loss": 0.0, "step": 1266 }, { "epoch": 0.21310234631233707, "grad_norm": NaN, "learning_rate": 9.048241884458543e-05, "loss": 0.0, "step": 1267 }, { "epoch": 0.21327054074510135, "grad_norm": NaN, "learning_rate": 9.046664012684594e-05, "loss": 0.0, "step": 1268 }, { "epoch": 0.2134387351778656, "grad_norm": NaN, "learning_rate": 9.045084971874738e-05, "loss": 0.0, "step": 1269 }, { "epoch": 0.21360692961062988, "grad_norm": NaN, "learning_rate": 9.04350476248514e-05, "loss": 0.0, "step": 1270 }, { "epoch": 0.21377512404339416, "grad_norm": NaN, "learning_rate": 9.041923384972306e-05, "loss": 0.0, "step": 1271 }, { "epoch": 0.21394331847615844, "grad_norm": NaN, "learning_rate": 9.040340839793078e-05, "loss": 0.0, "step": 1272 }, { "epoch": 0.21411151290892272, "grad_norm": NaN, "learning_rate": 9.038757127404638e-05, "loss": 0.0, "step": 1273 }, { "epoch": 0.214279707341687, "grad_norm": NaN, "learning_rate": 9.037172248264498e-05, "loss": 0.0, "step": 1274 }, { "epoch": 0.21444790177445128, "grad_norm": NaN, "learning_rate": 9.035586202830517e-05, "loss": 0.0, "step": 1275 }, { "epoch": 0.21461609620721553, "grad_norm": NaN, "learning_rate": 9.033998991560881e-05, "loss": 0.0, "step": 1276 }, { "epoch": 0.2147842906399798, "grad_norm": NaN, "learning_rate": 9.032410614914119e-05, "loss": 0.0, "step": 1277 }, { "epoch": 0.2149524850727441, "grad_norm": NaN, "learning_rate": 9.030821073349098e-05, "loss": 0.0, "step": 1278 }, { "epoch": 0.21512067950550837, "grad_norm": NaN, "learning_rate": 9.029230367325017e-05, "loss": 0.0, "step": 1279 }, { "epoch": 0.21528887393827265, "grad_norm": NaN, "learning_rate": 9.027638497301412e-05, "loss": 0.0, "step": 1280 }, { "epoch": 0.21545706837103692, "grad_norm": NaN, "learning_rate": 9.026045463738155e-05, "loss": 0.0, "step": 1281 }, { "epoch": 0.2156252628038012, "grad_norm": NaN, "learning_rate": 9.024451267095461e-05, "loss": 0.0, "step": 1282 }, { "epoch": 0.21579345723656548, "grad_norm": NaN, "learning_rate": 9.022855907833871e-05, "loss": 0.0, "step": 1283 }, { "epoch": 0.21596165166932974, "grad_norm": NaN, "learning_rate": 9.021259386414269e-05, "loss": 0.0, "step": 1284 }, { "epoch": 0.21612984610209401, "grad_norm": NaN, "learning_rate": 9.01966170329787e-05, "loss": 0.0, "step": 1285 }, { "epoch": 0.2162980405348583, "grad_norm": NaN, "learning_rate": 9.018062858946227e-05, "loss": 0.0, "step": 1286 }, { "epoch": 0.21646623496762257, "grad_norm": NaN, "learning_rate": 9.01646285382123e-05, "loss": 0.0, "step": 1287 }, { "epoch": 0.21663442940038685, "grad_norm": NaN, "learning_rate": 9.014861688385101e-05, "loss": 0.0, "step": 1288 }, { "epoch": 0.21680262383315113, "grad_norm": NaN, "learning_rate": 9.0132593631004e-05, "loss": 0.0, "step": 1289 }, { "epoch": 0.2169708182659154, "grad_norm": NaN, "learning_rate": 9.011655878430019e-05, "loss": 0.0, "step": 1290 }, { "epoch": 0.21713901269867966, "grad_norm": NaN, "learning_rate": 9.010051234837187e-05, "loss": 0.0, "step": 1291 }, { "epoch": 0.21730720713144394, "grad_norm": NaN, "learning_rate": 9.00844543278547e-05, "loss": 0.0, "step": 1292 }, { "epoch": 0.21747540156420822, "grad_norm": NaN, "learning_rate": 9.006838472738766e-05, "loss": 0.0, "step": 1293 }, { "epoch": 0.2176435959969725, "grad_norm": NaN, "learning_rate": 9.005230355161304e-05, "loss": 0.0, "step": 1294 }, { "epoch": 0.21781179042973678, "grad_norm": NaN, "learning_rate": 9.003621080517654e-05, "loss": 0.0, "step": 1295 }, { "epoch": 0.21797998486250106, "grad_norm": NaN, "learning_rate": 9.002010649272716e-05, "loss": 0.0, "step": 1296 }, { "epoch": 0.21814817929526534, "grad_norm": NaN, "learning_rate": 9.000399061891728e-05, "loss": 0.0, "step": 1297 }, { "epoch": 0.2183163737280296, "grad_norm": NaN, "learning_rate": 8.998786318840258e-05, "loss": 0.0, "step": 1298 }, { "epoch": 0.21848456816079387, "grad_norm": NaN, "learning_rate": 8.99717242058421e-05, "loss": 0.0, "step": 1299 }, { "epoch": 0.21865276259355815, "grad_norm": NaN, "learning_rate": 8.99555736758982e-05, "loss": 0.0, "step": 1300 }, { "epoch": 0.21882095702632243, "grad_norm": NaN, "learning_rate": 8.993941160323658e-05, "loss": 0.0, "step": 1301 }, { "epoch": 0.2189891514590867, "grad_norm": NaN, "learning_rate": 8.992323799252631e-05, "loss": 0.0, "step": 1302 }, { "epoch": 0.21915734589185099, "grad_norm": NaN, "learning_rate": 8.990705284843974e-05, "loss": 0.0, "step": 1303 }, { "epoch": 0.21932554032461526, "grad_norm": NaN, "learning_rate": 8.98908561756526e-05, "loss": 0.0, "step": 1304 }, { "epoch": 0.21949373475737954, "grad_norm": NaN, "learning_rate": 8.987464797884391e-05, "loss": 0.0, "step": 1305 }, { "epoch": 0.2196619291901438, "grad_norm": NaN, "learning_rate": 8.985842826269604e-05, "loss": 0.0, "step": 1306 }, { "epoch": 0.21983012362290807, "grad_norm": NaN, "learning_rate": 8.984219703189468e-05, "loss": 0.0, "step": 1307 }, { "epoch": 0.21999831805567235, "grad_norm": NaN, "learning_rate": 8.982595429112885e-05, "loss": 0.0, "step": 1308 }, { "epoch": 0.22016651248843663, "grad_norm": NaN, "learning_rate": 8.980970004509091e-05, "loss": 0.0, "step": 1309 }, { "epoch": 0.2203347069212009, "grad_norm": NaN, "learning_rate": 8.979343429847653e-05, "loss": 0.0, "step": 1310 }, { "epoch": 0.2205029013539652, "grad_norm": NaN, "learning_rate": 8.977715705598469e-05, "loss": 0.0, "step": 1311 }, { "epoch": 0.22067109578672947, "grad_norm": NaN, "learning_rate": 8.976086832231769e-05, "loss": 0.0, "step": 1312 }, { "epoch": 0.22083929021949372, "grad_norm": NaN, "learning_rate": 8.97445681021812e-05, "loss": 0.0, "step": 1313 }, { "epoch": 0.221007484652258, "grad_norm": NaN, "learning_rate": 8.972825640028412e-05, "loss": 0.0, "step": 1314 }, { "epoch": 0.22117567908502228, "grad_norm": NaN, "learning_rate": 8.971193322133877e-05, "loss": 0.0, "step": 1315 }, { "epoch": 0.22134387351778656, "grad_norm": NaN, "learning_rate": 8.969559857006072e-05, "loss": 0.0, "step": 1316 }, { "epoch": 0.22151206795055084, "grad_norm": NaN, "learning_rate": 8.967925245116884e-05, "loss": 0.0, "step": 1317 }, { "epoch": 0.22168026238331512, "grad_norm": NaN, "learning_rate": 8.966289486938538e-05, "loss": 0.0, "step": 1318 }, { "epoch": 0.2218484568160794, "grad_norm": NaN, "learning_rate": 8.964652582943582e-05, "loss": 0.0, "step": 1319 }, { "epoch": 0.22201665124884365, "grad_norm": NaN, "learning_rate": 8.963014533604903e-05, "loss": 0.0, "step": 1320 }, { "epoch": 0.22218484568160793, "grad_norm": NaN, "learning_rate": 8.961375339395712e-05, "loss": 0.0, "step": 1321 }, { "epoch": 0.2223530401143722, "grad_norm": NaN, "learning_rate": 8.959735000789558e-05, "loss": 0.0, "step": 1322 }, { "epoch": 0.2225212345471365, "grad_norm": NaN, "learning_rate": 8.958093518260312e-05, "loss": 0.0, "step": 1323 }, { "epoch": 0.22268942897990077, "grad_norm": NaN, "learning_rate": 8.956450892282182e-05, "loss": 0.0, "step": 1324 }, { "epoch": 0.22285762341266505, "grad_norm": NaN, "learning_rate": 8.954807123329704e-05, "loss": 0.0, "step": 1325 }, { "epoch": 0.22302581784542932, "grad_norm": NaN, "learning_rate": 8.953162211877744e-05, "loss": 0.0, "step": 1326 }, { "epoch": 0.2231940122781936, "grad_norm": NaN, "learning_rate": 8.9515161584015e-05, "loss": 0.0, "step": 1327 }, { "epoch": 0.22336220671095786, "grad_norm": NaN, "learning_rate": 8.949868963376496e-05, "loss": 0.0, "step": 1328 }, { "epoch": 0.22353040114372213, "grad_norm": NaN, "learning_rate": 8.94822062727859e-05, "loss": 0.0, "step": 1329 }, { "epoch": 0.22369859557648641, "grad_norm": NaN, "learning_rate": 8.946571150583969e-05, "loss": 0.0, "step": 1330 }, { "epoch": 0.2238667900092507, "grad_norm": NaN, "learning_rate": 8.944920533769146e-05, "loss": 0.0, "step": 1331 }, { "epoch": 0.22403498444201497, "grad_norm": NaN, "learning_rate": 8.943268777310964e-05, "loss": 0.0, "step": 1332 }, { "epoch": 0.22420317887477925, "grad_norm": NaN, "learning_rate": 8.941615881686603e-05, "loss": 0.0, "step": 1333 }, { "epoch": 0.22437137330754353, "grad_norm": NaN, "learning_rate": 8.939961847373559e-05, "loss": 0.0, "step": 1334 }, { "epoch": 0.22453956774030778, "grad_norm": NaN, "learning_rate": 8.93830667484967e-05, "loss": 0.0, "step": 1335 }, { "epoch": 0.22470776217307206, "grad_norm": NaN, "learning_rate": 8.936650364593093e-05, "loss": 0.0, "step": 1336 }, { "epoch": 0.22487595660583634, "grad_norm": NaN, "learning_rate": 8.934992917082318e-05, "loss": 0.0, "step": 1337 }, { "epoch": 0.22504415103860062, "grad_norm": NaN, "learning_rate": 8.933334332796165e-05, "loss": 0.0, "step": 1338 }, { "epoch": 0.2252123454713649, "grad_norm": NaN, "learning_rate": 8.931674612213778e-05, "loss": 0.0, "step": 1339 }, { "epoch": 0.22538053990412918, "grad_norm": NaN, "learning_rate": 8.930013755814634e-05, "loss": 0.0, "step": 1340 }, { "epoch": 0.22554873433689346, "grad_norm": NaN, "learning_rate": 8.928351764078536e-05, "loss": 0.0, "step": 1341 }, { "epoch": 0.22571692876965774, "grad_norm": NaN, "learning_rate": 8.926688637485609e-05, "loss": 0.0, "step": 1342 }, { "epoch": 0.225885123202422, "grad_norm": NaN, "learning_rate": 8.92502437651632e-05, "loss": 0.0, "step": 1343 }, { "epoch": 0.22605331763518627, "grad_norm": NaN, "learning_rate": 8.923358981651451e-05, "loss": 0.0, "step": 1344 }, { "epoch": 0.22622151206795055, "grad_norm": NaN, "learning_rate": 8.921692453372114e-05, "loss": 0.0, "step": 1345 }, { "epoch": 0.22638970650071483, "grad_norm": NaN, "learning_rate": 8.920024792159754e-05, "loss": 0.0, "step": 1346 }, { "epoch": 0.2265579009334791, "grad_norm": NaN, "learning_rate": 8.91835599849614e-05, "loss": 0.0, "step": 1347 }, { "epoch": 0.22672609536624339, "grad_norm": NaN, "learning_rate": 8.916686072863362e-05, "loss": 0.0, "step": 1348 }, { "epoch": 0.22689428979900766, "grad_norm": NaN, "learning_rate": 8.915015015743851e-05, "loss": 0.0, "step": 1349 }, { "epoch": 0.22706248423177192, "grad_norm": NaN, "learning_rate": 8.913342827620351e-05, "loss": 0.0, "step": 1350 }, { "epoch": 0.2272306786645362, "grad_norm": NaN, "learning_rate": 8.911669508975941e-05, "loss": 0.0, "step": 1351 }, { "epoch": 0.22739887309730047, "grad_norm": NaN, "learning_rate": 8.90999506029402e-05, "loss": 0.0, "step": 1352 }, { "epoch": 0.22756706753006475, "grad_norm": NaN, "learning_rate": 8.908319482058325e-05, "loss": 0.0, "step": 1353 }, { "epoch": 0.22773526196282903, "grad_norm": NaN, "learning_rate": 8.906642774752905e-05, "loss": 0.0, "step": 1354 }, { "epoch": 0.2279034563955933, "grad_norm": NaN, "learning_rate": 8.904964938862144e-05, "loss": 0.0, "step": 1355 }, { "epoch": 0.2280716508283576, "grad_norm": NaN, "learning_rate": 8.903285974870751e-05, "loss": 0.0, "step": 1356 }, { "epoch": 0.22823984526112184, "grad_norm": NaN, "learning_rate": 8.901605883263759e-05, "loss": 0.0, "step": 1357 }, { "epoch": 0.22840803969388612, "grad_norm": NaN, "learning_rate": 8.899924664526527e-05, "loss": 0.0, "step": 1358 }, { "epoch": 0.2285762341266504, "grad_norm": NaN, "learning_rate": 8.898242319144743e-05, "loss": 0.0, "step": 1359 }, { "epoch": 0.22874442855941468, "grad_norm": NaN, "learning_rate": 8.896558847604414e-05, "loss": 0.0, "step": 1360 }, { "epoch": 0.22891262299217896, "grad_norm": NaN, "learning_rate": 8.894874250391878e-05, "loss": 0.0, "step": 1361 }, { "epoch": 0.22908081742494324, "grad_norm": NaN, "learning_rate": 8.893188527993796e-05, "loss": 0.0, "step": 1362 }, { "epoch": 0.22924901185770752, "grad_norm": NaN, "learning_rate": 8.891501680897156e-05, "loss": 0.0, "step": 1363 }, { "epoch": 0.2294172062904718, "grad_norm": NaN, "learning_rate": 8.889813709589266e-05, "loss": 0.0, "step": 1364 }, { "epoch": 0.22958540072323605, "grad_norm": NaN, "learning_rate": 8.888124614557764e-05, "loss": 0.0, "step": 1365 }, { "epoch": 0.22975359515600033, "grad_norm": NaN, "learning_rate": 8.886434396290611e-05, "loss": 0.0, "step": 1366 }, { "epoch": 0.2299217895887646, "grad_norm": NaN, "learning_rate": 8.884743055276092e-05, "loss": 0.0, "step": 1367 }, { "epoch": 0.2300899840215289, "grad_norm": NaN, "learning_rate": 8.883050592002815e-05, "loss": 0.0, "step": 1368 }, { "epoch": 0.23025817845429317, "grad_norm": NaN, "learning_rate": 8.881357006959715e-05, "loss": 0.0, "step": 1369 }, { "epoch": 0.23042637288705745, "grad_norm": NaN, "learning_rate": 8.879662300636048e-05, "loss": 0.0, "step": 1370 }, { "epoch": 0.23059456731982172, "grad_norm": NaN, "learning_rate": 8.8779664735214e-05, "loss": 0.0, "step": 1371 }, { "epoch": 0.23076276175258598, "grad_norm": NaN, "learning_rate": 8.876269526105672e-05, "loss": 0.0, "step": 1372 }, { "epoch": 0.23093095618535026, "grad_norm": NaN, "learning_rate": 8.874571458879095e-05, "loss": 0.0, "step": 1373 }, { "epoch": 0.23109915061811453, "grad_norm": NaN, "learning_rate": 8.87287227233222e-05, "loss": 0.0, "step": 1374 }, { "epoch": 0.23126734505087881, "grad_norm": NaN, "learning_rate": 8.871171966955924e-05, "loss": 0.0, "step": 1375 }, { "epoch": 0.2314355394836431, "grad_norm": NaN, "learning_rate": 8.869470543241408e-05, "loss": 0.0, "step": 1376 }, { "epoch": 0.23160373391640737, "grad_norm": NaN, "learning_rate": 8.867768001680192e-05, "loss": 0.0, "step": 1377 }, { "epoch": 0.23177192834917165, "grad_norm": NaN, "learning_rate": 8.866064342764123e-05, "loss": 0.0, "step": 1378 }, { "epoch": 0.23194012278193593, "grad_norm": NaN, "learning_rate": 8.864359566985365e-05, "loss": 0.0, "step": 1379 }, { "epoch": 0.23210831721470018, "grad_norm": NaN, "learning_rate": 8.862653674836413e-05, "loss": 0.0, "step": 1380 }, { "epoch": 0.23227651164746446, "grad_norm": NaN, "learning_rate": 8.860946666810079e-05, "loss": 0.0, "step": 1381 }, { "epoch": 0.23244470608022874, "grad_norm": NaN, "learning_rate": 8.859238543399498e-05, "loss": 0.0, "step": 1382 }, { "epoch": 0.23261290051299302, "grad_norm": NaN, "learning_rate": 8.857529305098127e-05, "loss": 0.0, "step": 1383 }, { "epoch": 0.2327810949457573, "grad_norm": NaN, "learning_rate": 8.855818952399746e-05, "loss": 0.0, "step": 1384 }, { "epoch": 0.23294928937852158, "grad_norm": NaN, "learning_rate": 8.854107485798458e-05, "loss": 0.0, "step": 1385 }, { "epoch": 0.23311748381128586, "grad_norm": NaN, "learning_rate": 8.852394905788685e-05, "loss": 0.0, "step": 1386 }, { "epoch": 0.2332856782440501, "grad_norm": NaN, "learning_rate": 8.850681212865175e-05, "loss": 0.0, "step": 1387 }, { "epoch": 0.2334538726768144, "grad_norm": NaN, "learning_rate": 8.848966407522992e-05, "loss": 0.0, "step": 1388 }, { "epoch": 0.23362206710957867, "grad_norm": NaN, "learning_rate": 8.847250490257524e-05, "loss": 0.0, "step": 1389 }, { "epoch": 0.23379026154234295, "grad_norm": NaN, "learning_rate": 8.84553346156448e-05, "loss": 0.0, "step": 1390 }, { "epoch": 0.23395845597510723, "grad_norm": NaN, "learning_rate": 8.843815321939893e-05, "loss": 0.0, "step": 1391 }, { "epoch": 0.2341266504078715, "grad_norm": NaN, "learning_rate": 8.842096071880113e-05, "loss": 0.0, "step": 1392 }, { "epoch": 0.23429484484063579, "grad_norm": NaN, "learning_rate": 8.840375711881811e-05, "loss": 0.0, "step": 1393 }, { "epoch": 0.23446303927340004, "grad_norm": NaN, "learning_rate": 8.838654242441981e-05, "loss": 0.0, "step": 1394 }, { "epoch": 0.23463123370616432, "grad_norm": NaN, "learning_rate": 8.836931664057935e-05, "loss": 0.0, "step": 1395 }, { "epoch": 0.2347994281389286, "grad_norm": NaN, "learning_rate": 8.83520797722731e-05, "loss": 0.0, "step": 1396 }, { "epoch": 0.23496762257169287, "grad_norm": NaN, "learning_rate": 8.833483182448052e-05, "loss": 0.0, "step": 1397 }, { "epoch": 0.23513581700445715, "grad_norm": NaN, "learning_rate": 8.831757280218444e-05, "loss": 0.0, "step": 1398 }, { "epoch": 0.23530401143722143, "grad_norm": NaN, "learning_rate": 8.830030271037076e-05, "loss": 0.0, "step": 1399 }, { "epoch": 0.2354722058699857, "grad_norm": NaN, "learning_rate": 8.82830215540286e-05, "loss": 0.0, "step": 1400 }, { "epoch": 0.23564040030275, "grad_norm": NaN, "learning_rate": 8.826572933815032e-05, "loss": 0.0, "step": 1401 }, { "epoch": 0.23580859473551424, "grad_norm": NaN, "learning_rate": 8.824842606773142e-05, "loss": 0.0, "step": 1402 }, { "epoch": 0.23597678916827852, "grad_norm": NaN, "learning_rate": 8.823111174777064e-05, "loss": 0.0, "step": 1403 }, { "epoch": 0.2361449836010428, "grad_norm": NaN, "learning_rate": 8.82137863832699e-05, "loss": 0.0, "step": 1404 }, { "epoch": 0.23631317803380708, "grad_norm": NaN, "learning_rate": 8.819644997923427e-05, "loss": 0.0, "step": 1405 }, { "epoch": 0.23648137246657136, "grad_norm": NaN, "learning_rate": 8.817910254067209e-05, "loss": 0.0, "step": 1406 }, { "epoch": 0.23664956689933564, "grad_norm": NaN, "learning_rate": 8.816174407259481e-05, "loss": 0.0, "step": 1407 }, { "epoch": 0.23681776133209992, "grad_norm": NaN, "learning_rate": 8.81443745800171e-05, "loss": 0.0, "step": 1408 }, { "epoch": 0.23698595576486417, "grad_norm": NaN, "learning_rate": 8.812699406795682e-05, "loss": 0.0, "step": 1409 }, { "epoch": 0.23715415019762845, "grad_norm": NaN, "learning_rate": 8.810960254143499e-05, "loss": 0.0, "step": 1410 }, { "epoch": 0.23732234463039273, "grad_norm": NaN, "learning_rate": 8.809220000547584e-05, "loss": 0.0, "step": 1411 }, { "epoch": 0.237490539063157, "grad_norm": NaN, "learning_rate": 8.807478646510677e-05, "loss": 0.0, "step": 1412 }, { "epoch": 0.2376587334959213, "grad_norm": NaN, "learning_rate": 8.805736192535835e-05, "loss": 0.0, "step": 1413 }, { "epoch": 0.23782692792868557, "grad_norm": NaN, "learning_rate": 8.803992639126433e-05, "loss": 0.0, "step": 1414 }, { "epoch": 0.23799512236144985, "grad_norm": NaN, "learning_rate": 8.802247986786167e-05, "loss": 0.0, "step": 1415 }, { "epoch": 0.23816331679421412, "grad_norm": NaN, "learning_rate": 8.800502236019044e-05, "loss": 0.0, "step": 1416 }, { "epoch": 0.23833151122697838, "grad_norm": NaN, "learning_rate": 8.798755387329392e-05, "loss": 0.0, "step": 1417 }, { "epoch": 0.23849970565974266, "grad_norm": NaN, "learning_rate": 8.797007441221859e-05, "loss": 0.0, "step": 1418 }, { "epoch": 0.23866790009250693, "grad_norm": NaN, "learning_rate": 8.795258398201403e-05, "loss": 0.0, "step": 1419 }, { "epoch": 0.23883609452527121, "grad_norm": NaN, "learning_rate": 8.793508258773307e-05, "loss": 0.0, "step": 1420 }, { "epoch": 0.2390042889580355, "grad_norm": NaN, "learning_rate": 8.791757023443163e-05, "loss": 0.0, "step": 1421 }, { "epoch": 0.23917248339079977, "grad_norm": NaN, "learning_rate": 8.790004692716887e-05, "loss": 0.0, "step": 1422 }, { "epoch": 0.23934067782356405, "grad_norm": NaN, "learning_rate": 8.788251267100704e-05, "loss": 0.0, "step": 1423 }, { "epoch": 0.2395088722563283, "grad_norm": NaN, "learning_rate": 8.786496747101163e-05, "loss": 0.0, "step": 1424 }, { "epoch": 0.23967706668909258, "grad_norm": NaN, "learning_rate": 8.78474113322512e-05, "loss": 0.0, "step": 1425 }, { "epoch": 0.23984526112185686, "grad_norm": NaN, "learning_rate": 8.782984425979757e-05, "loss": 0.0, "step": 1426 }, { "epoch": 0.24001345555462114, "grad_norm": NaN, "learning_rate": 8.781226625872564e-05, "loss": 0.0, "step": 1427 }, { "epoch": 0.24018164998738542, "grad_norm": NaN, "learning_rate": 8.779467733411353e-05, "loss": 0.0, "step": 1428 }, { "epoch": 0.2403498444201497, "grad_norm": NaN, "learning_rate": 8.777707749104244e-05, "loss": 0.0, "step": 1429 }, { "epoch": 0.24051803885291398, "grad_norm": NaN, "learning_rate": 8.775946673459681e-05, "loss": 0.0, "step": 1430 }, { "epoch": 0.24068623328567823, "grad_norm": NaN, "learning_rate": 8.774184506986416e-05, "loss": 0.0, "step": 1431 }, { "epoch": 0.2408544277184425, "grad_norm": NaN, "learning_rate": 8.772421250193521e-05, "loss": 0.0, "step": 1432 }, { "epoch": 0.2410226221512068, "grad_norm": NaN, "learning_rate": 8.77065690359038e-05, "loss": 0.0, "step": 1433 }, { "epoch": 0.24119081658397107, "grad_norm": NaN, "learning_rate": 8.768891467686693e-05, "loss": 0.0, "step": 1434 }, { "epoch": 0.24135901101673535, "grad_norm": NaN, "learning_rate": 8.767124942992475e-05, "loss": 0.0, "step": 1435 }, { "epoch": 0.24152720544949963, "grad_norm": NaN, "learning_rate": 8.765357330018056e-05, "loss": 0.0, "step": 1436 }, { "epoch": 0.2416953998822639, "grad_norm": NaN, "learning_rate": 8.763588629274077e-05, "loss": 0.0, "step": 1437 }, { "epoch": 0.24186359431502819, "grad_norm": NaN, "learning_rate": 8.761818841271498e-05, "loss": 0.0, "step": 1438 }, { "epoch": 0.24203178874779244, "grad_norm": NaN, "learning_rate": 8.760047966521591e-05, "loss": 0.0, "step": 1439 }, { "epoch": 0.24219998318055672, "grad_norm": NaN, "learning_rate": 8.758276005535943e-05, "loss": 0.0, "step": 1440 }, { "epoch": 0.242368177613321, "grad_norm": NaN, "learning_rate": 8.75650295882645e-05, "loss": 0.0, "step": 1441 }, { "epoch": 0.24253637204608527, "grad_norm": NaN, "learning_rate": 8.75472882690533e-05, "loss": 0.0, "step": 1442 }, { "epoch": 0.24270456647884955, "grad_norm": NaN, "learning_rate": 8.752953610285104e-05, "loss": 0.0, "step": 1443 }, { "epoch": 0.24287276091161383, "grad_norm": NaN, "learning_rate": 8.751177309478618e-05, "loss": 0.0, "step": 1444 }, { "epoch": 0.2430409553443781, "grad_norm": NaN, "learning_rate": 8.74939992499902e-05, "loss": 0.0, "step": 1445 }, { "epoch": 0.24320914977714236, "grad_norm": NaN, "learning_rate": 8.747621457359782e-05, "loss": 0.0, "step": 1446 }, { "epoch": 0.24337734420990664, "grad_norm": NaN, "learning_rate": 8.745841907074682e-05, "loss": 0.0, "step": 1447 }, { "epoch": 0.24354553864267092, "grad_norm": NaN, "learning_rate": 8.744061274657806e-05, "loss": 0.0, "step": 1448 }, { "epoch": 0.2437137330754352, "grad_norm": NaN, "learning_rate": 8.742279560623567e-05, "loss": 0.0, "step": 1449 }, { "epoch": 0.24388192750819948, "grad_norm": NaN, "learning_rate": 8.740496765486679e-05, "loss": 0.0, "step": 1450 }, { "epoch": 0.24405012194096376, "grad_norm": NaN, "learning_rate": 8.738712889762171e-05, "loss": 0.0, "step": 1451 }, { "epoch": 0.24421831637372804, "grad_norm": NaN, "learning_rate": 8.736927933965385e-05, "loss": 0.0, "step": 1452 }, { "epoch": 0.24438651080649232, "grad_norm": NaN, "learning_rate": 8.735141898611974e-05, "loss": 0.0, "step": 1453 }, { "epoch": 0.24455470523925657, "grad_norm": NaN, "learning_rate": 8.733354784217905e-05, "loss": 0.0, "step": 1454 }, { "epoch": 0.24472289967202085, "grad_norm": NaN, "learning_rate": 8.731566591299455e-05, "loss": 0.0, "step": 1455 }, { "epoch": 0.24489109410478513, "grad_norm": NaN, "learning_rate": 8.729777320373213e-05, "loss": 0.0, "step": 1456 }, { "epoch": 0.2450592885375494, "grad_norm": NaN, "learning_rate": 8.727986971956079e-05, "loss": 0.0, "step": 1457 }, { "epoch": 0.2452274829703137, "grad_norm": NaN, "learning_rate": 8.726195546565263e-05, "loss": 0.0, "step": 1458 }, { "epoch": 0.24539567740307797, "grad_norm": NaN, "learning_rate": 8.724403044718292e-05, "loss": 0.0, "step": 1459 }, { "epoch": 0.24556387183584225, "grad_norm": NaN, "learning_rate": 8.722609466932996e-05, "loss": 0.0, "step": 1460 }, { "epoch": 0.2457320662686065, "grad_norm": NaN, "learning_rate": 8.72081481372752e-05, "loss": 0.0, "step": 1461 }, { "epoch": 0.24590026070137078, "grad_norm": NaN, "learning_rate": 8.719019085620321e-05, "loss": 0.0, "step": 1462 }, { "epoch": 0.24606845513413506, "grad_norm": NaN, "learning_rate": 8.717222283130163e-05, "loss": 0.0, "step": 1463 }, { "epoch": 0.24623664956689933, "grad_norm": NaN, "learning_rate": 8.715424406776123e-05, "loss": 0.0, "step": 1464 }, { "epoch": 0.24640484399966361, "grad_norm": NaN, "learning_rate": 8.713625457077585e-05, "loss": 0.0, "step": 1465 }, { "epoch": 0.2465730384324279, "grad_norm": NaN, "learning_rate": 8.71182543455425e-05, "loss": 0.0, "step": 1466 }, { "epoch": 0.24674123286519217, "grad_norm": NaN, "learning_rate": 8.710024339726121e-05, "loss": 0.0, "step": 1467 }, { "epoch": 0.24690942729795642, "grad_norm": NaN, "learning_rate": 8.708222173113514e-05, "loss": 0.0, "step": 1468 }, { "epoch": 0.2470776217307207, "grad_norm": NaN, "learning_rate": 8.706418935237057e-05, "loss": 0.0, "step": 1469 }, { "epoch": 0.24724581616348498, "grad_norm": NaN, "learning_rate": 8.704614626617683e-05, "loss": 0.0, "step": 1470 }, { "epoch": 0.24741401059624926, "grad_norm": NaN, "learning_rate": 8.702809247776638e-05, "loss": 0.0, "step": 1471 }, { "epoch": 0.24758220502901354, "grad_norm": NaN, "learning_rate": 8.701002799235475e-05, "loss": 0.0, "step": 1472 }, { "epoch": 0.24775039946177782, "grad_norm": NaN, "learning_rate": 8.699195281516056e-05, "loss": 0.0, "step": 1473 }, { "epoch": 0.2479185938945421, "grad_norm": NaN, "learning_rate": 8.697386695140554e-05, "loss": 0.0, "step": 1474 }, { "epoch": 0.24808678832730638, "grad_norm": NaN, "learning_rate": 8.69557704063145e-05, "loss": 0.0, "step": 1475 }, { "epoch": 0.24825498276007063, "grad_norm": NaN, "learning_rate": 8.693766318511532e-05, "loss": 0.0, "step": 1476 }, { "epoch": 0.2484231771928349, "grad_norm": NaN, "learning_rate": 8.691954529303895e-05, "loss": 0.0, "step": 1477 }, { "epoch": 0.2485913716255992, "grad_norm": NaN, "learning_rate": 8.690141673531948e-05, "loss": 0.0, "step": 1478 }, { "epoch": 0.24875956605836347, "grad_norm": NaN, "learning_rate": 8.688327751719403e-05, "loss": 0.0, "step": 1479 }, { "epoch": 0.24892776049112775, "grad_norm": NaN, "learning_rate": 8.686512764390283e-05, "loss": 0.0, "step": 1480 }, { "epoch": 0.24909595492389203, "grad_norm": NaN, "learning_rate": 8.684696712068916e-05, "loss": 0.0, "step": 1481 }, { "epoch": 0.2492641493566563, "grad_norm": NaN, "learning_rate": 8.68287959527994e-05, "loss": 0.0, "step": 1482 }, { "epoch": 0.24943234378942056, "grad_norm": NaN, "learning_rate": 8.6810614145483e-05, "loss": 0.0, "step": 1483 }, { "epoch": 0.24960053822218484, "grad_norm": NaN, "learning_rate": 8.679242170399246e-05, "loss": 0.0, "step": 1484 }, { "epoch": 0.24976873265494912, "grad_norm": NaN, "learning_rate": 8.677421863358341e-05, "loss": 0.0, "step": 1485 }, { "epoch": 0.2499369270877134, "grad_norm": NaN, "learning_rate": 8.675600493951448e-05, "loss": 0.0, "step": 1486 }, { "epoch": 0.25010512152047765, "grad_norm": NaN, "learning_rate": 8.673778062704741e-05, "loss": 0.0, "step": 1487 }, { "epoch": 0.2502733159532419, "grad_norm": NaN, "learning_rate": 8.671954570144701e-05, "loss": 0.0, "step": 1488 }, { "epoch": 0.2504415103860062, "grad_norm": NaN, "learning_rate": 8.670130016798115e-05, "loss": 0.0, "step": 1489 }, { "epoch": 0.2506097048187705, "grad_norm": NaN, "learning_rate": 8.668304403192074e-05, "loss": 0.0, "step": 1490 }, { "epoch": 0.25077789925153476, "grad_norm": NaN, "learning_rate": 8.666477729853977e-05, "loss": 0.0, "step": 1491 }, { "epoch": 0.25094609368429904, "grad_norm": NaN, "learning_rate": 8.664649997311531e-05, "loss": 0.0, "step": 1492 }, { "epoch": 0.2511142881170633, "grad_norm": NaN, "learning_rate": 8.662821206092748e-05, "loss": 0.0, "step": 1493 }, { "epoch": 0.2512824825498276, "grad_norm": NaN, "learning_rate": 8.660991356725946e-05, "loss": 0.0, "step": 1494 }, { "epoch": 0.2514506769825919, "grad_norm": NaN, "learning_rate": 8.659160449739745e-05, "loss": 0.0, "step": 1495 }, { "epoch": 0.25161887141535616, "grad_norm": NaN, "learning_rate": 8.657328485663074e-05, "loss": 0.0, "step": 1496 }, { "epoch": 0.25178706584812044, "grad_norm": NaN, "learning_rate": 8.65549546502517e-05, "loss": 0.0, "step": 1497 }, { "epoch": 0.2519552602808847, "grad_norm": NaN, "learning_rate": 8.653661388355568e-05, "loss": 0.0, "step": 1498 }, { "epoch": 0.252123454713649, "grad_norm": NaN, "learning_rate": 8.651826256184117e-05, "loss": 0.0, "step": 1499 }, { "epoch": 0.2522916491464133, "grad_norm": NaN, "learning_rate": 8.649990069040961e-05, "loss": 0.0, "step": 1500 }, { "epoch": 0.25245984357917756, "grad_norm": NaN, "learning_rate": 8.648152827456557e-05, "loss": 0.0, "step": 1501 }, { "epoch": 0.2526280380119418, "grad_norm": NaN, "learning_rate": 8.646314531961663e-05, "loss": 0.0, "step": 1502 }, { "epoch": 0.25279623244470606, "grad_norm": NaN, "learning_rate": 8.644475183087341e-05, "loss": 0.0, "step": 1503 }, { "epoch": 0.25296442687747034, "grad_norm": NaN, "learning_rate": 8.64263478136496e-05, "loss": 0.0, "step": 1504 }, { "epoch": 0.2531326213102346, "grad_norm": NaN, "learning_rate": 8.64079332732619e-05, "loss": 0.0, "step": 1505 }, { "epoch": 0.2533008157429989, "grad_norm": NaN, "learning_rate": 8.638950821503007e-05, "loss": 0.0, "step": 1506 }, { "epoch": 0.2534690101757632, "grad_norm": NaN, "learning_rate": 8.63710726442769e-05, "loss": 0.0, "step": 1507 }, { "epoch": 0.25363720460852746, "grad_norm": NaN, "learning_rate": 8.635262656632823e-05, "loss": 0.0, "step": 1508 }, { "epoch": 0.25380539904129173, "grad_norm": NaN, "learning_rate": 8.633416998651292e-05, "loss": 0.0, "step": 1509 }, { "epoch": 0.253973593474056, "grad_norm": NaN, "learning_rate": 8.631570291016284e-05, "loss": 0.0, "step": 1510 }, { "epoch": 0.2541417879068203, "grad_norm": NaN, "learning_rate": 8.629722534261298e-05, "loss": 0.0, "step": 1511 }, { "epoch": 0.2543099823395846, "grad_norm": NaN, "learning_rate": 8.627873728920127e-05, "loss": 0.0, "step": 1512 }, { "epoch": 0.25447817677234885, "grad_norm": NaN, "learning_rate": 8.626023875526868e-05, "loss": 0.0, "step": 1513 }, { "epoch": 0.25464637120511313, "grad_norm": NaN, "learning_rate": 8.624172974615926e-05, "loss": 0.0, "step": 1514 }, { "epoch": 0.2548145656378774, "grad_norm": NaN, "learning_rate": 8.622321026722005e-05, "loss": 0.0, "step": 1515 }, { "epoch": 0.25498276007064163, "grad_norm": NaN, "learning_rate": 8.620468032380111e-05, "loss": 0.0, "step": 1516 }, { "epoch": 0.2551509545034059, "grad_norm": NaN, "learning_rate": 8.618613992125556e-05, "loss": 0.0, "step": 1517 }, { "epoch": 0.2553191489361702, "grad_norm": NaN, "learning_rate": 8.616758906493948e-05, "loss": 0.0, "step": 1518 }, { "epoch": 0.25548734336893447, "grad_norm": NaN, "learning_rate": 8.614902776021202e-05, "loss": 0.0, "step": 1519 }, { "epoch": 0.25565553780169875, "grad_norm": NaN, "learning_rate": 8.613045601243534e-05, "loss": 0.0, "step": 1520 }, { "epoch": 0.25582373223446303, "grad_norm": NaN, "learning_rate": 8.611187382697458e-05, "loss": 0.0, "step": 1521 }, { "epoch": 0.2559919266672273, "grad_norm": NaN, "learning_rate": 8.609328120919797e-05, "loss": 0.0, "step": 1522 }, { "epoch": 0.2561601210999916, "grad_norm": NaN, "learning_rate": 8.607467816447669e-05, "loss": 0.0, "step": 1523 }, { "epoch": 0.25632831553275587, "grad_norm": NaN, "learning_rate": 8.605606469818495e-05, "loss": 0.0, "step": 1524 }, { "epoch": 0.25649650996552015, "grad_norm": NaN, "learning_rate": 8.603744081569994e-05, "loss": 0.0, "step": 1525 }, { "epoch": 0.2566647043982844, "grad_norm": NaN, "learning_rate": 8.601880652240195e-05, "loss": 0.0, "step": 1526 }, { "epoch": 0.2568328988310487, "grad_norm": NaN, "learning_rate": 8.600016182367417e-05, "loss": 0.0, "step": 1527 }, { "epoch": 0.257001093263813, "grad_norm": NaN, "learning_rate": 8.598150672490288e-05, "loss": 0.0, "step": 1528 }, { "epoch": 0.25716928769657726, "grad_norm": NaN, "learning_rate": 8.596284123147731e-05, "loss": 0.0, "step": 1529 }, { "epoch": 0.25733748212934154, "grad_norm": NaN, "learning_rate": 8.59441653487897e-05, "loss": 0.0, "step": 1530 }, { "epoch": 0.25750567656210577, "grad_norm": NaN, "learning_rate": 8.592547908223532e-05, "loss": 0.0, "step": 1531 }, { "epoch": 0.25767387099487005, "grad_norm": NaN, "learning_rate": 8.590678243721245e-05, "loss": 0.0, "step": 1532 }, { "epoch": 0.2578420654276343, "grad_norm": NaN, "learning_rate": 8.58880754191223e-05, "loss": 0.0, "step": 1533 }, { "epoch": 0.2580102598603986, "grad_norm": NaN, "learning_rate": 8.586935803336911e-05, "loss": 0.0, "step": 1534 }, { "epoch": 0.2581784542931629, "grad_norm": NaN, "learning_rate": 8.585063028536016e-05, "loss": 0.0, "step": 1535 }, { "epoch": 0.25834664872592716, "grad_norm": NaN, "learning_rate": 8.583189218050567e-05, "loss": 0.0, "step": 1536 }, { "epoch": 0.25851484315869144, "grad_norm": NaN, "learning_rate": 8.581314372421887e-05, "loss": 0.0, "step": 1537 }, { "epoch": 0.2586830375914557, "grad_norm": NaN, "learning_rate": 8.579438492191597e-05, "loss": 0.0, "step": 1538 }, { "epoch": 0.25885123202422, "grad_norm": NaN, "learning_rate": 8.57756157790162e-05, "loss": 0.0, "step": 1539 }, { "epoch": 0.2590194264569843, "grad_norm": NaN, "learning_rate": 8.575683630094173e-05, "loss": 0.0, "step": 1540 }, { "epoch": 0.25918762088974856, "grad_norm": NaN, "learning_rate": 8.573804649311776e-05, "loss": 0.0, "step": 1541 }, { "epoch": 0.25935581532251284, "grad_norm": NaN, "learning_rate": 8.571924636097243e-05, "loss": 0.0, "step": 1542 }, { "epoch": 0.2595240097552771, "grad_norm": NaN, "learning_rate": 8.570043590993693e-05, "loss": 0.0, "step": 1543 }, { "epoch": 0.2596922041880414, "grad_norm": NaN, "learning_rate": 8.568161514544535e-05, "loss": 0.0, "step": 1544 }, { "epoch": 0.2598603986208057, "grad_norm": NaN, "learning_rate": 8.566278407293481e-05, "loss": 0.0, "step": 1545 }, { "epoch": 0.2600285930535699, "grad_norm": NaN, "learning_rate": 8.56439426978454e-05, "loss": 0.0, "step": 1546 }, { "epoch": 0.2601967874863342, "grad_norm": NaN, "learning_rate": 8.562509102562018e-05, "loss": 0.0, "step": 1547 }, { "epoch": 0.26036498191909846, "grad_norm": NaN, "learning_rate": 8.560622906170518e-05, "loss": 0.0, "step": 1548 }, { "epoch": 0.26053317635186274, "grad_norm": NaN, "learning_rate": 8.558735681154943e-05, "loss": 0.0, "step": 1549 }, { "epoch": 0.260701370784627, "grad_norm": NaN, "learning_rate": 8.55684742806049e-05, "loss": 0.0, "step": 1550 }, { "epoch": 0.2608695652173913, "grad_norm": NaN, "learning_rate": 8.554958147432652e-05, "loss": 0.0, "step": 1551 }, { "epoch": 0.2610377596501556, "grad_norm": NaN, "learning_rate": 8.553067839817225e-05, "loss": 0.0, "step": 1552 }, { "epoch": 0.26120595408291986, "grad_norm": NaN, "learning_rate": 8.551176505760294e-05, "loss": 0.0, "step": 1553 }, { "epoch": 0.26137414851568413, "grad_norm": NaN, "learning_rate": 8.549284145808248e-05, "loss": 0.0, "step": 1554 }, { "epoch": 0.2615423429484484, "grad_norm": NaN, "learning_rate": 8.547390760507765e-05, "loss": 0.0, "step": 1555 }, { "epoch": 0.2617105373812127, "grad_norm": NaN, "learning_rate": 8.545496350405825e-05, "loss": 0.0, "step": 1556 }, { "epoch": 0.261878731813977, "grad_norm": NaN, "learning_rate": 8.543600916049701e-05, "loss": 0.0, "step": 1557 }, { "epoch": 0.26204692624674125, "grad_norm": NaN, "learning_rate": 8.541704457986963e-05, "loss": 0.0, "step": 1558 }, { "epoch": 0.26221512067950553, "grad_norm": NaN, "learning_rate": 8.539806976765476e-05, "loss": 0.0, "step": 1559 }, { "epoch": 0.2623833151122698, "grad_norm": NaN, "learning_rate": 8.537908472933402e-05, "loss": 0.0, "step": 1560 }, { "epoch": 0.26255150954503403, "grad_norm": NaN, "learning_rate": 8.536008947039197e-05, "loss": 0.0, "step": 1561 }, { "epoch": 0.2627197039777983, "grad_norm": NaN, "learning_rate": 8.534108399631612e-05, "loss": 0.0, "step": 1562 }, { "epoch": 0.2628878984105626, "grad_norm": NaN, "learning_rate": 8.532206831259696e-05, "loss": 0.0, "step": 1563 }, { "epoch": 0.26305609284332687, "grad_norm": NaN, "learning_rate": 8.530304242472788e-05, "loss": 0.0, "step": 1564 }, { "epoch": 0.26322428727609115, "grad_norm": NaN, "learning_rate": 8.528400633820528e-05, "loss": 0.0, "step": 1565 }, { "epoch": 0.26339248170885543, "grad_norm": NaN, "learning_rate": 8.526496005852845e-05, "loss": 0.0, "step": 1566 }, { "epoch": 0.2635606761416197, "grad_norm": NaN, "learning_rate": 8.524590359119961e-05, "loss": 0.0, "step": 1567 }, { "epoch": 0.263728870574384, "grad_norm": NaN, "learning_rate": 8.522683694172404e-05, "loss": 0.0, "step": 1568 }, { "epoch": 0.26389706500714827, "grad_norm": NaN, "learning_rate": 8.520776011560983e-05, "loss": 0.0, "step": 1569 }, { "epoch": 0.26406525943991255, "grad_norm": NaN, "learning_rate": 8.518867311836808e-05, "loss": 0.0, "step": 1570 }, { "epoch": 0.2642334538726768, "grad_norm": NaN, "learning_rate": 8.516957595551279e-05, "loss": 0.0, "step": 1571 }, { "epoch": 0.2644016483054411, "grad_norm": NaN, "learning_rate": 8.515046863256094e-05, "loss": 0.0, "step": 1572 }, { "epoch": 0.2645698427382054, "grad_norm": NaN, "learning_rate": 8.513135115503239e-05, "loss": 0.0, "step": 1573 }, { "epoch": 0.26473803717096966, "grad_norm": NaN, "learning_rate": 8.511222352845e-05, "loss": 0.0, "step": 1574 }, { "epoch": 0.26490623160373394, "grad_norm": NaN, "learning_rate": 8.50930857583395e-05, "loss": 0.0, "step": 1575 }, { "epoch": 0.26507442603649817, "grad_norm": NaN, "learning_rate": 8.507393785022961e-05, "loss": 0.0, "step": 1576 }, { "epoch": 0.26524262046926245, "grad_norm": NaN, "learning_rate": 8.505477980965191e-05, "loss": 0.0, "step": 1577 }, { "epoch": 0.2654108149020267, "grad_norm": NaN, "learning_rate": 8.503561164214095e-05, "loss": 0.0, "step": 1578 }, { "epoch": 0.265579009334791, "grad_norm": NaN, "learning_rate": 8.501643335323424e-05, "loss": 0.0, "step": 1579 }, { "epoch": 0.2657472037675553, "grad_norm": NaN, "learning_rate": 8.499724494847212e-05, "loss": 0.0, "step": 1580 }, { "epoch": 0.26591539820031956, "grad_norm": NaN, "learning_rate": 8.497804643339792e-05, "loss": 0.0, "step": 1581 }, { "epoch": 0.26608359263308384, "grad_norm": NaN, "learning_rate": 8.495883781355789e-05, "loss": 0.0, "step": 1582 }, { "epoch": 0.2662517870658481, "grad_norm": NaN, "learning_rate": 8.493961909450118e-05, "loss": 0.0, "step": 1583 }, { "epoch": 0.2664199814986124, "grad_norm": NaN, "learning_rate": 8.492039028177986e-05, "loss": 0.0, "step": 1584 }, { "epoch": 0.2665881759313767, "grad_norm": NaN, "learning_rate": 8.490115138094889e-05, "loss": 0.0, "step": 1585 }, { "epoch": 0.26675637036414096, "grad_norm": NaN, "learning_rate": 8.488190239756623e-05, "loss": 0.0, "step": 1586 }, { "epoch": 0.26692456479690524, "grad_norm": NaN, "learning_rate": 8.486264333719267e-05, "loss": 0.0, "step": 1587 }, { "epoch": 0.2670927592296695, "grad_norm": NaN, "learning_rate": 8.484337420539193e-05, "loss": 0.0, "step": 1588 }, { "epoch": 0.2672609536624338, "grad_norm": NaN, "learning_rate": 8.482409500773066e-05, "loss": 0.0, "step": 1589 }, { "epoch": 0.267429148095198, "grad_norm": NaN, "learning_rate": 8.480480574977836e-05, "loss": 0.0, "step": 1590 }, { "epoch": 0.2675973425279623, "grad_norm": NaN, "learning_rate": 8.478550643710754e-05, "loss": 0.0, "step": 1591 }, { "epoch": 0.2677655369607266, "grad_norm": NaN, "learning_rate": 8.476619707529354e-05, "loss": 0.0, "step": 1592 }, { "epoch": 0.26793373139349086, "grad_norm": NaN, "learning_rate": 8.474687766991459e-05, "loss": 0.0, "step": 1593 }, { "epoch": 0.26810192582625514, "grad_norm": NaN, "learning_rate": 8.472754822655187e-05, "loss": 0.0, "step": 1594 }, { "epoch": 0.2682701202590194, "grad_norm": NaN, "learning_rate": 8.470820875078944e-05, "loss": 0.0, "step": 1595 }, { "epoch": 0.2684383146917837, "grad_norm": NaN, "learning_rate": 8.468885924821426e-05, "loss": 0.0, "step": 1596 }, { "epoch": 0.268606509124548, "grad_norm": NaN, "learning_rate": 8.466949972441618e-05, "loss": 0.0, "step": 1597 }, { "epoch": 0.26877470355731226, "grad_norm": NaN, "learning_rate": 8.465013018498795e-05, "loss": 0.0, "step": 1598 }, { "epoch": 0.26894289799007653, "grad_norm": NaN, "learning_rate": 8.463075063552521e-05, "loss": 0.0, "step": 1599 }, { "epoch": 0.2691110924228408, "grad_norm": NaN, "learning_rate": 8.46113610816265e-05, "loss": 0.0, "step": 1600 }, { "epoch": 0.2692792868556051, "grad_norm": NaN, "learning_rate": 8.459196152889325e-05, "loss": 0.0, "step": 1601 }, { "epoch": 0.2694474812883694, "grad_norm": NaN, "learning_rate": 8.457255198292977e-05, "loss": 0.0, "step": 1602 }, { "epoch": 0.26961567572113365, "grad_norm": NaN, "learning_rate": 8.455313244934324e-05, "loss": 0.0, "step": 1603 }, { "epoch": 0.26978387015389793, "grad_norm": NaN, "learning_rate": 8.453370293374376e-05, "loss": 0.0, "step": 1604 }, { "epoch": 0.26995206458666215, "grad_norm": NaN, "learning_rate": 8.451426344174433e-05, "loss": 0.0, "step": 1605 }, { "epoch": 0.27012025901942643, "grad_norm": NaN, "learning_rate": 8.449481397896075e-05, "loss": 0.0, "step": 1606 }, { "epoch": 0.2702884534521907, "grad_norm": NaN, "learning_rate": 8.44753545510118e-05, "loss": 0.0, "step": 1607 }, { "epoch": 0.270456647884955, "grad_norm": NaN, "learning_rate": 8.445588516351902e-05, "loss": 0.0, "step": 1608 }, { "epoch": 0.27062484231771927, "grad_norm": NaN, "learning_rate": 8.443640582210697e-05, "loss": 0.0, "step": 1609 }, { "epoch": 0.27079303675048355, "grad_norm": NaN, "learning_rate": 8.4416916532403e-05, "loss": 0.0, "step": 1610 }, { "epoch": 0.27096123118324783, "grad_norm": NaN, "learning_rate": 8.439741730003732e-05, "loss": 0.0, "step": 1611 }, { "epoch": 0.2711294256160121, "grad_norm": NaN, "learning_rate": 8.437790813064305e-05, "loss": 0.0, "step": 1612 }, { "epoch": 0.2712976200487764, "grad_norm": NaN, "learning_rate": 8.43583890298562e-05, "loss": 0.0, "step": 1613 }, { "epoch": 0.27146581448154067, "grad_norm": NaN, "learning_rate": 8.433886000331557e-05, "loss": 0.0, "step": 1614 }, { "epoch": 0.27163400891430495, "grad_norm": NaN, "learning_rate": 8.431932105666291e-05, "loss": 0.0, "step": 1615 }, { "epoch": 0.2718022033470692, "grad_norm": NaN, "learning_rate": 8.429977219554282e-05, "loss": 0.0, "step": 1616 }, { "epoch": 0.2719703977798335, "grad_norm": NaN, "learning_rate": 8.42802134256027e-05, "loss": 0.0, "step": 1617 }, { "epoch": 0.2721385922125978, "grad_norm": NaN, "learning_rate": 8.426064475249287e-05, "loss": 0.0, "step": 1618 }, { "epoch": 0.27230678664536206, "grad_norm": NaN, "learning_rate": 8.424106618186653e-05, "loss": 0.0, "step": 1619 }, { "epoch": 0.2724749810781263, "grad_norm": NaN, "learning_rate": 8.422147771937969e-05, "loss": 0.0, "step": 1620 }, { "epoch": 0.27264317551089057, "grad_norm": NaN, "learning_rate": 8.420187937069124e-05, "loss": 0.0, "step": 1621 }, { "epoch": 0.27281136994365485, "grad_norm": NaN, "learning_rate": 8.418227114146291e-05, "loss": 0.0, "step": 1622 }, { "epoch": 0.2729795643764191, "grad_norm": NaN, "learning_rate": 8.416265303735932e-05, "loss": 0.0, "step": 1623 }, { "epoch": 0.2731477588091834, "grad_norm": NaN, "learning_rate": 8.414302506404791e-05, "loss": 0.0, "step": 1624 }, { "epoch": 0.2733159532419477, "grad_norm": NaN, "learning_rate": 8.412338722719899e-05, "loss": 0.0, "step": 1625 }, { "epoch": 0.27348414767471196, "grad_norm": NaN, "learning_rate": 8.41037395324857e-05, "loss": 0.0, "step": 1626 }, { "epoch": 0.27365234210747624, "grad_norm": NaN, "learning_rate": 8.408408198558403e-05, "loss": 0.0, "step": 1627 }, { "epoch": 0.2738205365402405, "grad_norm": NaN, "learning_rate": 8.406441459217285e-05, "loss": 0.0, "step": 1628 }, { "epoch": 0.2739887309730048, "grad_norm": NaN, "learning_rate": 8.404473735793384e-05, "loss": 0.0, "step": 1629 }, { "epoch": 0.2741569254057691, "grad_norm": NaN, "learning_rate": 8.402505028855152e-05, "loss": 0.0, "step": 1630 }, { "epoch": 0.27432511983853336, "grad_norm": NaN, "learning_rate": 8.400535338971326e-05, "loss": 0.0, "step": 1631 }, { "epoch": 0.27449331427129764, "grad_norm": NaN, "learning_rate": 8.398564666710932e-05, "loss": 0.0, "step": 1632 }, { "epoch": 0.2746615087040619, "grad_norm": NaN, "learning_rate": 8.396593012643272e-05, "loss": 0.0, "step": 1633 }, { "epoch": 0.2748297031368262, "grad_norm": NaN, "learning_rate": 8.394620377337931e-05, "loss": 0.0, "step": 1634 }, { "epoch": 0.2749978975695904, "grad_norm": NaN, "learning_rate": 8.392646761364789e-05, "loss": 0.0, "step": 1635 }, { "epoch": 0.2751660920023547, "grad_norm": NaN, "learning_rate": 8.390672165293995e-05, "loss": 0.0, "step": 1636 }, { "epoch": 0.275334286435119, "grad_norm": NaN, "learning_rate": 8.388696589695991e-05, "loss": 0.0, "step": 1637 }, { "epoch": 0.27550248086788326, "grad_norm": NaN, "learning_rate": 8.386720035141497e-05, "loss": 0.0, "step": 1638 }, { "epoch": 0.27567067530064754, "grad_norm": NaN, "learning_rate": 8.38474250220152e-05, "loss": 0.0, "step": 1639 }, { "epoch": 0.2758388697334118, "grad_norm": NaN, "learning_rate": 8.382763991447344e-05, "loss": 0.0, "step": 1640 }, { "epoch": 0.2760070641661761, "grad_norm": NaN, "learning_rate": 8.38078450345054e-05, "loss": 0.0, "step": 1641 }, { "epoch": 0.2761752585989404, "grad_norm": NaN, "learning_rate": 8.37880403878296e-05, "loss": 0.0, "step": 1642 }, { "epoch": 0.27634345303170466, "grad_norm": NaN, "learning_rate": 8.376822598016739e-05, "loss": 0.0, "step": 1643 }, { "epoch": 0.27651164746446893, "grad_norm": NaN, "learning_rate": 8.37484018172429e-05, "loss": 0.0, "step": 1644 }, { "epoch": 0.2766798418972332, "grad_norm": NaN, "learning_rate": 8.372856790478315e-05, "loss": 0.0, "step": 1645 }, { "epoch": 0.2768480363299975, "grad_norm": NaN, "learning_rate": 8.37087242485179e-05, "loss": 0.0, "step": 1646 }, { "epoch": 0.2770162307627618, "grad_norm": NaN, "learning_rate": 8.368887085417977e-05, "loss": 0.0, "step": 1647 }, { "epoch": 0.27718442519552605, "grad_norm": NaN, "learning_rate": 8.366900772750421e-05, "loss": 0.0, "step": 1648 }, { "epoch": 0.27735261962829033, "grad_norm": NaN, "learning_rate": 8.364913487422942e-05, "loss": 0.0, "step": 1649 }, { "epoch": 0.27752081406105455, "grad_norm": NaN, "learning_rate": 8.362925230009646e-05, "loss": 0.0, "step": 1650 }, { "epoch": 0.27768900849381883, "grad_norm": NaN, "learning_rate": 8.360936001084917e-05, "loss": 0.0, "step": 1651 }, { "epoch": 0.2778572029265831, "grad_norm": NaN, "learning_rate": 8.358945801223422e-05, "loss": 0.0, "step": 1652 }, { "epoch": 0.2780253973593474, "grad_norm": NaN, "learning_rate": 8.356954631000107e-05, "loss": 0.0, "step": 1653 }, { "epoch": 0.27819359179211167, "grad_norm": NaN, "learning_rate": 8.354962490990202e-05, "loss": 0.0, "step": 1654 }, { "epoch": 0.27836178622487595, "grad_norm": NaN, "learning_rate": 8.352969381769208e-05, "loss": 0.0, "step": 1655 }, { "epoch": 0.27852998065764023, "grad_norm": NaN, "learning_rate": 8.350975303912916e-05, "loss": 0.0, "step": 1656 }, { "epoch": 0.2786981750904045, "grad_norm": NaN, "learning_rate": 8.348980257997392e-05, "loss": 0.0, "step": 1657 }, { "epoch": 0.2788663695231688, "grad_norm": NaN, "learning_rate": 8.346984244598981e-05, "loss": 0.0, "step": 1658 }, { "epoch": 0.27903456395593307, "grad_norm": NaN, "learning_rate": 8.344987264294312e-05, "loss": 0.0, "step": 1659 }, { "epoch": 0.27920275838869735, "grad_norm": NaN, "learning_rate": 8.342989317660289e-05, "loss": 0.0, "step": 1660 }, { "epoch": 0.2793709528214616, "grad_norm": NaN, "learning_rate": 8.340990405274091e-05, "loss": 0.0, "step": 1661 }, { "epoch": 0.2795391472542259, "grad_norm": NaN, "learning_rate": 8.338990527713191e-05, "loss": 0.0, "step": 1662 }, { "epoch": 0.2797073416869902, "grad_norm": NaN, "learning_rate": 8.336989685555324e-05, "loss": 0.0, "step": 1663 }, { "epoch": 0.2798755361197544, "grad_norm": NaN, "learning_rate": 8.334987879378516e-05, "loss": 0.0, "step": 1664 }, { "epoch": 0.2800437305525187, "grad_norm": NaN, "learning_rate": 8.33298510976106e-05, "loss": 0.0, "step": 1665 }, { "epoch": 0.28021192498528297, "grad_norm": NaN, "learning_rate": 8.33098137728154e-05, "loss": 0.0, "step": 1666 }, { "epoch": 0.28038011941804725, "grad_norm": NaN, "learning_rate": 8.328976682518807e-05, "loss": 0.0, "step": 1667 }, { "epoch": 0.2805483138508115, "grad_norm": NaN, "learning_rate": 8.326971026052e-05, "loss": 0.0, "step": 1668 }, { "epoch": 0.2807165082835758, "grad_norm": NaN, "learning_rate": 8.324964408460525e-05, "loss": 0.0, "step": 1669 }, { "epoch": 0.2808847027163401, "grad_norm": NaN, "learning_rate": 8.322956830324076e-05, "loss": 0.0, "step": 1670 }, { "epoch": 0.28105289714910436, "grad_norm": NaN, "learning_rate": 8.320948292222617e-05, "loss": 0.0, "step": 1671 }, { "epoch": 0.28122109158186864, "grad_norm": NaN, "learning_rate": 8.318938794736392e-05, "loss": 0.0, "step": 1672 }, { "epoch": 0.2813892860146329, "grad_norm": NaN, "learning_rate": 8.316928338445927e-05, "loss": 0.0, "step": 1673 }, { "epoch": 0.2815574804473972, "grad_norm": NaN, "learning_rate": 8.314916923932014e-05, "loss": 0.0, "step": 1674 }, { "epoch": 0.2817256748801615, "grad_norm": NaN, "learning_rate": 8.312904551775731e-05, "loss": 0.0, "step": 1675 }, { "epoch": 0.28189386931292576, "grad_norm": NaN, "learning_rate": 8.31089122255843e-05, "loss": 0.0, "step": 1676 }, { "epoch": 0.28206206374569004, "grad_norm": NaN, "learning_rate": 8.308876936861739e-05, "loss": 0.0, "step": 1677 }, { "epoch": 0.2822302581784543, "grad_norm": NaN, "learning_rate": 8.306861695267563e-05, "loss": 0.0, "step": 1678 }, { "epoch": 0.28239845261121854, "grad_norm": NaN, "learning_rate": 8.304845498358081e-05, "loss": 0.0, "step": 1679 }, { "epoch": 0.2825666470439828, "grad_norm": NaN, "learning_rate": 8.302828346715752e-05, "loss": 0.0, "step": 1680 }, { "epoch": 0.2827348414767471, "grad_norm": NaN, "learning_rate": 8.300810240923307e-05, "loss": 0.0, "step": 1681 }, { "epoch": 0.2829030359095114, "grad_norm": NaN, "learning_rate": 8.298791181563754e-05, "loss": 0.0, "step": 1682 }, { "epoch": 0.28307123034227566, "grad_norm": NaN, "learning_rate": 8.296771169220378e-05, "loss": 0.0, "step": 1683 }, { "epoch": 0.28323942477503994, "grad_norm": NaN, "learning_rate": 8.294750204476736e-05, "loss": 0.0, "step": 1684 }, { "epoch": 0.2834076192078042, "grad_norm": NaN, "learning_rate": 8.292728287916665e-05, "loss": 0.0, "step": 1685 }, { "epoch": 0.2835758136405685, "grad_norm": NaN, "learning_rate": 8.290705420124272e-05, "loss": 0.0, "step": 1686 }, { "epoch": 0.2837440080733328, "grad_norm": NaN, "learning_rate": 8.28868160168394e-05, "loss": 0.0, "step": 1687 }, { "epoch": 0.28391220250609706, "grad_norm": NaN, "learning_rate": 8.286656833180329e-05, "loss": 0.0, "step": 1688 }, { "epoch": 0.28408039693886133, "grad_norm": NaN, "learning_rate": 8.284631115198371e-05, "loss": 0.0, "step": 1689 }, { "epoch": 0.2842485913716256, "grad_norm": NaN, "learning_rate": 8.282604448323277e-05, "loss": 0.0, "step": 1690 }, { "epoch": 0.2844167858043899, "grad_norm": NaN, "learning_rate": 8.280576833140522e-05, "loss": 0.0, "step": 1691 }, { "epoch": 0.2845849802371542, "grad_norm": NaN, "learning_rate": 8.278548270235865e-05, "loss": 0.0, "step": 1692 }, { "epoch": 0.28475317466991845, "grad_norm": NaN, "learning_rate": 8.276518760195335e-05, "loss": 0.0, "step": 1693 }, { "epoch": 0.2849213691026827, "grad_norm": NaN, "learning_rate": 8.274488303605234e-05, "loss": 0.0, "step": 1694 }, { "epoch": 0.28508956353544695, "grad_norm": NaN, "learning_rate": 8.272456901052139e-05, "loss": 0.0, "step": 1695 }, { "epoch": 0.28525775796821123, "grad_norm": NaN, "learning_rate": 8.270424553122899e-05, "loss": 0.0, "step": 1696 }, { "epoch": 0.2854259524009755, "grad_norm": NaN, "learning_rate": 8.268391260404635e-05, "loss": 0.0, "step": 1697 }, { "epoch": 0.2855941468337398, "grad_norm": NaN, "learning_rate": 8.266357023484745e-05, "loss": 0.0, "step": 1698 }, { "epoch": 0.28576234126650407, "grad_norm": NaN, "learning_rate": 8.264321842950898e-05, "loss": 0.0, "step": 1699 }, { "epoch": 0.28593053569926835, "grad_norm": NaN, "learning_rate": 8.262285719391029e-05, "loss": 0.0, "step": 1700 }, { "epoch": 0.28609873013203263, "grad_norm": NaN, "learning_rate": 8.260248653393359e-05, "loss": 0.0, "step": 1701 }, { "epoch": 0.2862669245647969, "grad_norm": NaN, "learning_rate": 8.258210645546368e-05, "loss": 0.0, "step": 1702 }, { "epoch": 0.2864351189975612, "grad_norm": NaN, "learning_rate": 8.256171696438817e-05, "loss": 0.0, "step": 1703 }, { "epoch": 0.28660331343032547, "grad_norm": NaN, "learning_rate": 8.254131806659733e-05, "loss": 0.0, "step": 1704 }, { "epoch": 0.28677150786308975, "grad_norm": NaN, "learning_rate": 8.252090976798419e-05, "loss": 0.0, "step": 1705 }, { "epoch": 0.286939702295854, "grad_norm": NaN, "learning_rate": 8.250049207444446e-05, "loss": 0.0, "step": 1706 }, { "epoch": 0.2871078967286183, "grad_norm": NaN, "learning_rate": 8.248006499187661e-05, "loss": 0.0, "step": 1707 }, { "epoch": 0.2872760911613826, "grad_norm": NaN, "learning_rate": 8.245962852618176e-05, "loss": 0.0, "step": 1708 }, { "epoch": 0.2874442855941468, "grad_norm": NaN, "learning_rate": 8.243918268326382e-05, "loss": 0.0, "step": 1709 }, { "epoch": 0.2876124800269111, "grad_norm": NaN, "learning_rate": 8.241872746902935e-05, "loss": 0.0, "step": 1710 }, { "epoch": 0.28778067445967537, "grad_norm": NaN, "learning_rate": 8.23982628893876e-05, "loss": 0.0, "step": 1711 }, { "epoch": 0.28794886889243965, "grad_norm": NaN, "learning_rate": 8.237778895025059e-05, "loss": 0.0, "step": 1712 }, { "epoch": 0.2881170633252039, "grad_norm": NaN, "learning_rate": 8.235730565753301e-05, "loss": 0.0, "step": 1713 }, { "epoch": 0.2882852577579682, "grad_norm": NaN, "learning_rate": 8.233681301715225e-05, "loss": 0.0, "step": 1714 }, { "epoch": 0.2884534521907325, "grad_norm": NaN, "learning_rate": 8.23163110350284e-05, "loss": 0.0, "step": 1715 }, { "epoch": 0.28862164662349676, "grad_norm": NaN, "learning_rate": 8.229579971708426e-05, "loss": 0.0, "step": 1716 }, { "epoch": 0.28878984105626104, "grad_norm": NaN, "learning_rate": 8.22752790692453e-05, "loss": 0.0, "step": 1717 }, { "epoch": 0.2889580354890253, "grad_norm": NaN, "learning_rate": 8.225474909743973e-05, "loss": 0.0, "step": 1718 }, { "epoch": 0.2891262299217896, "grad_norm": NaN, "learning_rate": 8.223420980759842e-05, "loss": 0.0, "step": 1719 }, { "epoch": 0.2892944243545539, "grad_norm": NaN, "learning_rate": 8.221366120565493e-05, "loss": 0.0, "step": 1720 }, { "epoch": 0.28946261878731816, "grad_norm": NaN, "learning_rate": 8.219310329754554e-05, "loss": 0.0, "step": 1721 }, { "epoch": 0.28963081322008244, "grad_norm": NaN, "learning_rate": 8.21725360892092e-05, "loss": 0.0, "step": 1722 }, { "epoch": 0.2897990076528467, "grad_norm": NaN, "learning_rate": 8.215195958658752e-05, "loss": 0.0, "step": 1723 }, { "epoch": 0.28996720208561094, "grad_norm": NaN, "learning_rate": 8.213137379562485e-05, "loss": 0.0, "step": 1724 }, { "epoch": 0.2901353965183752, "grad_norm": NaN, "learning_rate": 8.211077872226819e-05, "loss": 0.0, "step": 1725 }, { "epoch": 0.2903035909511395, "grad_norm": NaN, "learning_rate": 8.20901743724672e-05, "loss": 0.0, "step": 1726 }, { "epoch": 0.2904717853839038, "grad_norm": NaN, "learning_rate": 8.206956075217428e-05, "loss": 0.0, "step": 1727 }, { "epoch": 0.29063997981666806, "grad_norm": NaN, "learning_rate": 8.204893786734444e-05, "loss": 0.0, "step": 1728 }, { "epoch": 0.29080817424943234, "grad_norm": NaN, "learning_rate": 8.202830572393543e-05, "loss": 0.0, "step": 1729 }, { "epoch": 0.2909763686821966, "grad_norm": NaN, "learning_rate": 8.200766432790764e-05, "loss": 0.0, "step": 1730 }, { "epoch": 0.2911445631149609, "grad_norm": NaN, "learning_rate": 8.198701368522412e-05, "loss": 0.0, "step": 1731 }, { "epoch": 0.2913127575477252, "grad_norm": NaN, "learning_rate": 8.196635380185063e-05, "loss": 0.0, "step": 1732 }, { "epoch": 0.29148095198048946, "grad_norm": NaN, "learning_rate": 8.194568468375558e-05, "loss": 0.0, "step": 1733 }, { "epoch": 0.29164914641325373, "grad_norm": NaN, "learning_rate": 8.192500633691001e-05, "loss": 0.0, "step": 1734 }, { "epoch": 0.291817340846018, "grad_norm": NaN, "learning_rate": 8.190431876728772e-05, "loss": 0.0, "step": 1735 }, { "epoch": 0.2919855352787823, "grad_norm": NaN, "learning_rate": 8.188362198086506e-05, "loss": 0.0, "step": 1736 }, { "epoch": 0.2921537297115466, "grad_norm": NaN, "learning_rate": 8.186291598362114e-05, "loss": 0.0, "step": 1737 }, { "epoch": 0.2923219241443108, "grad_norm": NaN, "learning_rate": 8.184220078153768e-05, "loss": 0.0, "step": 1738 }, { "epoch": 0.2924901185770751, "grad_norm": NaN, "learning_rate": 8.182147638059906e-05, "loss": 0.0, "step": 1739 }, { "epoch": 0.29265831300983935, "grad_norm": NaN, "learning_rate": 8.180074278679233e-05, "loss": 0.0, "step": 1740 }, { "epoch": 0.29282650744260363, "grad_norm": NaN, "learning_rate": 8.178000000610722e-05, "loss": 0.0, "step": 1741 }, { "epoch": 0.2929947018753679, "grad_norm": NaN, "learning_rate": 8.175924804453604e-05, "loss": 0.0, "step": 1742 }, { "epoch": 0.2931628963081322, "grad_norm": NaN, "learning_rate": 8.173848690807385e-05, "loss": 0.0, "step": 1743 }, { "epoch": 0.29333109074089647, "grad_norm": NaN, "learning_rate": 8.171771660271825e-05, "loss": 0.0, "step": 1744 }, { "epoch": 0.29349928517366075, "grad_norm": NaN, "learning_rate": 8.169693713446959e-05, "loss": 0.0, "step": 1745 }, { "epoch": 0.29366747960642503, "grad_norm": NaN, "learning_rate": 8.167614850933083e-05, "loss": 0.0, "step": 1746 }, { "epoch": 0.2938356740391893, "grad_norm": NaN, "learning_rate": 8.165535073330757e-05, "loss": 0.0, "step": 1747 }, { "epoch": 0.2940038684719536, "grad_norm": NaN, "learning_rate": 8.163454381240803e-05, "loss": 0.0, "step": 1748 }, { "epoch": 0.29417206290471787, "grad_norm": NaN, "learning_rate": 8.161372775264311e-05, "loss": 0.0, "step": 1749 }, { "epoch": 0.29434025733748215, "grad_norm": NaN, "learning_rate": 8.159290256002632e-05, "loss": 0.0, "step": 1750 }, { "epoch": 0.2945084517702464, "grad_norm": NaN, "learning_rate": 8.157206824057386e-05, "loss": 0.0, "step": 1751 }, { "epoch": 0.2946766462030107, "grad_norm": NaN, "learning_rate": 8.155122480030453e-05, "loss": 0.0, "step": 1752 }, { "epoch": 0.29484484063577493, "grad_norm": NaN, "learning_rate": 8.153037224523972e-05, "loss": 0.0, "step": 1753 }, { "epoch": 0.2950130350685392, "grad_norm": NaN, "learning_rate": 8.150951058140354e-05, "loss": 0.0, "step": 1754 }, { "epoch": 0.2951812295013035, "grad_norm": NaN, "learning_rate": 8.148863981482267e-05, "loss": 0.0, "step": 1755 }, { "epoch": 0.29534942393406777, "grad_norm": NaN, "learning_rate": 8.146775995152646e-05, "loss": 0.0, "step": 1756 }, { "epoch": 0.29551761836683205, "grad_norm": NaN, "learning_rate": 8.144687099754685e-05, "loss": 0.0, "step": 1757 }, { "epoch": 0.2956858127995963, "grad_norm": NaN, "learning_rate": 8.142597295891846e-05, "loss": 0.0, "step": 1758 }, { "epoch": 0.2958540072323606, "grad_norm": NaN, "learning_rate": 8.140506584167845e-05, "loss": 0.0, "step": 1759 }, { "epoch": 0.2960222016651249, "grad_norm": NaN, "learning_rate": 8.138414965186667e-05, "loss": 0.0, "step": 1760 }, { "epoch": 0.29619039609788916, "grad_norm": NaN, "learning_rate": 8.13632243955256e-05, "loss": 0.0, "step": 1761 }, { "epoch": 0.29635859053065344, "grad_norm": NaN, "learning_rate": 8.134229007870027e-05, "loss": 0.0, "step": 1762 }, { "epoch": 0.2965267849634177, "grad_norm": NaN, "learning_rate": 8.132134670743838e-05, "loss": 0.0, "step": 1763 }, { "epoch": 0.296694979396182, "grad_norm": NaN, "learning_rate": 8.130039428779028e-05, "loss": 0.0, "step": 1764 }, { "epoch": 0.2968631738289463, "grad_norm": NaN, "learning_rate": 8.127943282580884e-05, "loss": 0.0, "step": 1765 }, { "epoch": 0.29703136826171056, "grad_norm": NaN, "learning_rate": 8.12584623275496e-05, "loss": 0.0, "step": 1766 }, { "epoch": 0.29719956269447484, "grad_norm": NaN, "learning_rate": 8.123748279907071e-05, "loss": 0.0, "step": 1767 }, { "epoch": 0.29736775712723906, "grad_norm": NaN, "learning_rate": 8.121649424643293e-05, "loss": 0.0, "step": 1768 }, { "epoch": 0.29753595156000334, "grad_norm": NaN, "learning_rate": 8.119549667569959e-05, "loss": 0.0, "step": 1769 }, { "epoch": 0.2977041459927676, "grad_norm": NaN, "learning_rate": 8.117449009293668e-05, "loss": 0.0, "step": 1770 }, { "epoch": 0.2978723404255319, "grad_norm": NaN, "learning_rate": 8.115347450421275e-05, "loss": 0.0, "step": 1771 }, { "epoch": 0.2980405348582962, "grad_norm": NaN, "learning_rate": 8.113244991559898e-05, "loss": 0.0, "step": 1772 }, { "epoch": 0.29820872929106046, "grad_norm": NaN, "learning_rate": 8.111141633316914e-05, "loss": 0.0, "step": 1773 }, { "epoch": 0.29837692372382474, "grad_norm": NaN, "learning_rate": 8.109037376299958e-05, "loss": 0.0, "step": 1774 }, { "epoch": 0.298545118156589, "grad_norm": NaN, "learning_rate": 8.106932221116928e-05, "loss": 0.0, "step": 1775 }, { "epoch": 0.2987133125893533, "grad_norm": NaN, "learning_rate": 8.104826168375979e-05, "loss": 0.0, "step": 1776 }, { "epoch": 0.2988815070221176, "grad_norm": NaN, "learning_rate": 8.102719218685524e-05, "loss": 0.0, "step": 1777 }, { "epoch": 0.29904970145488186, "grad_norm": NaN, "learning_rate": 8.100611372654241e-05, "loss": 0.0, "step": 1778 }, { "epoch": 0.29921789588764613, "grad_norm": NaN, "learning_rate": 8.09850263089106e-05, "loss": 0.0, "step": 1779 }, { "epoch": 0.2993860903204104, "grad_norm": NaN, "learning_rate": 8.096392994005177e-05, "loss": 0.0, "step": 1780 }, { "epoch": 0.2995542847531747, "grad_norm": NaN, "learning_rate": 8.094282462606036e-05, "loss": 0.0, "step": 1781 }, { "epoch": 0.299722479185939, "grad_norm": NaN, "learning_rate": 8.092171037303352e-05, "loss": 0.0, "step": 1782 }, { "epoch": 0.2998906736187032, "grad_norm": NaN, "learning_rate": 8.090058718707087e-05, "loss": 0.0, "step": 1783 }, { "epoch": 0.3000588680514675, "grad_norm": NaN, "learning_rate": 8.087945507427473e-05, "loss": 0.0, "step": 1784 }, { "epoch": 0.30022706248423175, "grad_norm": NaN, "learning_rate": 8.085831404074984e-05, "loss": 0.0, "step": 1785 }, { "epoch": 0.30039525691699603, "grad_norm": NaN, "learning_rate": 8.08371640926037e-05, "loss": 0.0, "step": 1786 }, { "epoch": 0.3005634513497603, "grad_norm": NaN, "learning_rate": 8.081600523594622e-05, "loss": 0.0, "step": 1787 }, { "epoch": 0.3007316457825246, "grad_norm": NaN, "learning_rate": 8.079483747689e-05, "loss": 0.0, "step": 1788 }, { "epoch": 0.30089984021528887, "grad_norm": NaN, "learning_rate": 8.077366082155016e-05, "loss": 0.0, "step": 1789 }, { "epoch": 0.30106803464805315, "grad_norm": NaN, "learning_rate": 8.075247527604437e-05, "loss": 0.0, "step": 1790 }, { "epoch": 0.30123622908081743, "grad_norm": NaN, "learning_rate": 8.073128084649294e-05, "loss": 0.0, "step": 1791 }, { "epoch": 0.3014044235135817, "grad_norm": NaN, "learning_rate": 8.071007753901868e-05, "loss": 0.0, "step": 1792 }, { "epoch": 0.301572617946346, "grad_norm": NaN, "learning_rate": 8.068886535974699e-05, "loss": 0.0, "step": 1793 }, { "epoch": 0.30174081237911027, "grad_norm": NaN, "learning_rate": 8.066764431480583e-05, "loss": 0.0, "step": 1794 }, { "epoch": 0.30190900681187455, "grad_norm": NaN, "learning_rate": 8.064641441032572e-05, "loss": 0.0, "step": 1795 }, { "epoch": 0.3020772012446388, "grad_norm": NaN, "learning_rate": 8.062517565243977e-05, "loss": 0.0, "step": 1796 }, { "epoch": 0.30224539567740305, "grad_norm": NaN, "learning_rate": 8.060392804728357e-05, "loss": 0.0, "step": 1797 }, { "epoch": 0.30241359011016733, "grad_norm": NaN, "learning_rate": 8.058267160099535e-05, "loss": 0.0, "step": 1798 }, { "epoch": 0.3025817845429316, "grad_norm": NaN, "learning_rate": 8.056140631971586e-05, "loss": 0.0, "step": 1799 }, { "epoch": 0.3027499789756959, "grad_norm": NaN, "learning_rate": 8.054013220958838e-05, "loss": 0.0, "step": 1800 }, { "epoch": 0.30291817340846017, "grad_norm": NaN, "learning_rate": 8.051884927675877e-05, "loss": 0.0, "step": 1801 }, { "epoch": 0.30308636784122445, "grad_norm": NaN, "learning_rate": 8.049755752737545e-05, "loss": 0.0, "step": 1802 }, { "epoch": 0.3032545622739887, "grad_norm": NaN, "learning_rate": 8.047625696758933e-05, "loss": 0.0, "step": 1803 }, { "epoch": 0.303422756706753, "grad_norm": NaN, "learning_rate": 8.045494760355395e-05, "loss": 0.0, "step": 1804 }, { "epoch": 0.3035909511395173, "grad_norm": NaN, "learning_rate": 8.043362944142531e-05, "loss": 0.0, "step": 1805 }, { "epoch": 0.30375914557228156, "grad_norm": NaN, "learning_rate": 8.0412302487362e-05, "loss": 0.0, "step": 1806 }, { "epoch": 0.30392734000504584, "grad_norm": NaN, "learning_rate": 8.039096674752514e-05, "loss": 0.0, "step": 1807 }, { "epoch": 0.3040955344378101, "grad_norm": NaN, "learning_rate": 8.036962222807838e-05, "loss": 0.0, "step": 1808 }, { "epoch": 0.3042637288705744, "grad_norm": NaN, "learning_rate": 8.034826893518793e-05, "loss": 0.0, "step": 1809 }, { "epoch": 0.3044319233033387, "grad_norm": NaN, "learning_rate": 8.03269068750225e-05, "loss": 0.0, "step": 1810 }, { "epoch": 0.30460011773610296, "grad_norm": NaN, "learning_rate": 8.030553605375335e-05, "loss": 0.0, "step": 1811 }, { "epoch": 0.3047683121688672, "grad_norm": NaN, "learning_rate": 8.028415647755427e-05, "loss": 0.0, "step": 1812 }, { "epoch": 0.30493650660163146, "grad_norm": NaN, "learning_rate": 8.026276815260161e-05, "loss": 0.0, "step": 1813 }, { "epoch": 0.30510470103439574, "grad_norm": NaN, "learning_rate": 8.024137108507417e-05, "loss": 0.0, "step": 1814 }, { "epoch": 0.30527289546716, "grad_norm": NaN, "learning_rate": 8.021996528115335e-05, "loss": 0.0, "step": 1815 }, { "epoch": 0.3054410898999243, "grad_norm": NaN, "learning_rate": 8.019855074702304e-05, "loss": 0.0, "step": 1816 }, { "epoch": 0.3056092843326886, "grad_norm": NaN, "learning_rate": 8.017712748886967e-05, "loss": 0.0, "step": 1817 }, { "epoch": 0.30577747876545286, "grad_norm": NaN, "learning_rate": 8.015569551288217e-05, "loss": 0.0, "step": 1818 }, { "epoch": 0.30594567319821714, "grad_norm": NaN, "learning_rate": 8.013425482525201e-05, "loss": 0.0, "step": 1819 }, { "epoch": 0.3061138676309814, "grad_norm": NaN, "learning_rate": 8.011280543217313e-05, "loss": 0.0, "step": 1820 }, { "epoch": 0.3062820620637457, "grad_norm": NaN, "learning_rate": 8.009134733984206e-05, "loss": 0.0, "step": 1821 }, { "epoch": 0.30645025649651, "grad_norm": NaN, "learning_rate": 8.006988055445778e-05, "loss": 0.0, "step": 1822 }, { "epoch": 0.30661845092927426, "grad_norm": NaN, "learning_rate": 8.004840508222182e-05, "loss": 0.0, "step": 1823 }, { "epoch": 0.30678664536203853, "grad_norm": NaN, "learning_rate": 8.002692092933818e-05, "loss": 0.0, "step": 1824 }, { "epoch": 0.3069548397948028, "grad_norm": NaN, "learning_rate": 8.000542810201341e-05, "loss": 0.0, "step": 1825 }, { "epoch": 0.3071230342275671, "grad_norm": NaN, "learning_rate": 7.998392660645655e-05, "loss": 0.0, "step": 1826 }, { "epoch": 0.3072912286603313, "grad_norm": NaN, "learning_rate": 7.996241644887912e-05, "loss": 0.0, "step": 1827 }, { "epoch": 0.3074594230930956, "grad_norm": NaN, "learning_rate": 7.994089763549517e-05, "loss": 0.0, "step": 1828 }, { "epoch": 0.3076276175258599, "grad_norm": NaN, "learning_rate": 7.991937017252126e-05, "loss": 0.0, "step": 1829 }, { "epoch": 0.30779581195862415, "grad_norm": NaN, "learning_rate": 7.989783406617643e-05, "loss": 0.0, "step": 1830 }, { "epoch": 0.30796400639138843, "grad_norm": NaN, "learning_rate": 7.98762893226822e-05, "loss": 0.0, "step": 1831 }, { "epoch": 0.3081322008241527, "grad_norm": NaN, "learning_rate": 7.985473594826263e-05, "loss": 0.0, "step": 1832 }, { "epoch": 0.308300395256917, "grad_norm": NaN, "learning_rate": 7.983317394914425e-05, "loss": 0.0, "step": 1833 }, { "epoch": 0.30846858968968127, "grad_norm": NaN, "learning_rate": 7.981160333155605e-05, "loss": 0.0, "step": 1834 }, { "epoch": 0.30863678412244555, "grad_norm": NaN, "learning_rate": 7.979002410172954e-05, "loss": 0.0, "step": 1835 }, { "epoch": 0.30880497855520983, "grad_norm": NaN, "learning_rate": 7.976843626589875e-05, "loss": 0.0, "step": 1836 }, { "epoch": 0.3089731729879741, "grad_norm": NaN, "learning_rate": 7.974683983030016e-05, "loss": 0.0, "step": 1837 }, { "epoch": 0.3091413674207384, "grad_norm": NaN, "learning_rate": 7.972523480117271e-05, "loss": 0.0, "step": 1838 }, { "epoch": 0.30930956185350267, "grad_norm": NaN, "learning_rate": 7.970362118475787e-05, "loss": 0.0, "step": 1839 }, { "epoch": 0.30947775628626695, "grad_norm": NaN, "learning_rate": 7.968199898729958e-05, "loss": 0.0, "step": 1840 }, { "epoch": 0.3096459507190312, "grad_norm": NaN, "learning_rate": 7.966036821504424e-05, "loss": 0.0, "step": 1841 }, { "epoch": 0.30981414515179545, "grad_norm": NaN, "learning_rate": 7.96387288742407e-05, "loss": 0.0, "step": 1842 }, { "epoch": 0.30998233958455973, "grad_norm": NaN, "learning_rate": 7.96170809711404e-05, "loss": 0.0, "step": 1843 }, { "epoch": 0.310150534017324, "grad_norm": NaN, "learning_rate": 7.959542451199713e-05, "loss": 0.0, "step": 1844 }, { "epoch": 0.3103187284500883, "grad_norm": NaN, "learning_rate": 7.957375950306721e-05, "loss": 0.0, "step": 1845 }, { "epoch": 0.31048692288285257, "grad_norm": NaN, "learning_rate": 7.955208595060941e-05, "loss": 0.0, "step": 1846 }, { "epoch": 0.31065511731561685, "grad_norm": NaN, "learning_rate": 7.953040386088497e-05, "loss": 0.0, "step": 1847 }, { "epoch": 0.3108233117483811, "grad_norm": NaN, "learning_rate": 7.950871324015763e-05, "loss": 0.0, "step": 1848 }, { "epoch": 0.3109915061811454, "grad_norm": NaN, "learning_rate": 7.948701409469354e-05, "loss": 0.0, "step": 1849 }, { "epoch": 0.3111597006139097, "grad_norm": NaN, "learning_rate": 7.946530643076138e-05, "loss": 0.0, "step": 1850 }, { "epoch": 0.31132789504667396, "grad_norm": NaN, "learning_rate": 7.944359025463219e-05, "loss": 0.0, "step": 1851 }, { "epoch": 0.31149608947943824, "grad_norm": NaN, "learning_rate": 7.942186557257958e-05, "loss": 0.0, "step": 1852 }, { "epoch": 0.3116642839122025, "grad_norm": NaN, "learning_rate": 7.940013239087953e-05, "loss": 0.0, "step": 1853 }, { "epoch": 0.3118324783449668, "grad_norm": NaN, "learning_rate": 7.937839071581055e-05, "loss": 0.0, "step": 1854 }, { "epoch": 0.3120006727777311, "grad_norm": NaN, "learning_rate": 7.935664055365355e-05, "loss": 0.0, "step": 1855 }, { "epoch": 0.31216886721049536, "grad_norm": NaN, "learning_rate": 7.933488191069189e-05, "loss": 0.0, "step": 1856 }, { "epoch": 0.3123370616432596, "grad_norm": NaN, "learning_rate": 7.931311479321144e-05, "loss": 0.0, "step": 1857 }, { "epoch": 0.31250525607602386, "grad_norm": NaN, "learning_rate": 7.929133920750044e-05, "loss": 0.0, "step": 1858 }, { "epoch": 0.31267345050878814, "grad_norm": NaN, "learning_rate": 7.926955515984961e-05, "loss": 0.0, "step": 1859 }, { "epoch": 0.3128416449415524, "grad_norm": NaN, "learning_rate": 7.924776265655216e-05, "loss": 0.0, "step": 1860 }, { "epoch": 0.3130098393743167, "grad_norm": NaN, "learning_rate": 7.922596170390365e-05, "loss": 0.0, "step": 1861 }, { "epoch": 0.313178033807081, "grad_norm": NaN, "learning_rate": 7.920415230820218e-05, "loss": 0.0, "step": 1862 }, { "epoch": 0.31334622823984526, "grad_norm": NaN, "learning_rate": 7.918233447574818e-05, "loss": 0.0, "step": 1863 }, { "epoch": 0.31351442267260954, "grad_norm": NaN, "learning_rate": 7.916050821284462e-05, "loss": 0.0, "step": 1864 }, { "epoch": 0.3136826171053738, "grad_norm": NaN, "learning_rate": 7.913867352579688e-05, "loss": 0.0, "step": 1865 }, { "epoch": 0.3138508115381381, "grad_norm": NaN, "learning_rate": 7.911683042091269e-05, "loss": 0.0, "step": 1866 }, { "epoch": 0.3140190059709024, "grad_norm": NaN, "learning_rate": 7.909497890450235e-05, "loss": 0.0, "step": 1867 }, { "epoch": 0.31418720040366666, "grad_norm": NaN, "learning_rate": 7.907311898287847e-05, "loss": 0.0, "step": 1868 }, { "epoch": 0.31435539483643093, "grad_norm": NaN, "learning_rate": 7.905125066235616e-05, "loss": 0.0, "step": 1869 }, { "epoch": 0.3145235892691952, "grad_norm": NaN, "learning_rate": 7.902937394925293e-05, "loss": 0.0, "step": 1870 }, { "epoch": 0.31469178370195944, "grad_norm": NaN, "learning_rate": 7.90074888498887e-05, "loss": 0.0, "step": 1871 }, { "epoch": 0.3148599781347237, "grad_norm": NaN, "learning_rate": 7.898559537058586e-05, "loss": 0.0, "step": 1872 }, { "epoch": 0.315028172567488, "grad_norm": NaN, "learning_rate": 7.896369351766916e-05, "loss": 0.0, "step": 1873 }, { "epoch": 0.3151963670002523, "grad_norm": NaN, "learning_rate": 7.894178329746582e-05, "loss": 0.0, "step": 1874 }, { "epoch": 0.31536456143301655, "grad_norm": NaN, "learning_rate": 7.891986471630546e-05, "loss": 0.0, "step": 1875 }, { "epoch": 0.31553275586578083, "grad_norm": NaN, "learning_rate": 7.889793778052011e-05, "loss": 0.0, "step": 1876 }, { "epoch": 0.3157009502985451, "grad_norm": NaN, "learning_rate": 7.887600249644418e-05, "loss": 0.0, "step": 1877 }, { "epoch": 0.3158691447313094, "grad_norm": NaN, "learning_rate": 7.88540588704146e-05, "loss": 0.0, "step": 1878 }, { "epoch": 0.31603733916407367, "grad_norm": NaN, "learning_rate": 7.883210690877055e-05, "loss": 0.0, "step": 1879 }, { "epoch": 0.31620553359683795, "grad_norm": NaN, "learning_rate": 7.881014661785379e-05, "loss": 0.0, "step": 1880 }, { "epoch": 0.31637372802960223, "grad_norm": NaN, "learning_rate": 7.878817800400833e-05, "loss": 0.0, "step": 1881 }, { "epoch": 0.3165419224623665, "grad_norm": NaN, "learning_rate": 7.876620107358073e-05, "loss": 0.0, "step": 1882 }, { "epoch": 0.3167101168951308, "grad_norm": NaN, "learning_rate": 7.874421583291982e-05, "loss": 0.0, "step": 1883 }, { "epoch": 0.31687831132789507, "grad_norm": NaN, "learning_rate": 7.872222228837691e-05, "loss": 0.0, "step": 1884 }, { "epoch": 0.31704650576065935, "grad_norm": NaN, "learning_rate": 7.870022044630569e-05, "loss": 0.0, "step": 1885 }, { "epoch": 0.31721470019342357, "grad_norm": NaN, "learning_rate": 7.867821031306223e-05, "loss": 0.0, "step": 1886 }, { "epoch": 0.31738289462618785, "grad_norm": NaN, "learning_rate": 7.865619189500504e-05, "loss": 0.0, "step": 1887 }, { "epoch": 0.31755108905895213, "grad_norm": NaN, "learning_rate": 7.863416519849499e-05, "loss": 0.0, "step": 1888 }, { "epoch": 0.3177192834917164, "grad_norm": NaN, "learning_rate": 7.861213022989531e-05, "loss": 0.0, "step": 1889 }, { "epoch": 0.3178874779244807, "grad_norm": NaN, "learning_rate": 7.85900869955717e-05, "loss": 0.0, "step": 1890 }, { "epoch": 0.31805567235724497, "grad_norm": NaN, "learning_rate": 7.856803550189218e-05, "loss": 0.0, "step": 1891 }, { "epoch": 0.31822386679000925, "grad_norm": NaN, "learning_rate": 7.854597575522717e-05, "loss": 0.0, "step": 1892 }, { "epoch": 0.3183920612227735, "grad_norm": NaN, "learning_rate": 7.85239077619495e-05, "loss": 0.0, "step": 1893 }, { "epoch": 0.3185602556555378, "grad_norm": NaN, "learning_rate": 7.850183152843439e-05, "loss": 0.0, "step": 1894 }, { "epoch": 0.3187284500883021, "grad_norm": NaN, "learning_rate": 7.847974706105935e-05, "loss": 0.0, "step": 1895 }, { "epoch": 0.31889664452106636, "grad_norm": NaN, "learning_rate": 7.845765436620441e-05, "loss": 0.0, "step": 1896 }, { "epoch": 0.31906483895383064, "grad_norm": NaN, "learning_rate": 7.843555345025186e-05, "loss": 0.0, "step": 1897 }, { "epoch": 0.3192330333865949, "grad_norm": NaN, "learning_rate": 7.841344431958641e-05, "loss": 0.0, "step": 1898 }, { "epoch": 0.3194012278193592, "grad_norm": NaN, "learning_rate": 7.839132698059515e-05, "loss": 0.0, "step": 1899 }, { "epoch": 0.3195694222521235, "grad_norm": NaN, "learning_rate": 7.836920143966751e-05, "loss": 0.0, "step": 1900 }, { "epoch": 0.3197376166848877, "grad_norm": NaN, "learning_rate": 7.834706770319536e-05, "loss": 0.0, "step": 1901 }, { "epoch": 0.319905811117652, "grad_norm": NaN, "learning_rate": 7.832492577757282e-05, "loss": 0.0, "step": 1902 }, { "epoch": 0.32007400555041626, "grad_norm": NaN, "learning_rate": 7.83027756691965e-05, "loss": 0.0, "step": 1903 }, { "epoch": 0.32024219998318054, "grad_norm": NaN, "learning_rate": 7.828061738446528e-05, "loss": 0.0, "step": 1904 }, { "epoch": 0.3204103944159448, "grad_norm": NaN, "learning_rate": 7.825845092978048e-05, "loss": 0.0, "step": 1905 }, { "epoch": 0.3205785888487091, "grad_norm": NaN, "learning_rate": 7.823627631154571e-05, "loss": 0.0, "step": 1906 }, { "epoch": 0.3207467832814734, "grad_norm": NaN, "learning_rate": 7.821409353616699e-05, "loss": 0.0, "step": 1907 }, { "epoch": 0.32091497771423766, "grad_norm": NaN, "learning_rate": 7.819190261005264e-05, "loss": 0.0, "step": 1908 }, { "epoch": 0.32108317214700194, "grad_norm": NaN, "learning_rate": 7.816970353961341e-05, "loss": 0.0, "step": 1909 }, { "epoch": 0.3212513665797662, "grad_norm": NaN, "learning_rate": 7.814749633126235e-05, "loss": 0.0, "step": 1910 }, { "epoch": 0.3214195610125305, "grad_norm": NaN, "learning_rate": 7.812528099141486e-05, "loss": 0.0, "step": 1911 }, { "epoch": 0.3215877554452948, "grad_norm": NaN, "learning_rate": 7.810305752648871e-05, "loss": 0.0, "step": 1912 }, { "epoch": 0.32175594987805906, "grad_norm": NaN, "learning_rate": 7.808082594290402e-05, "loss": 0.0, "step": 1913 }, { "epoch": 0.32192414431082333, "grad_norm": NaN, "learning_rate": 7.805858624708323e-05, "loss": 0.0, "step": 1914 }, { "epoch": 0.3220923387435876, "grad_norm": NaN, "learning_rate": 7.803633844545114e-05, "loss": 0.0, "step": 1915 }, { "epoch": 0.32226053317635184, "grad_norm": NaN, "learning_rate": 7.801408254443493e-05, "loss": 0.0, "step": 1916 }, { "epoch": 0.3224287276091161, "grad_norm": NaN, "learning_rate": 7.799181855046403e-05, "loss": 0.0, "step": 1917 }, { "epoch": 0.3225969220418804, "grad_norm": NaN, "learning_rate": 7.796954646997028e-05, "loss": 0.0, "step": 1918 }, { "epoch": 0.3227651164746447, "grad_norm": NaN, "learning_rate": 7.794726630938782e-05, "loss": 0.0, "step": 1919 }, { "epoch": 0.32293331090740895, "grad_norm": NaN, "learning_rate": 7.792497807515317e-05, "loss": 0.0, "step": 1920 }, { "epoch": 0.32310150534017323, "grad_norm": NaN, "learning_rate": 7.790268177370512e-05, "loss": 0.0, "step": 1921 }, { "epoch": 0.3232696997729375, "grad_norm": NaN, "learning_rate": 7.788037741148483e-05, "loss": 0.0, "step": 1922 }, { "epoch": 0.3234378942057018, "grad_norm": NaN, "learning_rate": 7.78580649949358e-05, "loss": 0.0, "step": 1923 }, { "epoch": 0.32360608863846607, "grad_norm": NaN, "learning_rate": 7.78357445305038e-05, "loss": 0.0, "step": 1924 }, { "epoch": 0.32377428307123035, "grad_norm": NaN, "learning_rate": 7.7813416024637e-05, "loss": 0.0, "step": 1925 }, { "epoch": 0.32394247750399463, "grad_norm": NaN, "learning_rate": 7.779107948378582e-05, "loss": 0.0, "step": 1926 }, { "epoch": 0.3241106719367589, "grad_norm": NaN, "learning_rate": 7.776873491440307e-05, "loss": 0.0, "step": 1927 }, { "epoch": 0.3242788663695232, "grad_norm": NaN, "learning_rate": 7.774638232294382e-05, "loss": 0.0, "step": 1928 }, { "epoch": 0.32444706080228747, "grad_norm": NaN, "learning_rate": 7.772402171586552e-05, "loss": 0.0, "step": 1929 }, { "epoch": 0.32461525523505175, "grad_norm": NaN, "learning_rate": 7.770165309962785e-05, "loss": 0.0, "step": 1930 }, { "epoch": 0.32478344966781597, "grad_norm": NaN, "learning_rate": 7.767927648069289e-05, "loss": 0.0, "step": 1931 }, { "epoch": 0.32495164410058025, "grad_norm": NaN, "learning_rate": 7.765689186552496e-05, "loss": 0.0, "step": 1932 }, { "epoch": 0.32511983853334453, "grad_norm": NaN, "learning_rate": 7.763449926059076e-05, "loss": 0.0, "step": 1933 }, { "epoch": 0.3252880329661088, "grad_norm": NaN, "learning_rate": 7.761209867235924e-05, "loss": 0.0, "step": 1934 }, { "epoch": 0.3254562273988731, "grad_norm": NaN, "learning_rate": 7.75896901073017e-05, "loss": 0.0, "step": 1935 }, { "epoch": 0.32562442183163737, "grad_norm": NaN, "learning_rate": 7.756727357189169e-05, "loss": 0.0, "step": 1936 }, { "epoch": 0.32579261626440165, "grad_norm": NaN, "learning_rate": 7.754484907260513e-05, "loss": 0.0, "step": 1937 }, { "epoch": 0.3259608106971659, "grad_norm": NaN, "learning_rate": 7.75224166159202e-05, "loss": 0.0, "step": 1938 }, { "epoch": 0.3261290051299302, "grad_norm": NaN, "learning_rate": 7.749997620831737e-05, "loss": 0.0, "step": 1939 }, { "epoch": 0.3262971995626945, "grad_norm": NaN, "learning_rate": 7.747752785627942e-05, "loss": 0.0, "step": 1940 }, { "epoch": 0.32646539399545876, "grad_norm": NaN, "learning_rate": 7.745507156629145e-05, "loss": 0.0, "step": 1941 }, { "epoch": 0.32663358842822304, "grad_norm": NaN, "learning_rate": 7.743260734484081e-05, "loss": 0.0, "step": 1942 }, { "epoch": 0.3268017828609873, "grad_norm": NaN, "learning_rate": 7.741013519841718e-05, "loss": 0.0, "step": 1943 }, { "epoch": 0.3269699772937516, "grad_norm": NaN, "learning_rate": 7.73876551335125e-05, "loss": 0.0, "step": 1944 }, { "epoch": 0.3271381717265158, "grad_norm": NaN, "learning_rate": 7.736516715662101e-05, "loss": 0.0, "step": 1945 }, { "epoch": 0.3273063661592801, "grad_norm": NaN, "learning_rate": 7.734267127423924e-05, "loss": 0.0, "step": 1946 }, { "epoch": 0.3274745605920444, "grad_norm": NaN, "learning_rate": 7.7320167492866e-05, "loss": 0.0, "step": 1947 }, { "epoch": 0.32764275502480866, "grad_norm": NaN, "learning_rate": 7.729765581900235e-05, "loss": 0.0, "step": 1948 }, { "epoch": 0.32781094945757294, "grad_norm": NaN, "learning_rate": 7.72751362591517e-05, "loss": 0.0, "step": 1949 }, { "epoch": 0.3279791438903372, "grad_norm": NaN, "learning_rate": 7.725260881981971e-05, "loss": 0.0, "step": 1950 }, { "epoch": 0.3281473383231015, "grad_norm": NaN, "learning_rate": 7.723007350751423e-05, "loss": 0.0, "step": 1951 }, { "epoch": 0.3283155327558658, "grad_norm": NaN, "learning_rate": 7.720753032874555e-05, "loss": 0.0, "step": 1952 }, { "epoch": 0.32848372718863006, "grad_norm": NaN, "learning_rate": 7.718497929002608e-05, "loss": 0.0, "step": 1953 }, { "epoch": 0.32865192162139434, "grad_norm": NaN, "learning_rate": 7.716242039787059e-05, "loss": 0.0, "step": 1954 }, { "epoch": 0.3288201160541586, "grad_norm": NaN, "learning_rate": 7.713985365879606e-05, "loss": 0.0, "step": 1955 }, { "epoch": 0.3289883104869229, "grad_norm": NaN, "learning_rate": 7.711727907932181e-05, "loss": 0.0, "step": 1956 }, { "epoch": 0.3291565049196872, "grad_norm": NaN, "learning_rate": 7.709469666596937e-05, "loss": 0.0, "step": 1957 }, { "epoch": 0.32932469935245146, "grad_norm": NaN, "learning_rate": 7.707210642526253e-05, "loss": 0.0, "step": 1958 }, { "epoch": 0.32949289378521573, "grad_norm": NaN, "learning_rate": 7.704950836372738e-05, "loss": 0.0, "step": 1959 }, { "epoch": 0.32966108821797996, "grad_norm": NaN, "learning_rate": 7.702690248789223e-05, "loss": 0.0, "step": 1960 }, { "epoch": 0.32982928265074424, "grad_norm": NaN, "learning_rate": 7.700428880428768e-05, "loss": 0.0, "step": 1961 }, { "epoch": 0.3299974770835085, "grad_norm": NaN, "learning_rate": 7.698166731944654e-05, "loss": 0.0, "step": 1962 }, { "epoch": 0.3301656715162728, "grad_norm": NaN, "learning_rate": 7.695903803990392e-05, "loss": 0.0, "step": 1963 }, { "epoch": 0.3303338659490371, "grad_norm": NaN, "learning_rate": 7.693640097219719e-05, "loss": 0.0, "step": 1964 }, { "epoch": 0.33050206038180135, "grad_norm": NaN, "learning_rate": 7.691375612286591e-05, "loss": 0.0, "step": 1965 }, { "epoch": 0.33067025481456563, "grad_norm": NaN, "learning_rate": 7.689110349845195e-05, "loss": 0.0, "step": 1966 }, { "epoch": 0.3308384492473299, "grad_norm": NaN, "learning_rate": 7.686844310549936e-05, "loss": 0.0, "step": 1967 }, { "epoch": 0.3310066436800942, "grad_norm": NaN, "learning_rate": 7.684577495055453e-05, "loss": 0.0, "step": 1968 }, { "epoch": 0.33117483811285847, "grad_norm": NaN, "learning_rate": 7.682309904016601e-05, "loss": 0.0, "step": 1969 }, { "epoch": 0.33134303254562275, "grad_norm": NaN, "learning_rate": 7.68004153808846e-05, "loss": 0.0, "step": 1970 }, { "epoch": 0.33151122697838703, "grad_norm": NaN, "learning_rate": 7.677772397926337e-05, "loss": 0.0, "step": 1971 }, { "epoch": 0.3316794214111513, "grad_norm": NaN, "learning_rate": 7.675502484185761e-05, "loss": 0.0, "step": 1972 }, { "epoch": 0.3318476158439156, "grad_norm": NaN, "learning_rate": 7.673231797522484e-05, "loss": 0.0, "step": 1973 }, { "epoch": 0.33201581027667987, "grad_norm": NaN, "learning_rate": 7.670960338592483e-05, "loss": 0.0, "step": 1974 }, { "epoch": 0.3321840047094441, "grad_norm": NaN, "learning_rate": 7.668688108051957e-05, "loss": 0.0, "step": 1975 }, { "epoch": 0.33235219914220837, "grad_norm": NaN, "learning_rate": 7.666415106557327e-05, "loss": 0.0, "step": 1976 }, { "epoch": 0.33252039357497265, "grad_norm": NaN, "learning_rate": 7.664141334765238e-05, "loss": 0.0, "step": 1977 }, { "epoch": 0.33268858800773693, "grad_norm": NaN, "learning_rate": 7.66186679333256e-05, "loss": 0.0, "step": 1978 }, { "epoch": 0.3328567824405012, "grad_norm": NaN, "learning_rate": 7.659591482916378e-05, "loss": 0.0, "step": 1979 }, { "epoch": 0.3330249768732655, "grad_norm": NaN, "learning_rate": 7.657315404174006e-05, "loss": 0.0, "step": 1980 }, { "epoch": 0.33319317130602977, "grad_norm": NaN, "learning_rate": 7.655038557762977e-05, "loss": 0.0, "step": 1981 }, { "epoch": 0.33336136573879405, "grad_norm": NaN, "learning_rate": 7.652760944341046e-05, "loss": 0.0, "step": 1982 }, { "epoch": 0.3335295601715583, "grad_norm": NaN, "learning_rate": 7.650482564566193e-05, "loss": 0.0, "step": 1983 }, { "epoch": 0.3336977546043226, "grad_norm": NaN, "learning_rate": 7.648203419096613e-05, "loss": 0.0, "step": 1984 }, { "epoch": 0.3338659490370869, "grad_norm": NaN, "learning_rate": 7.645923508590726e-05, "loss": 0.0, "step": 1985 }, { "epoch": 0.33403414346985116, "grad_norm": NaN, "learning_rate": 7.643642833707174e-05, "loss": 0.0, "step": 1986 }, { "epoch": 0.33420233790261544, "grad_norm": NaN, "learning_rate": 7.64136139510482e-05, "loss": 0.0, "step": 1987 }, { "epoch": 0.3343705323353797, "grad_norm": NaN, "learning_rate": 7.639079193442742e-05, "loss": 0.0, "step": 1988 }, { "epoch": 0.334538726768144, "grad_norm": NaN, "learning_rate": 7.636796229380243e-05, "loss": 0.0, "step": 1989 }, { "epoch": 0.3347069212009082, "grad_norm": NaN, "learning_rate": 7.63451250357685e-05, "loss": 0.0, "step": 1990 }, { "epoch": 0.3348751156336725, "grad_norm": NaN, "learning_rate": 7.6322280166923e-05, "loss": 0.0, "step": 1991 }, { "epoch": 0.3350433100664368, "grad_norm": NaN, "learning_rate": 7.629942769386562e-05, "loss": 0.0, "step": 1992 }, { "epoch": 0.33521150449920106, "grad_norm": NaN, "learning_rate": 7.627656762319812e-05, "loss": 0.0, "step": 1993 }, { "epoch": 0.33537969893196534, "grad_norm": NaN, "learning_rate": 7.625369996152456e-05, "loss": 0.0, "step": 1994 }, { "epoch": 0.3355478933647296, "grad_norm": NaN, "learning_rate": 7.623082471545113e-05, "loss": 0.0, "step": 1995 }, { "epoch": 0.3357160877974939, "grad_norm": NaN, "learning_rate": 7.620794189158626e-05, "loss": 0.0, "step": 1996 }, { "epoch": 0.3358842822302582, "grad_norm": NaN, "learning_rate": 7.618505149654051e-05, "loss": 0.0, "step": 1997 }, { "epoch": 0.33605247666302246, "grad_norm": NaN, "learning_rate": 7.616215353692668e-05, "loss": 0.0, "step": 1998 }, { "epoch": 0.33622067109578674, "grad_norm": NaN, "learning_rate": 7.613924801935973e-05, "loss": 0.0, "step": 1999 }, { "epoch": 0.336388865528551, "grad_norm": NaN, "learning_rate": 7.611633495045679e-05, "loss": 0.0, "step": 2000 }, { "epoch": 0.3365570599613153, "grad_norm": NaN, "learning_rate": 7.609341433683721e-05, "loss": 0.0, "step": 2001 }, { "epoch": 0.3367252543940796, "grad_norm": NaN, "learning_rate": 7.60704861851225e-05, "loss": 0.0, "step": 2002 }, { "epoch": 0.33689344882684386, "grad_norm": NaN, "learning_rate": 7.604755050193631e-05, "loss": 0.0, "step": 2003 }, { "epoch": 0.33706164325960813, "grad_norm": NaN, "learning_rate": 7.602460729390455e-05, "loss": 0.0, "step": 2004 }, { "epoch": 0.33722983769237236, "grad_norm": NaN, "learning_rate": 7.600165656765523e-05, "loss": 0.0, "step": 2005 }, { "epoch": 0.33739803212513664, "grad_norm": NaN, "learning_rate": 7.597869832981857e-05, "loss": 0.0, "step": 2006 }, { "epoch": 0.3375662265579009, "grad_norm": NaN, "learning_rate": 7.595573258702693e-05, "loss": 0.0, "step": 2007 }, { "epoch": 0.3377344209906652, "grad_norm": NaN, "learning_rate": 7.593275934591487e-05, "loss": 0.0, "step": 2008 }, { "epoch": 0.3379026154234295, "grad_norm": NaN, "learning_rate": 7.59097786131191e-05, "loss": 0.0, "step": 2009 }, { "epoch": 0.33807080985619375, "grad_norm": NaN, "learning_rate": 7.588679039527848e-05, "loss": 0.0, "step": 2010 }, { "epoch": 0.33823900428895803, "grad_norm": NaN, "learning_rate": 7.586379469903408e-05, "loss": 0.0, "step": 2011 }, { "epoch": 0.3384071987217223, "grad_norm": NaN, "learning_rate": 7.584079153102908e-05, "loss": 0.0, "step": 2012 }, { "epoch": 0.3385753931544866, "grad_norm": NaN, "learning_rate": 7.581778089790885e-05, "loss": 0.0, "step": 2013 }, { "epoch": 0.33874358758725087, "grad_norm": NaN, "learning_rate": 7.579476280632088e-05, "loss": 0.0, "step": 2014 }, { "epoch": 0.33891178202001515, "grad_norm": NaN, "learning_rate": 7.577173726291487e-05, "loss": 0.0, "step": 2015 }, { "epoch": 0.33907997645277943, "grad_norm": NaN, "learning_rate": 7.574870427434261e-05, "loss": 0.0, "step": 2016 }, { "epoch": 0.3392481708855437, "grad_norm": NaN, "learning_rate": 7.572566384725809e-05, "loss": 0.0, "step": 2017 }, { "epoch": 0.339416365318308, "grad_norm": NaN, "learning_rate": 7.570261598831743e-05, "loss": 0.0, "step": 2018 }, { "epoch": 0.3395845597510722, "grad_norm": NaN, "learning_rate": 7.567956070417889e-05, "loss": 0.0, "step": 2019 }, { "epoch": 0.3397527541838365, "grad_norm": NaN, "learning_rate": 7.565649800150289e-05, "loss": 0.0, "step": 2020 }, { "epoch": 0.33992094861660077, "grad_norm": NaN, "learning_rate": 7.5633427886952e-05, "loss": 0.0, "step": 2021 }, { "epoch": 0.34008914304936505, "grad_norm": NaN, "learning_rate": 7.561035036719088e-05, "loss": 0.0, "step": 2022 }, { "epoch": 0.34025733748212933, "grad_norm": NaN, "learning_rate": 7.558726544888642e-05, "loss": 0.0, "step": 2023 }, { "epoch": 0.3404255319148936, "grad_norm": NaN, "learning_rate": 7.556417313870754e-05, "loss": 0.0, "step": 2024 }, { "epoch": 0.3405937263476579, "grad_norm": NaN, "learning_rate": 7.55410734433254e-05, "loss": 0.0, "step": 2025 }, { "epoch": 0.34076192078042217, "grad_norm": NaN, "learning_rate": 7.55179663694132e-05, "loss": 0.0, "step": 2026 }, { "epoch": 0.34093011521318645, "grad_norm": NaN, "learning_rate": 7.549485192364635e-05, "loss": 0.0, "step": 2027 }, { "epoch": 0.3410983096459507, "grad_norm": NaN, "learning_rate": 7.547173011270232e-05, "loss": 0.0, "step": 2028 }, { "epoch": 0.341266504078715, "grad_norm": NaN, "learning_rate": 7.544860094326078e-05, "loss": 0.0, "step": 2029 }, { "epoch": 0.3414346985114793, "grad_norm": NaN, "learning_rate": 7.542546442200345e-05, "loss": 0.0, "step": 2030 }, { "epoch": 0.34160289294424356, "grad_norm": NaN, "learning_rate": 7.540232055561423e-05, "loss": 0.0, "step": 2031 }, { "epoch": 0.34177108737700784, "grad_norm": NaN, "learning_rate": 7.537916935077914e-05, "loss": 0.0, "step": 2032 }, { "epoch": 0.3419392818097721, "grad_norm": NaN, "learning_rate": 7.535601081418627e-05, "loss": 0.0, "step": 2033 }, { "epoch": 0.34210747624253635, "grad_norm": NaN, "learning_rate": 7.533284495252589e-05, "loss": 0.0, "step": 2034 }, { "epoch": 0.3422756706753006, "grad_norm": NaN, "learning_rate": 7.530967177249036e-05, "loss": 0.0, "step": 2035 }, { "epoch": 0.3424438651080649, "grad_norm": NaN, "learning_rate": 7.528649128077412e-05, "loss": 0.0, "step": 2036 }, { "epoch": 0.3426120595408292, "grad_norm": NaN, "learning_rate": 7.526330348407378e-05, "loss": 0.0, "step": 2037 }, { "epoch": 0.34278025397359346, "grad_norm": NaN, "learning_rate": 7.524010838908802e-05, "loss": 0.0, "step": 2038 }, { "epoch": 0.34294844840635774, "grad_norm": NaN, "learning_rate": 7.521690600251766e-05, "loss": 0.0, "step": 2039 }, { "epoch": 0.343116642839122, "grad_norm": NaN, "learning_rate": 7.519369633106559e-05, "loss": 0.0, "step": 2040 }, { "epoch": 0.3432848372718863, "grad_norm": NaN, "learning_rate": 7.517047938143684e-05, "loss": 0.0, "step": 2041 }, { "epoch": 0.3434530317046506, "grad_norm": NaN, "learning_rate": 7.514725516033854e-05, "loss": 0.0, "step": 2042 }, { "epoch": 0.34362122613741486, "grad_norm": NaN, "learning_rate": 7.512402367447986e-05, "loss": 0.0, "step": 2043 }, { "epoch": 0.34378942057017914, "grad_norm": NaN, "learning_rate": 7.510078493057218e-05, "loss": 0.0, "step": 2044 }, { "epoch": 0.3439576150029434, "grad_norm": NaN, "learning_rate": 7.507753893532886e-05, "loss": 0.0, "step": 2045 }, { "epoch": 0.3441258094357077, "grad_norm": NaN, "learning_rate": 7.505428569546542e-05, "loss": 0.0, "step": 2046 }, { "epoch": 0.344294003868472, "grad_norm": NaN, "learning_rate": 7.503102521769949e-05, "loss": 0.0, "step": 2047 }, { "epoch": 0.34446219830123626, "grad_norm": NaN, "learning_rate": 7.500775750875071e-05, "loss": 0.0, "step": 2048 }, { "epoch": 0.3446303927340005, "grad_norm": NaN, "learning_rate": 7.49844825753409e-05, "loss": 0.0, "step": 2049 }, { "epoch": 0.34479858716676476, "grad_norm": NaN, "learning_rate": 7.496120042419393e-05, "loss": 0.0, "step": 2050 }, { "epoch": 0.34496678159952904, "grad_norm": NaN, "learning_rate": 7.493791106203571e-05, "loss": 0.0, "step": 2051 }, { "epoch": 0.3451349760322933, "grad_norm": NaN, "learning_rate": 7.491461449559434e-05, "loss": 0.0, "step": 2052 }, { "epoch": 0.3453031704650576, "grad_norm": NaN, "learning_rate": 7.489131073159987e-05, "loss": 0.0, "step": 2053 }, { "epoch": 0.3454713648978219, "grad_norm": NaN, "learning_rate": 7.486799977678455e-05, "loss": 0.0, "step": 2054 }, { "epoch": 0.34563955933058615, "grad_norm": NaN, "learning_rate": 7.484468163788262e-05, "loss": 0.0, "step": 2055 }, { "epoch": 0.34580775376335043, "grad_norm": NaN, "learning_rate": 7.482135632163045e-05, "loss": 0.0, "step": 2056 }, { "epoch": 0.3459759481961147, "grad_norm": NaN, "learning_rate": 7.479802383476643e-05, "loss": 0.0, "step": 2057 }, { "epoch": 0.346144142628879, "grad_norm": NaN, "learning_rate": 7.477468418403111e-05, "loss": 0.0, "step": 2058 }, { "epoch": 0.34631233706164327, "grad_norm": NaN, "learning_rate": 7.475133737616699e-05, "loss": 0.0, "step": 2059 }, { "epoch": 0.34648053149440755, "grad_norm": NaN, "learning_rate": 7.472798341791877e-05, "loss": 0.0, "step": 2060 }, { "epoch": 0.34664872592717183, "grad_norm": NaN, "learning_rate": 7.470462231603308e-05, "loss": 0.0, "step": 2061 }, { "epoch": 0.3468169203599361, "grad_norm": NaN, "learning_rate": 7.468125407725872e-05, "loss": 0.0, "step": 2062 }, { "epoch": 0.3469851147927004, "grad_norm": NaN, "learning_rate": 7.465787870834652e-05, "loss": 0.0, "step": 2063 }, { "epoch": 0.3471533092254646, "grad_norm": NaN, "learning_rate": 7.463449621604932e-05, "loss": 0.0, "step": 2064 }, { "epoch": 0.3473215036582289, "grad_norm": NaN, "learning_rate": 7.461110660712209e-05, "loss": 0.0, "step": 2065 }, { "epoch": 0.34748969809099317, "grad_norm": NaN, "learning_rate": 7.458770988832183e-05, "loss": 0.0, "step": 2066 }, { "epoch": 0.34765789252375745, "grad_norm": NaN, "learning_rate": 7.456430606640757e-05, "loss": 0.0, "step": 2067 }, { "epoch": 0.34782608695652173, "grad_norm": NaN, "learning_rate": 7.454089514814044e-05, "loss": 0.0, "step": 2068 }, { "epoch": 0.347994281389286, "grad_norm": NaN, "learning_rate": 7.451747714028356e-05, "loss": 0.0, "step": 2069 }, { "epoch": 0.3481624758220503, "grad_norm": NaN, "learning_rate": 7.449405204960216e-05, "loss": 0.0, "step": 2070 }, { "epoch": 0.34833067025481457, "grad_norm": NaN, "learning_rate": 7.447061988286346e-05, "loss": 0.0, "step": 2071 }, { "epoch": 0.34849886468757885, "grad_norm": NaN, "learning_rate": 7.444718064683676e-05, "loss": 0.0, "step": 2072 }, { "epoch": 0.3486670591203431, "grad_norm": NaN, "learning_rate": 7.442373434829341e-05, "loss": 0.0, "step": 2073 }, { "epoch": 0.3488352535531074, "grad_norm": NaN, "learning_rate": 7.440028099400677e-05, "loss": 0.0, "step": 2074 }, { "epoch": 0.3490034479858717, "grad_norm": NaN, "learning_rate": 7.437682059075223e-05, "loss": 0.0, "step": 2075 }, { "epoch": 0.34917164241863596, "grad_norm": NaN, "learning_rate": 7.435335314530727e-05, "loss": 0.0, "step": 2076 }, { "epoch": 0.34933983685140024, "grad_norm": NaN, "learning_rate": 7.432987866445135e-05, "loss": 0.0, "step": 2077 }, { "epoch": 0.34950803128416447, "grad_norm": NaN, "learning_rate": 7.4306397154966e-05, "loss": 0.0, "step": 2078 }, { "epoch": 0.34967622571692875, "grad_norm": NaN, "learning_rate": 7.428290862363477e-05, "loss": 0.0, "step": 2079 }, { "epoch": 0.349844420149693, "grad_norm": NaN, "learning_rate": 7.42594130772432e-05, "loss": 0.0, "step": 2080 }, { "epoch": 0.3500126145824573, "grad_norm": NaN, "learning_rate": 7.423591052257893e-05, "loss": 0.0, "step": 2081 }, { "epoch": 0.3501808090152216, "grad_norm": NaN, "learning_rate": 7.421240096643157e-05, "loss": 0.0, "step": 2082 }, { "epoch": 0.35034900344798586, "grad_norm": NaN, "learning_rate": 7.418888441559275e-05, "loss": 0.0, "step": 2083 }, { "epoch": 0.35051719788075014, "grad_norm": NaN, "learning_rate": 7.416536087685617e-05, "loss": 0.0, "step": 2084 }, { "epoch": 0.3506853923135144, "grad_norm": NaN, "learning_rate": 7.414183035701749e-05, "loss": 0.0, "step": 2085 }, { "epoch": 0.3508535867462787, "grad_norm": NaN, "learning_rate": 7.411829286287442e-05, "loss": 0.0, "step": 2086 }, { "epoch": 0.351021781179043, "grad_norm": NaN, "learning_rate": 7.409474840122669e-05, "loss": 0.0, "step": 2087 }, { "epoch": 0.35118997561180726, "grad_norm": NaN, "learning_rate": 7.407119697887602e-05, "loss": 0.0, "step": 2088 }, { "epoch": 0.35135817004457154, "grad_norm": NaN, "learning_rate": 7.404763860262616e-05, "loss": 0.0, "step": 2089 }, { "epoch": 0.3515263644773358, "grad_norm": NaN, "learning_rate": 7.402407327928285e-05, "loss": 0.0, "step": 2090 }, { "epoch": 0.3516945589101001, "grad_norm": NaN, "learning_rate": 7.400050101565387e-05, "loss": 0.0, "step": 2091 }, { "epoch": 0.3518627533428644, "grad_norm": NaN, "learning_rate": 7.397692181854895e-05, "loss": 0.0, "step": 2092 }, { "epoch": 0.3520309477756286, "grad_norm": NaN, "learning_rate": 7.395333569477988e-05, "loss": 0.0, "step": 2093 }, { "epoch": 0.3521991422083929, "grad_norm": NaN, "learning_rate": 7.392974265116043e-05, "loss": 0.0, "step": 2094 }, { "epoch": 0.35236733664115716, "grad_norm": NaN, "learning_rate": 7.390614269450634e-05, "loss": 0.0, "step": 2095 }, { "epoch": 0.35253553107392144, "grad_norm": NaN, "learning_rate": 7.38825358316354e-05, "loss": 0.0, "step": 2096 }, { "epoch": 0.3527037255066857, "grad_norm": NaN, "learning_rate": 7.385892206936735e-05, "loss": 0.0, "step": 2097 }, { "epoch": 0.35287191993945, "grad_norm": NaN, "learning_rate": 7.383530141452396e-05, "loss": 0.0, "step": 2098 }, { "epoch": 0.3530401143722143, "grad_norm": NaN, "learning_rate": 7.381167387392895e-05, "loss": 0.0, "step": 2099 }, { "epoch": 0.35320830880497855, "grad_norm": NaN, "learning_rate": 7.378803945440808e-05, "loss": 0.0, "step": 2100 }, { "epoch": 0.35337650323774283, "grad_norm": NaN, "learning_rate": 7.376439816278904e-05, "loss": 0.0, "step": 2101 }, { "epoch": 0.3535446976705071, "grad_norm": NaN, "learning_rate": 7.374075000590155e-05, "loss": 0.0, "step": 2102 }, { "epoch": 0.3537128921032714, "grad_norm": NaN, "learning_rate": 7.371709499057729e-05, "loss": 0.0, "step": 2103 }, { "epoch": 0.35388108653603567, "grad_norm": NaN, "learning_rate": 7.369343312364993e-05, "loss": 0.0, "step": 2104 }, { "epoch": 0.35404928096879995, "grad_norm": NaN, "learning_rate": 7.366976441195515e-05, "loss": 0.0, "step": 2105 }, { "epoch": 0.35421747540156423, "grad_norm": NaN, "learning_rate": 7.364608886233052e-05, "loss": 0.0, "step": 2106 }, { "epoch": 0.3543856698343285, "grad_norm": NaN, "learning_rate": 7.362240648161568e-05, "loss": 0.0, "step": 2107 }, { "epoch": 0.35455386426709273, "grad_norm": NaN, "learning_rate": 7.359871727665219e-05, "loss": 0.0, "step": 2108 }, { "epoch": 0.354722058699857, "grad_norm": NaN, "learning_rate": 7.35750212542836e-05, "loss": 0.0, "step": 2109 }, { "epoch": 0.3548902531326213, "grad_norm": NaN, "learning_rate": 7.355131842135543e-05, "loss": 0.0, "step": 2110 }, { "epoch": 0.35505844756538557, "grad_norm": NaN, "learning_rate": 7.352760878471516e-05, "loss": 0.0, "step": 2111 }, { "epoch": 0.35522664199814985, "grad_norm": NaN, "learning_rate": 7.350389235121223e-05, "loss": 0.0, "step": 2112 }, { "epoch": 0.35539483643091413, "grad_norm": NaN, "learning_rate": 7.348016912769806e-05, "loss": 0.0, "step": 2113 }, { "epoch": 0.3555630308636784, "grad_norm": NaN, "learning_rate": 7.345643912102601e-05, "loss": 0.0, "step": 2114 }, { "epoch": 0.3557312252964427, "grad_norm": NaN, "learning_rate": 7.343270233805144e-05, "loss": 0.0, "step": 2115 }, { "epoch": 0.35589941972920697, "grad_norm": NaN, "learning_rate": 7.340895878563162e-05, "loss": 0.0, "step": 2116 }, { "epoch": 0.35606761416197125, "grad_norm": NaN, "learning_rate": 7.338520847062582e-05, "loss": 0.0, "step": 2117 }, { "epoch": 0.3562358085947355, "grad_norm": NaN, "learning_rate": 7.336145139989518e-05, "loss": 0.0, "step": 2118 }, { "epoch": 0.3564040030274998, "grad_norm": NaN, "learning_rate": 7.333768758030292e-05, "loss": 0.0, "step": 2119 }, { "epoch": 0.3565721974602641, "grad_norm": NaN, "learning_rate": 7.331391701871413e-05, "loss": 0.0, "step": 2120 }, { "epoch": 0.35674039189302836, "grad_norm": NaN, "learning_rate": 7.329013972199583e-05, "loss": 0.0, "step": 2121 }, { "epoch": 0.35690858632579264, "grad_norm": NaN, "learning_rate": 7.326635569701703e-05, "loss": 0.0, "step": 2122 }, { "epoch": 0.35707678075855687, "grad_norm": NaN, "learning_rate": 7.324256495064867e-05, "loss": 0.0, "step": 2123 }, { "epoch": 0.35724497519132115, "grad_norm": NaN, "learning_rate": 7.321876748976361e-05, "loss": 0.0, "step": 2124 }, { "epoch": 0.3574131696240854, "grad_norm": NaN, "learning_rate": 7.319496332123671e-05, "loss": 0.0, "step": 2125 }, { "epoch": 0.3575813640568497, "grad_norm": NaN, "learning_rate": 7.317115245194469e-05, "loss": 0.0, "step": 2126 }, { "epoch": 0.357749558489614, "grad_norm": NaN, "learning_rate": 7.314733488876626e-05, "loss": 0.0, "step": 2127 }, { "epoch": 0.35791775292237826, "grad_norm": NaN, "learning_rate": 7.312351063858206e-05, "loss": 0.0, "step": 2128 }, { "epoch": 0.35808594735514254, "grad_norm": NaN, "learning_rate": 7.309967970827462e-05, "loss": 0.0, "step": 2129 }, { "epoch": 0.3582541417879068, "grad_norm": NaN, "learning_rate": 7.307584210472844e-05, "loss": 0.0, "step": 2130 }, { "epoch": 0.3584223362206711, "grad_norm": NaN, "learning_rate": 7.305199783482997e-05, "loss": 0.0, "step": 2131 }, { "epoch": 0.3585905306534354, "grad_norm": NaN, "learning_rate": 7.302814690546749e-05, "loss": 0.0, "step": 2132 }, { "epoch": 0.35875872508619966, "grad_norm": NaN, "learning_rate": 7.30042893235313e-05, "loss": 0.0, "step": 2133 }, { "epoch": 0.35892691951896394, "grad_norm": NaN, "learning_rate": 7.29804250959136e-05, "loss": 0.0, "step": 2134 }, { "epoch": 0.3590951139517282, "grad_norm": NaN, "learning_rate": 7.295655422950849e-05, "loss": 0.0, "step": 2135 }, { "epoch": 0.3592633083844925, "grad_norm": NaN, "learning_rate": 7.293267673121197e-05, "loss": 0.0, "step": 2136 }, { "epoch": 0.3594315028172568, "grad_norm": NaN, "learning_rate": 7.290879260792203e-05, "loss": 0.0, "step": 2137 }, { "epoch": 0.359599697250021, "grad_norm": NaN, "learning_rate": 7.288490186653848e-05, "loss": 0.0, "step": 2138 }, { "epoch": 0.3597678916827853, "grad_norm": NaN, "learning_rate": 7.286100451396312e-05, "loss": 0.0, "step": 2139 }, { "epoch": 0.35993608611554956, "grad_norm": NaN, "learning_rate": 7.283710055709959e-05, "loss": 0.0, "step": 2140 }, { "epoch": 0.36010428054831384, "grad_norm": NaN, "learning_rate": 7.281319000285352e-05, "loss": 0.0, "step": 2141 }, { "epoch": 0.3602724749810781, "grad_norm": NaN, "learning_rate": 7.278927285813238e-05, "loss": 0.0, "step": 2142 }, { "epoch": 0.3604406694138424, "grad_norm": NaN, "learning_rate": 7.276534912984556e-05, "loss": 0.0, "step": 2143 }, { "epoch": 0.3606088638466067, "grad_norm": NaN, "learning_rate": 7.274141882490435e-05, "loss": 0.0, "step": 2144 }, { "epoch": 0.36077705827937095, "grad_norm": NaN, "learning_rate": 7.271748195022199e-05, "loss": 0.0, "step": 2145 }, { "epoch": 0.36094525271213523, "grad_norm": NaN, "learning_rate": 7.269353851271352e-05, "loss": 0.0, "step": 2146 }, { "epoch": 0.3611134471448995, "grad_norm": NaN, "learning_rate": 7.266958851929598e-05, "loss": 0.0, "step": 2147 }, { "epoch": 0.3612816415776638, "grad_norm": NaN, "learning_rate": 7.26456319768882e-05, "loss": 0.0, "step": 2148 }, { "epoch": 0.36144983601042807, "grad_norm": NaN, "learning_rate": 7.2621668892411e-05, "loss": 0.0, "step": 2149 }, { "epoch": 0.36161803044319235, "grad_norm": NaN, "learning_rate": 7.259769927278705e-05, "loss": 0.0, "step": 2150 }, { "epoch": 0.36178622487595663, "grad_norm": NaN, "learning_rate": 7.257372312494088e-05, "loss": 0.0, "step": 2151 }, { "epoch": 0.36195441930872085, "grad_norm": NaN, "learning_rate": 7.254974045579893e-05, "loss": 0.0, "step": 2152 }, { "epoch": 0.36212261374148513, "grad_norm": NaN, "learning_rate": 7.252575127228954e-05, "loss": 0.0, "step": 2153 }, { "epoch": 0.3622908081742494, "grad_norm": NaN, "learning_rate": 7.250175558134291e-05, "loss": 0.0, "step": 2154 }, { "epoch": 0.3624590026070137, "grad_norm": NaN, "learning_rate": 7.247775338989111e-05, "loss": 0.0, "step": 2155 }, { "epoch": 0.36262719703977797, "grad_norm": NaN, "learning_rate": 7.245374470486814e-05, "loss": 0.0, "step": 2156 }, { "epoch": 0.36279539147254225, "grad_norm": NaN, "learning_rate": 7.242972953320984e-05, "loss": 0.0, "step": 2157 }, { "epoch": 0.36296358590530653, "grad_norm": NaN, "learning_rate": 7.240570788185388e-05, "loss": 0.0, "step": 2158 }, { "epoch": 0.3631317803380708, "grad_norm": NaN, "learning_rate": 7.238167975773987e-05, "loss": 0.0, "step": 2159 }, { "epoch": 0.3632999747708351, "grad_norm": NaN, "learning_rate": 7.235764516780927e-05, "loss": 0.0, "step": 2160 }, { "epoch": 0.36346816920359937, "grad_norm": NaN, "learning_rate": 7.23336041190054e-05, "loss": 0.0, "step": 2161 }, { "epoch": 0.36363636363636365, "grad_norm": NaN, "learning_rate": 7.230955661827346e-05, "loss": 0.0, "step": 2162 }, { "epoch": 0.3638045580691279, "grad_norm": NaN, "learning_rate": 7.228550267256048e-05, "loss": 0.0, "step": 2163 }, { "epoch": 0.3639727525018922, "grad_norm": NaN, "learning_rate": 7.22614422888154e-05, "loss": 0.0, "step": 2164 }, { "epoch": 0.3641409469346565, "grad_norm": NaN, "learning_rate": 7.223737547398898e-05, "loss": 0.0, "step": 2165 }, { "epoch": 0.36430914136742076, "grad_norm": NaN, "learning_rate": 7.221330223503387e-05, "loss": 0.0, "step": 2166 }, { "epoch": 0.364477335800185, "grad_norm": NaN, "learning_rate": 7.218922257890454e-05, "loss": 0.0, "step": 2167 }, { "epoch": 0.36464553023294927, "grad_norm": NaN, "learning_rate": 7.216513651255735e-05, "loss": 0.0, "step": 2168 }, { "epoch": 0.36481372466571355, "grad_norm": NaN, "learning_rate": 7.214104404295045e-05, "loss": 0.0, "step": 2169 }, { "epoch": 0.3649819190984778, "grad_norm": NaN, "learning_rate": 7.211694517704394e-05, "loss": 0.0, "step": 2170 }, { "epoch": 0.3651501135312421, "grad_norm": NaN, "learning_rate": 7.20928399217997e-05, "loss": 0.0, "step": 2171 }, { "epoch": 0.3653183079640064, "grad_norm": NaN, "learning_rate": 7.206872828418146e-05, "loss": 0.0, "step": 2172 }, { "epoch": 0.36548650239677066, "grad_norm": NaN, "learning_rate": 7.20446102711548e-05, "loss": 0.0, "step": 2173 }, { "epoch": 0.36565469682953494, "grad_norm": NaN, "learning_rate": 7.202048588968715e-05, "loss": 0.0, "step": 2174 }, { "epoch": 0.3658228912622992, "grad_norm": NaN, "learning_rate": 7.199635514674778e-05, "loss": 0.0, "step": 2175 }, { "epoch": 0.3659910856950635, "grad_norm": NaN, "learning_rate": 7.197221804930776e-05, "loss": 0.0, "step": 2176 }, { "epoch": 0.3661592801278278, "grad_norm": NaN, "learning_rate": 7.194807460434005e-05, "loss": 0.0, "step": 2177 }, { "epoch": 0.36632747456059206, "grad_norm": NaN, "learning_rate": 7.192392481881944e-05, "loss": 0.0, "step": 2178 }, { "epoch": 0.36649566899335634, "grad_norm": NaN, "learning_rate": 7.189976869972248e-05, "loss": 0.0, "step": 2179 }, { "epoch": 0.3666638634261206, "grad_norm": NaN, "learning_rate": 7.187560625402766e-05, "loss": 0.0, "step": 2180 }, { "epoch": 0.3668320578588849, "grad_norm": NaN, "learning_rate": 7.18514374887152e-05, "loss": 0.0, "step": 2181 }, { "epoch": 0.3670002522916491, "grad_norm": NaN, "learning_rate": 7.18272624107672e-05, "loss": 0.0, "step": 2182 }, { "epoch": 0.3671684467244134, "grad_norm": NaN, "learning_rate": 7.180308102716758e-05, "loss": 0.0, "step": 2183 }, { "epoch": 0.3673366411571777, "grad_norm": NaN, "learning_rate": 7.177889334490206e-05, "loss": 0.0, "step": 2184 }, { "epoch": 0.36750483558994196, "grad_norm": NaN, "learning_rate": 7.175469937095818e-05, "loss": 0.0, "step": 2185 }, { "epoch": 0.36767303002270624, "grad_norm": NaN, "learning_rate": 7.173049911232533e-05, "loss": 0.0, "step": 2186 }, { "epoch": 0.3678412244554705, "grad_norm": NaN, "learning_rate": 7.170629257599468e-05, "loss": 0.0, "step": 2187 }, { "epoch": 0.3680094188882348, "grad_norm": NaN, "learning_rate": 7.168207976895922e-05, "loss": 0.0, "step": 2188 }, { "epoch": 0.3681776133209991, "grad_norm": NaN, "learning_rate": 7.165786069821376e-05, "loss": 0.0, "step": 2189 }, { "epoch": 0.36834580775376335, "grad_norm": NaN, "learning_rate": 7.163363537075495e-05, "loss": 0.0, "step": 2190 }, { "epoch": 0.36851400218652763, "grad_norm": NaN, "learning_rate": 7.160940379358117e-05, "loss": 0.0, "step": 2191 }, { "epoch": 0.3686821966192919, "grad_norm": NaN, "learning_rate": 7.158516597369269e-05, "loss": 0.0, "step": 2192 }, { "epoch": 0.3688503910520562, "grad_norm": NaN, "learning_rate": 7.156092191809152e-05, "loss": 0.0, "step": 2193 }, { "epoch": 0.36901858548482047, "grad_norm": NaN, "learning_rate": 7.153667163378151e-05, "loss": 0.0, "step": 2194 }, { "epoch": 0.36918677991758475, "grad_norm": NaN, "learning_rate": 7.151241512776829e-05, "loss": 0.0, "step": 2195 }, { "epoch": 0.36935497435034903, "grad_norm": NaN, "learning_rate": 7.148815240705928e-05, "loss": 0.0, "step": 2196 }, { "epoch": 0.36952316878311325, "grad_norm": NaN, "learning_rate": 7.146388347866372e-05, "loss": 0.0, "step": 2197 }, { "epoch": 0.36969136321587753, "grad_norm": NaN, "learning_rate": 7.143960834959264e-05, "loss": 0.0, "step": 2198 }, { "epoch": 0.3698595576486418, "grad_norm": NaN, "learning_rate": 7.141532702685886e-05, "loss": 0.0, "step": 2199 }, { "epoch": 0.3700277520814061, "grad_norm": NaN, "learning_rate": 7.139103951747695e-05, "loss": 0.0, "step": 2200 }, { "epoch": 0.37019594651417037, "grad_norm": NaN, "learning_rate": 7.136674582846333e-05, "loss": 0.0, "step": 2201 }, { "epoch": 0.37036414094693465, "grad_norm": NaN, "learning_rate": 7.134244596683614e-05, "loss": 0.0, "step": 2202 }, { "epoch": 0.37053233537969893, "grad_norm": NaN, "learning_rate": 7.131813993961537e-05, "loss": 0.0, "step": 2203 }, { "epoch": 0.3707005298124632, "grad_norm": NaN, "learning_rate": 7.129382775382276e-05, "loss": 0.0, "step": 2204 }, { "epoch": 0.3708687242452275, "grad_norm": NaN, "learning_rate": 7.12695094164818e-05, "loss": 0.0, "step": 2205 }, { "epoch": 0.37103691867799177, "grad_norm": NaN, "learning_rate": 7.12451849346178e-05, "loss": 0.0, "step": 2206 }, { "epoch": 0.37120511311075605, "grad_norm": NaN, "learning_rate": 7.122085431525785e-05, "loss": 0.0, "step": 2207 }, { "epoch": 0.3713733075435203, "grad_norm": NaN, "learning_rate": 7.119651756543075e-05, "loss": 0.0, "step": 2208 }, { "epoch": 0.3715415019762846, "grad_norm": NaN, "learning_rate": 7.117217469216716e-05, "loss": 0.0, "step": 2209 }, { "epoch": 0.3717096964090489, "grad_norm": NaN, "learning_rate": 7.114782570249943e-05, "loss": 0.0, "step": 2210 }, { "epoch": 0.37187789084181316, "grad_norm": NaN, "learning_rate": 7.112347060346173e-05, "loss": 0.0, "step": 2211 }, { "epoch": 0.3720460852745774, "grad_norm": NaN, "learning_rate": 7.109910940208999e-05, "loss": 0.0, "step": 2212 }, { "epoch": 0.37221427970734167, "grad_norm": NaN, "learning_rate": 7.107474210542185e-05, "loss": 0.0, "step": 2213 }, { "epoch": 0.37238247414010595, "grad_norm": NaN, "learning_rate": 7.105036872049675e-05, "loss": 0.0, "step": 2214 }, { "epoch": 0.3725506685728702, "grad_norm": NaN, "learning_rate": 7.102598925435593e-05, "loss": 0.0, "step": 2215 }, { "epoch": 0.3727188630056345, "grad_norm": NaN, "learning_rate": 7.100160371404229e-05, "loss": 0.0, "step": 2216 }, { "epoch": 0.3728870574383988, "grad_norm": NaN, "learning_rate": 7.09772121066006e-05, "loss": 0.0, "step": 2217 }, { "epoch": 0.37305525187116306, "grad_norm": NaN, "learning_rate": 7.095281443907726e-05, "loss": 0.0, "step": 2218 }, { "epoch": 0.37322344630392734, "grad_norm": NaN, "learning_rate": 7.092841071852055e-05, "loss": 0.0, "step": 2219 }, { "epoch": 0.3733916407366916, "grad_norm": NaN, "learning_rate": 7.090400095198035e-05, "loss": 0.0, "step": 2220 }, { "epoch": 0.3735598351694559, "grad_norm": NaN, "learning_rate": 7.087958514650846e-05, "loss": 0.0, "step": 2221 }, { "epoch": 0.3737280296022202, "grad_norm": NaN, "learning_rate": 7.085516330915825e-05, "loss": 0.0, "step": 2222 }, { "epoch": 0.37389622403498446, "grad_norm": NaN, "learning_rate": 7.083073544698498e-05, "loss": 0.0, "step": 2223 }, { "epoch": 0.37406441846774874, "grad_norm": NaN, "learning_rate": 7.080630156704553e-05, "loss": 0.0, "step": 2224 }, { "epoch": 0.374232612900513, "grad_norm": NaN, "learning_rate": 7.078186167639862e-05, "loss": 0.0, "step": 2225 }, { "epoch": 0.37440080733327724, "grad_norm": NaN, "learning_rate": 7.075741578210463e-05, "loss": 0.0, "step": 2226 }, { "epoch": 0.3745690017660415, "grad_norm": NaN, "learning_rate": 7.073296389122572e-05, "loss": 0.0, "step": 2227 }, { "epoch": 0.3747371961988058, "grad_norm": NaN, "learning_rate": 7.070850601082577e-05, "loss": 0.0, "step": 2228 }, { "epoch": 0.3749053906315701, "grad_norm": NaN, "learning_rate": 7.068404214797038e-05, "loss": 0.0, "step": 2229 }, { "epoch": 0.37507358506433436, "grad_norm": NaN, "learning_rate": 7.065957230972689e-05, "loss": 0.0, "step": 2230 }, { "epoch": 0.37524177949709864, "grad_norm": NaN, "learning_rate": 7.063509650316435e-05, "loss": 0.0, "step": 2231 }, { "epoch": 0.3754099739298629, "grad_norm": NaN, "learning_rate": 7.061061473535358e-05, "loss": 0.0, "step": 2232 }, { "epoch": 0.3755781683626272, "grad_norm": NaN, "learning_rate": 7.058612701336705e-05, "loss": 0.0, "step": 2233 }, { "epoch": 0.3757463627953915, "grad_norm": NaN, "learning_rate": 7.056163334427899e-05, "loss": 0.0, "step": 2234 }, { "epoch": 0.37591455722815575, "grad_norm": NaN, "learning_rate": 7.053713373516538e-05, "loss": 0.0, "step": 2235 }, { "epoch": 0.37608275166092003, "grad_norm": NaN, "learning_rate": 7.051262819310385e-05, "loss": 0.0, "step": 2236 }, { "epoch": 0.3762509460936843, "grad_norm": NaN, "learning_rate": 7.048811672517379e-05, "loss": 0.0, "step": 2237 }, { "epoch": 0.3764191405264486, "grad_norm": NaN, "learning_rate": 7.04635993384563e-05, "loss": 0.0, "step": 2238 }, { "epoch": 0.37658733495921287, "grad_norm": NaN, "learning_rate": 7.043907604003415e-05, "loss": 0.0, "step": 2239 }, { "epoch": 0.37675552939197715, "grad_norm": NaN, "learning_rate": 7.041454683699189e-05, "loss": 0.0, "step": 2240 }, { "epoch": 0.3769237238247414, "grad_norm": NaN, "learning_rate": 7.039001173641568e-05, "loss": 0.0, "step": 2241 }, { "epoch": 0.37709191825750565, "grad_norm": NaN, "learning_rate": 7.036547074539347e-05, "loss": 0.0, "step": 2242 }, { "epoch": 0.37726011269026993, "grad_norm": NaN, "learning_rate": 7.034092387101487e-05, "loss": 0.0, "step": 2243 }, { "epoch": 0.3774283071230342, "grad_norm": NaN, "learning_rate": 7.031637112037118e-05, "loss": 0.0, "step": 2244 }, { "epoch": 0.3775965015557985, "grad_norm": NaN, "learning_rate": 7.029181250055545e-05, "loss": 0.0, "step": 2245 }, { "epoch": 0.37776469598856277, "grad_norm": NaN, "learning_rate": 7.026724801866235e-05, "loss": 0.0, "step": 2246 }, { "epoch": 0.37793289042132705, "grad_norm": NaN, "learning_rate": 7.024267768178831e-05, "loss": 0.0, "step": 2247 }, { "epoch": 0.37810108485409133, "grad_norm": NaN, "learning_rate": 7.021810149703143e-05, "loss": 0.0, "step": 2248 }, { "epoch": 0.3782692792868556, "grad_norm": NaN, "learning_rate": 7.019351947149148e-05, "loss": 0.0, "step": 2249 }, { "epoch": 0.3784374737196199, "grad_norm": NaN, "learning_rate": 7.016893161226994e-05, "loss": 0.0, "step": 2250 }, { "epoch": 0.37860566815238417, "grad_norm": NaN, "learning_rate": 7.014433792646995e-05, "loss": 0.0, "step": 2251 }, { "epoch": 0.37877386258514845, "grad_norm": NaN, "learning_rate": 7.011973842119637e-05, "loss": 0.0, "step": 2252 }, { "epoch": 0.3789420570179127, "grad_norm": NaN, "learning_rate": 7.009513310355571e-05, "loss": 0.0, "step": 2253 }, { "epoch": 0.379110251450677, "grad_norm": NaN, "learning_rate": 7.007052198065618e-05, "loss": 0.0, "step": 2254 }, { "epoch": 0.3792784458834413, "grad_norm": NaN, "learning_rate": 7.004590505960765e-05, "loss": 0.0, "step": 2255 }, { "epoch": 0.3794466403162055, "grad_norm": NaN, "learning_rate": 7.002128234752166e-05, "loss": 0.0, "step": 2256 }, { "epoch": 0.3796148347489698, "grad_norm": NaN, "learning_rate": 6.999665385151147e-05, "loss": 0.0, "step": 2257 }, { "epoch": 0.37978302918173407, "grad_norm": NaN, "learning_rate": 6.997201957869195e-05, "loss": 0.0, "step": 2258 }, { "epoch": 0.37995122361449835, "grad_norm": NaN, "learning_rate": 6.994737953617968e-05, "loss": 0.0, "step": 2259 }, { "epoch": 0.3801194180472626, "grad_norm": NaN, "learning_rate": 6.992273373109288e-05, "loss": 0.0, "step": 2260 }, { "epoch": 0.3802876124800269, "grad_norm": NaN, "learning_rate": 6.989808217055145e-05, "loss": 0.0, "step": 2261 }, { "epoch": 0.3804558069127912, "grad_norm": NaN, "learning_rate": 6.987342486167697e-05, "loss": 0.0, "step": 2262 }, { "epoch": 0.38062400134555546, "grad_norm": NaN, "learning_rate": 6.984876181159261e-05, "loss": 0.0, "step": 2263 }, { "epoch": 0.38079219577831974, "grad_norm": NaN, "learning_rate": 6.98240930274233e-05, "loss": 0.0, "step": 2264 }, { "epoch": 0.380960390211084, "grad_norm": NaN, "learning_rate": 6.979941851629555e-05, "loss": 0.0, "step": 2265 }, { "epoch": 0.3811285846438483, "grad_norm": NaN, "learning_rate": 6.977473828533757e-05, "loss": 0.0, "step": 2266 }, { "epoch": 0.3812967790766126, "grad_norm": NaN, "learning_rate": 6.975005234167917e-05, "loss": 0.0, "step": 2267 }, { "epoch": 0.38146497350937686, "grad_norm": NaN, "learning_rate": 6.972536069245189e-05, "loss": 0.0, "step": 2268 }, { "epoch": 0.38163316794214114, "grad_norm": NaN, "learning_rate": 6.970066334478882e-05, "loss": 0.0, "step": 2269 }, { "epoch": 0.3818013623749054, "grad_norm": NaN, "learning_rate": 6.967596030582478e-05, "loss": 0.0, "step": 2270 }, { "epoch": 0.38196955680766964, "grad_norm": NaN, "learning_rate": 6.965125158269619e-05, "loss": 0.0, "step": 2271 }, { "epoch": 0.3821377512404339, "grad_norm": NaN, "learning_rate": 6.962653718254111e-05, "loss": 0.0, "step": 2272 }, { "epoch": 0.3823059456731982, "grad_norm": NaN, "learning_rate": 6.960181711249929e-05, "loss": 0.0, "step": 2273 }, { "epoch": 0.3824741401059625, "grad_norm": NaN, "learning_rate": 6.957709137971204e-05, "loss": 0.0, "step": 2274 }, { "epoch": 0.38264233453872676, "grad_norm": NaN, "learning_rate": 6.955235999132238e-05, "loss": 0.0, "step": 2275 }, { "epoch": 0.38281052897149104, "grad_norm": NaN, "learning_rate": 6.952762295447491e-05, "loss": 0.0, "step": 2276 }, { "epoch": 0.3829787234042553, "grad_norm": NaN, "learning_rate": 6.950288027631588e-05, "loss": 0.0, "step": 2277 }, { "epoch": 0.3831469178370196, "grad_norm": NaN, "learning_rate": 6.947813196399321e-05, "loss": 0.0, "step": 2278 }, { "epoch": 0.3833151122697839, "grad_norm": NaN, "learning_rate": 6.945337802465636e-05, "loss": 0.0, "step": 2279 }, { "epoch": 0.38348330670254815, "grad_norm": NaN, "learning_rate": 6.942861846545651e-05, "loss": 0.0, "step": 2280 }, { "epoch": 0.38365150113531243, "grad_norm": NaN, "learning_rate": 6.940385329354639e-05, "loss": 0.0, "step": 2281 }, { "epoch": 0.3838196955680767, "grad_norm": NaN, "learning_rate": 6.937908251608038e-05, "loss": 0.0, "step": 2282 }, { "epoch": 0.383987890000841, "grad_norm": NaN, "learning_rate": 6.93543061402145e-05, "loss": 0.0, "step": 2283 }, { "epoch": 0.38415608443360527, "grad_norm": NaN, "learning_rate": 6.932952417310634e-05, "loss": 0.0, "step": 2284 }, { "epoch": 0.38432427886636955, "grad_norm": NaN, "learning_rate": 6.930473662191517e-05, "loss": 0.0, "step": 2285 }, { "epoch": 0.3844924732991338, "grad_norm": NaN, "learning_rate": 6.92799434938018e-05, "loss": 0.0, "step": 2286 }, { "epoch": 0.38466066773189805, "grad_norm": NaN, "learning_rate": 6.925514479592872e-05, "loss": 0.0, "step": 2287 }, { "epoch": 0.38482886216466233, "grad_norm": NaN, "learning_rate": 6.923034053545996e-05, "loss": 0.0, "step": 2288 }, { "epoch": 0.3849970565974266, "grad_norm": NaN, "learning_rate": 6.920553071956122e-05, "loss": 0.0, "step": 2289 }, { "epoch": 0.3851652510301909, "grad_norm": NaN, "learning_rate": 6.918071535539978e-05, "loss": 0.0, "step": 2290 }, { "epoch": 0.38533344546295517, "grad_norm": NaN, "learning_rate": 6.915589445014448e-05, "loss": 0.0, "step": 2291 }, { "epoch": 0.38550163989571945, "grad_norm": NaN, "learning_rate": 6.913106801096586e-05, "loss": 0.0, "step": 2292 }, { "epoch": 0.38566983432848373, "grad_norm": NaN, "learning_rate": 6.910623604503596e-05, "loss": 0.0, "step": 2293 }, { "epoch": 0.385838028761248, "grad_norm": NaN, "learning_rate": 6.908139855952846e-05, "loss": 0.0, "step": 2294 }, { "epoch": 0.3860062231940123, "grad_norm": NaN, "learning_rate": 6.905655556161865e-05, "loss": 0.0, "step": 2295 }, { "epoch": 0.38617441762677657, "grad_norm": NaN, "learning_rate": 6.903170705848339e-05, "loss": 0.0, "step": 2296 }, { "epoch": 0.38634261205954085, "grad_norm": NaN, "learning_rate": 6.900685305730112e-05, "loss": 0.0, "step": 2297 }, { "epoch": 0.3865108064923051, "grad_norm": NaN, "learning_rate": 6.89819935652519e-05, "loss": 0.0, "step": 2298 }, { "epoch": 0.3866790009250694, "grad_norm": NaN, "learning_rate": 6.895712858951735e-05, "loss": 0.0, "step": 2299 }, { "epoch": 0.38684719535783363, "grad_norm": NaN, "learning_rate": 6.893225813728067e-05, "loss": 0.0, "step": 2300 }, { "epoch": 0.3870153897905979, "grad_norm": NaN, "learning_rate": 6.890738221572668e-05, "loss": 0.0, "step": 2301 }, { "epoch": 0.3871835842233622, "grad_norm": NaN, "learning_rate": 6.888250083204173e-05, "loss": 0.0, "step": 2302 }, { "epoch": 0.38735177865612647, "grad_norm": NaN, "learning_rate": 6.885761399341379e-05, "loss": 0.0, "step": 2303 }, { "epoch": 0.38751997308889075, "grad_norm": NaN, "learning_rate": 6.883272170703237e-05, "loss": 0.0, "step": 2304 }, { "epoch": 0.387688167521655, "grad_norm": NaN, "learning_rate": 6.880782398008862e-05, "loss": 0.0, "step": 2305 }, { "epoch": 0.3878563619544193, "grad_norm": NaN, "learning_rate": 6.878292081977516e-05, "loss": 0.0, "step": 2306 }, { "epoch": 0.3880245563871836, "grad_norm": NaN, "learning_rate": 6.875801223328628e-05, "loss": 0.0, "step": 2307 }, { "epoch": 0.38819275081994786, "grad_norm": NaN, "learning_rate": 6.873309822781773e-05, "loss": 0.0, "step": 2308 }, { "epoch": 0.38836094525271214, "grad_norm": NaN, "learning_rate": 6.870817881056695e-05, "loss": 0.0, "step": 2309 }, { "epoch": 0.3885291396854764, "grad_norm": NaN, "learning_rate": 6.868325398873284e-05, "loss": 0.0, "step": 2310 }, { "epoch": 0.3886973341182407, "grad_norm": NaN, "learning_rate": 6.865832376951594e-05, "loss": 0.0, "step": 2311 }, { "epoch": 0.388865528551005, "grad_norm": NaN, "learning_rate": 6.863338816011826e-05, "loss": 0.0, "step": 2312 }, { "epoch": 0.38903372298376926, "grad_norm": NaN, "learning_rate": 6.860844716774346e-05, "loss": 0.0, "step": 2313 }, { "epoch": 0.38920191741653354, "grad_norm": NaN, "learning_rate": 6.85835007995967e-05, "loss": 0.0, "step": 2314 }, { "epoch": 0.38937011184929776, "grad_norm": NaN, "learning_rate": 6.855854906288471e-05, "loss": 0.0, "step": 2315 }, { "epoch": 0.38953830628206204, "grad_norm": NaN, "learning_rate": 6.853359196481576e-05, "loss": 0.0, "step": 2316 }, { "epoch": 0.3897065007148263, "grad_norm": NaN, "learning_rate": 6.850862951259968e-05, "loss": 0.0, "step": 2317 }, { "epoch": 0.3898746951475906, "grad_norm": NaN, "learning_rate": 6.848366171344784e-05, "loss": 0.0, "step": 2318 }, { "epoch": 0.3900428895803549, "grad_norm": NaN, "learning_rate": 6.845868857457316e-05, "loss": 0.0, "step": 2319 }, { "epoch": 0.39021108401311916, "grad_norm": NaN, "learning_rate": 6.84337101031901e-05, "loss": 0.0, "step": 2320 }, { "epoch": 0.39037927844588344, "grad_norm": NaN, "learning_rate": 6.840872630651467e-05, "loss": 0.0, "step": 2321 }, { "epoch": 0.3905474728786477, "grad_norm": NaN, "learning_rate": 6.838373719176439e-05, "loss": 0.0, "step": 2322 }, { "epoch": 0.390715667311412, "grad_norm": NaN, "learning_rate": 6.835874276615835e-05, "loss": 0.0, "step": 2323 }, { "epoch": 0.3908838617441763, "grad_norm": NaN, "learning_rate": 6.833374303691714e-05, "loss": 0.0, "step": 2324 }, { "epoch": 0.39105205617694055, "grad_norm": NaN, "learning_rate": 6.830873801126293e-05, "loss": 0.0, "step": 2325 }, { "epoch": 0.39122025060970483, "grad_norm": NaN, "learning_rate": 6.828372769641938e-05, "loss": 0.0, "step": 2326 }, { "epoch": 0.3913884450424691, "grad_norm": NaN, "learning_rate": 6.825871209961168e-05, "loss": 0.0, "step": 2327 }, { "epoch": 0.3915566394752334, "grad_norm": NaN, "learning_rate": 6.823369122806656e-05, "loss": 0.0, "step": 2328 }, { "epoch": 0.39172483390799767, "grad_norm": NaN, "learning_rate": 6.820866508901229e-05, "loss": 0.0, "step": 2329 }, { "epoch": 0.3918930283407619, "grad_norm": NaN, "learning_rate": 6.818363368967862e-05, "loss": 0.0, "step": 2330 }, { "epoch": 0.3920612227735262, "grad_norm": NaN, "learning_rate": 6.815859703729683e-05, "loss": 0.0, "step": 2331 }, { "epoch": 0.39222941720629045, "grad_norm": NaN, "learning_rate": 6.813355513909976e-05, "loss": 0.0, "step": 2332 }, { "epoch": 0.39239761163905473, "grad_norm": NaN, "learning_rate": 6.810850800232172e-05, "loss": 0.0, "step": 2333 }, { "epoch": 0.392565806071819, "grad_norm": NaN, "learning_rate": 6.808345563419853e-05, "loss": 0.0, "step": 2334 }, { "epoch": 0.3927340005045833, "grad_norm": NaN, "learning_rate": 6.805839804196757e-05, "loss": 0.0, "step": 2335 }, { "epoch": 0.39290219493734757, "grad_norm": NaN, "learning_rate": 6.803333523286766e-05, "loss": 0.0, "step": 2336 }, { "epoch": 0.39307038937011185, "grad_norm": NaN, "learning_rate": 6.800826721413919e-05, "loss": 0.0, "step": 2337 }, { "epoch": 0.39323858380287613, "grad_norm": NaN, "learning_rate": 6.798319399302404e-05, "loss": 0.0, "step": 2338 }, { "epoch": 0.3934067782356404, "grad_norm": NaN, "learning_rate": 6.795811557676557e-05, "loss": 0.0, "step": 2339 }, { "epoch": 0.3935749726684047, "grad_norm": NaN, "learning_rate": 6.793303197260864e-05, "loss": 0.0, "step": 2340 }, { "epoch": 0.39374316710116897, "grad_norm": NaN, "learning_rate": 6.790794318779964e-05, "loss": 0.0, "step": 2341 }, { "epoch": 0.39391136153393325, "grad_norm": NaN, "learning_rate": 6.788284922958643e-05, "loss": 0.0, "step": 2342 }, { "epoch": 0.3940795559666975, "grad_norm": NaN, "learning_rate": 6.785775010521837e-05, "loss": 0.0, "step": 2343 }, { "epoch": 0.3942477503994618, "grad_norm": NaN, "learning_rate": 6.783264582194635e-05, "loss": 0.0, "step": 2344 }, { "epoch": 0.39441594483222603, "grad_norm": NaN, "learning_rate": 6.780753638702267e-05, "loss": 0.0, "step": 2345 }, { "epoch": 0.3945841392649903, "grad_norm": NaN, "learning_rate": 6.778242180770118e-05, "loss": 0.0, "step": 2346 }, { "epoch": 0.3947523336977546, "grad_norm": NaN, "learning_rate": 6.775730209123722e-05, "loss": 0.0, "step": 2347 }, { "epoch": 0.39492052813051887, "grad_norm": NaN, "learning_rate": 6.773217724488756e-05, "loss": 0.0, "step": 2348 }, { "epoch": 0.39508872256328315, "grad_norm": NaN, "learning_rate": 6.770704727591053e-05, "loss": 0.0, "step": 2349 }, { "epoch": 0.3952569169960474, "grad_norm": NaN, "learning_rate": 6.768191219156586e-05, "loss": 0.0, "step": 2350 }, { "epoch": 0.3954251114288117, "grad_norm": NaN, "learning_rate": 6.765677199911479e-05, "loss": 0.0, "step": 2351 }, { "epoch": 0.395593305861576, "grad_norm": NaN, "learning_rate": 6.763162670582007e-05, "loss": 0.0, "step": 2352 }, { "epoch": 0.39576150029434026, "grad_norm": NaN, "learning_rate": 6.760647631894589e-05, "loss": 0.0, "step": 2353 }, { "epoch": 0.39592969472710454, "grad_norm": NaN, "learning_rate": 6.75813208457579e-05, "loss": 0.0, "step": 2354 }, { "epoch": 0.3960978891598688, "grad_norm": NaN, "learning_rate": 6.755616029352323e-05, "loss": 0.0, "step": 2355 }, { "epoch": 0.3962660835926331, "grad_norm": NaN, "learning_rate": 6.753099466951049e-05, "loss": 0.0, "step": 2356 }, { "epoch": 0.3964342780253974, "grad_norm": NaN, "learning_rate": 6.750582398098976e-05, "loss": 0.0, "step": 2357 }, { "epoch": 0.39660247245816166, "grad_norm": NaN, "learning_rate": 6.748064823523255e-05, "loss": 0.0, "step": 2358 }, { "epoch": 0.3967706668909259, "grad_norm": NaN, "learning_rate": 6.745546743951187e-05, "loss": 0.0, "step": 2359 }, { "epoch": 0.39693886132369016, "grad_norm": NaN, "learning_rate": 6.743028160110215e-05, "loss": 0.0, "step": 2360 }, { "epoch": 0.39710705575645444, "grad_norm": NaN, "learning_rate": 6.740509072727931e-05, "loss": 0.0, "step": 2361 }, { "epoch": 0.3972752501892187, "grad_norm": NaN, "learning_rate": 6.73798948253207e-05, "loss": 0.0, "step": 2362 }, { "epoch": 0.397443444621983, "grad_norm": NaN, "learning_rate": 6.735469390250515e-05, "loss": 0.0, "step": 2363 }, { "epoch": 0.3976116390547473, "grad_norm": NaN, "learning_rate": 6.732948796611292e-05, "loss": 0.0, "step": 2364 }, { "epoch": 0.39777983348751156, "grad_norm": NaN, "learning_rate": 6.73042770234257e-05, "loss": 0.0, "step": 2365 }, { "epoch": 0.39794802792027584, "grad_norm": NaN, "learning_rate": 6.727906108172668e-05, "loss": 0.0, "step": 2366 }, { "epoch": 0.3981162223530401, "grad_norm": NaN, "learning_rate": 6.725384014830045e-05, "loss": 0.0, "step": 2367 }, { "epoch": 0.3982844167858044, "grad_norm": NaN, "learning_rate": 6.722861423043305e-05, "loss": 0.0, "step": 2368 }, { "epoch": 0.3984526112185687, "grad_norm": NaN, "learning_rate": 6.720338333541196e-05, "loss": 0.0, "step": 2369 }, { "epoch": 0.39862080565133295, "grad_norm": NaN, "learning_rate": 6.717814747052613e-05, "loss": 0.0, "step": 2370 }, { "epoch": 0.39878900008409723, "grad_norm": NaN, "learning_rate": 6.715290664306589e-05, "loss": 0.0, "step": 2371 }, { "epoch": 0.3989571945168615, "grad_norm": NaN, "learning_rate": 6.712766086032306e-05, "loss": 0.0, "step": 2372 }, { "epoch": 0.3991253889496258, "grad_norm": NaN, "learning_rate": 6.710241012959085e-05, "loss": 0.0, "step": 2373 }, { "epoch": 0.39929358338239, "grad_norm": NaN, "learning_rate": 6.70771544581639e-05, "loss": 0.0, "step": 2374 }, { "epoch": 0.3994617778151543, "grad_norm": NaN, "learning_rate": 6.70518938533383e-05, "loss": 0.0, "step": 2375 }, { "epoch": 0.3996299722479186, "grad_norm": NaN, "learning_rate": 6.702662832241156e-05, "loss": 0.0, "step": 2376 }, { "epoch": 0.39979816668068285, "grad_norm": NaN, "learning_rate": 6.70013578726826e-05, "loss": 0.0, "step": 2377 }, { "epoch": 0.39996636111344713, "grad_norm": NaN, "learning_rate": 6.69760825114518e-05, "loss": 0.0, "step": 2378 }, { "epoch": 0.4001345555462114, "grad_norm": NaN, "learning_rate": 6.695080224602088e-05, "loss": 0.0, "step": 2379 }, { "epoch": 0.4003027499789757, "grad_norm": NaN, "learning_rate": 6.692551708369307e-05, "loss": 0.0, "step": 2380 }, { "epoch": 0.40047094441173997, "grad_norm": NaN, "learning_rate": 6.690022703177294e-05, "loss": 0.0, "step": 2381 }, { "epoch": 0.40063913884450425, "grad_norm": NaN, "learning_rate": 6.687493209756653e-05, "loss": 0.0, "step": 2382 }, { "epoch": 0.40080733327726853, "grad_norm": NaN, "learning_rate": 6.684963228838122e-05, "loss": 0.0, "step": 2383 }, { "epoch": 0.4009755277100328, "grad_norm": NaN, "learning_rate": 6.682432761152589e-05, "loss": 0.0, "step": 2384 }, { "epoch": 0.4011437221427971, "grad_norm": NaN, "learning_rate": 6.679901807431073e-05, "loss": 0.0, "step": 2385 }, { "epoch": 0.40131191657556137, "grad_norm": NaN, "learning_rate": 6.677370368404744e-05, "loss": 0.0, "step": 2386 }, { "epoch": 0.40148011100832565, "grad_norm": NaN, "learning_rate": 6.674838444804902e-05, "loss": 0.0, "step": 2387 }, { "epoch": 0.4016483054410899, "grad_norm": NaN, "learning_rate": 6.672306037362991e-05, "loss": 0.0, "step": 2388 }, { "epoch": 0.40181649987385415, "grad_norm": NaN, "learning_rate": 6.669773146810599e-05, "loss": 0.0, "step": 2389 }, { "epoch": 0.40198469430661843, "grad_norm": NaN, "learning_rate": 6.667239773879447e-05, "loss": 0.0, "step": 2390 }, { "epoch": 0.4021528887393827, "grad_norm": NaN, "learning_rate": 6.664705919301399e-05, "loss": 0.0, "step": 2391 }, { "epoch": 0.402321083172147, "grad_norm": NaN, "learning_rate": 6.662171583808455e-05, "loss": 0.0, "step": 2392 }, { "epoch": 0.40248927760491127, "grad_norm": NaN, "learning_rate": 6.659636768132759e-05, "loss": 0.0, "step": 2393 }, { "epoch": 0.40265747203767555, "grad_norm": NaN, "learning_rate": 6.657101473006588e-05, "loss": 0.0, "step": 2394 }, { "epoch": 0.4028256664704398, "grad_norm": NaN, "learning_rate": 6.654565699162364e-05, "loss": 0.0, "step": 2395 }, { "epoch": 0.4029938609032041, "grad_norm": NaN, "learning_rate": 6.652029447332641e-05, "loss": 0.0, "step": 2396 }, { "epoch": 0.4031620553359684, "grad_norm": NaN, "learning_rate": 6.649492718250115e-05, "loss": 0.0, "step": 2397 }, { "epoch": 0.40333024976873266, "grad_norm": NaN, "learning_rate": 6.646955512647616e-05, "loss": 0.0, "step": 2398 }, { "epoch": 0.40349844420149694, "grad_norm": NaN, "learning_rate": 6.64441783125812e-05, "loss": 0.0, "step": 2399 }, { "epoch": 0.4036666386342612, "grad_norm": NaN, "learning_rate": 6.641879674814729e-05, "loss": 0.0, "step": 2400 }, { "epoch": 0.4038348330670255, "grad_norm": NaN, "learning_rate": 6.63934104405069e-05, "loss": 0.0, "step": 2401 }, { "epoch": 0.4040030274997898, "grad_norm": NaN, "learning_rate": 6.636801939699384e-05, "loss": 0.0, "step": 2402 }, { "epoch": 0.40417122193255406, "grad_norm": NaN, "learning_rate": 6.634262362494332e-05, "loss": 0.0, "step": 2403 }, { "epoch": 0.4043394163653183, "grad_norm": NaN, "learning_rate": 6.631722313169188e-05, "loss": 0.0, "step": 2404 }, { "epoch": 0.40450761079808256, "grad_norm": NaN, "learning_rate": 6.629181792457745e-05, "loss": 0.0, "step": 2405 }, { "epoch": 0.40467580523084684, "grad_norm": NaN, "learning_rate": 6.626640801093929e-05, "loss": 0.0, "step": 2406 }, { "epoch": 0.4048439996636111, "grad_norm": NaN, "learning_rate": 6.624099339811805e-05, "loss": 0.0, "step": 2407 }, { "epoch": 0.4050121940963754, "grad_norm": NaN, "learning_rate": 6.621557409345572e-05, "loss": 0.0, "step": 2408 }, { "epoch": 0.4051803885291397, "grad_norm": NaN, "learning_rate": 6.619015010429568e-05, "loss": 0.0, "step": 2409 }, { "epoch": 0.40534858296190396, "grad_norm": NaN, "learning_rate": 6.616472143798261e-05, "loss": 0.0, "step": 2410 }, { "epoch": 0.40551677739466824, "grad_norm": NaN, "learning_rate": 6.613928810186257e-05, "loss": 0.0, "step": 2411 }, { "epoch": 0.4056849718274325, "grad_norm": NaN, "learning_rate": 6.611385010328294e-05, "loss": 0.0, "step": 2412 }, { "epoch": 0.4058531662601968, "grad_norm": NaN, "learning_rate": 6.608840744959255e-05, "loss": 0.0, "step": 2413 }, { "epoch": 0.4060213606929611, "grad_norm": NaN, "learning_rate": 6.60629601481414e-05, "loss": 0.0, "step": 2414 }, { "epoch": 0.40618955512572535, "grad_norm": NaN, "learning_rate": 6.603750820628102e-05, "loss": 0.0, "step": 2415 }, { "epoch": 0.40635774955848963, "grad_norm": NaN, "learning_rate": 6.601205163136412e-05, "loss": 0.0, "step": 2416 }, { "epoch": 0.4065259439912539, "grad_norm": NaN, "learning_rate": 6.598659043074487e-05, "loss": 0.0, "step": 2417 }, { "epoch": 0.4066941384240182, "grad_norm": NaN, "learning_rate": 6.596112461177869e-05, "loss": 0.0, "step": 2418 }, { "epoch": 0.4068623328567824, "grad_norm": NaN, "learning_rate": 6.59356541818224e-05, "loss": 0.0, "step": 2419 }, { "epoch": 0.4070305272895467, "grad_norm": NaN, "learning_rate": 6.591017914823409e-05, "loss": 0.0, "step": 2420 }, { "epoch": 0.407198721722311, "grad_norm": NaN, "learning_rate": 6.588469951837323e-05, "loss": 0.0, "step": 2421 }, { "epoch": 0.40736691615507525, "grad_norm": NaN, "learning_rate": 6.58592152996006e-05, "loss": 0.0, "step": 2422 }, { "epoch": 0.40753511058783953, "grad_norm": NaN, "learning_rate": 6.58337264992783e-05, "loss": 0.0, "step": 2423 }, { "epoch": 0.4077033050206038, "grad_norm": NaN, "learning_rate": 6.580823312476976e-05, "loss": 0.0, "step": 2424 }, { "epoch": 0.4078714994533681, "grad_norm": NaN, "learning_rate": 6.578273518343975e-05, "loss": 0.0, "step": 2425 }, { "epoch": 0.40803969388613237, "grad_norm": NaN, "learning_rate": 6.57572326826543e-05, "loss": 0.0, "step": 2426 }, { "epoch": 0.40820788831889665, "grad_norm": NaN, "learning_rate": 6.573172562978084e-05, "loss": 0.0, "step": 2427 }, { "epoch": 0.40837608275166093, "grad_norm": NaN, "learning_rate": 6.570621403218803e-05, "loss": 0.0, "step": 2428 }, { "epoch": 0.4085442771844252, "grad_norm": NaN, "learning_rate": 6.568069789724593e-05, "loss": 0.0, "step": 2429 }, { "epoch": 0.4087124716171895, "grad_norm": NaN, "learning_rate": 6.565517723232583e-05, "loss": 0.0, "step": 2430 }, { "epoch": 0.40888066604995377, "grad_norm": NaN, "learning_rate": 6.562965204480039e-05, "loss": 0.0, "step": 2431 }, { "epoch": 0.40904886048271805, "grad_norm": NaN, "learning_rate": 6.560412234204351e-05, "loss": 0.0, "step": 2432 }, { "epoch": 0.40921705491548227, "grad_norm": NaN, "learning_rate": 6.557858813143048e-05, "loss": 0.0, "step": 2433 }, { "epoch": 0.40938524934824655, "grad_norm": NaN, "learning_rate": 6.55530494203378e-05, "loss": 0.0, "step": 2434 }, { "epoch": 0.40955344378101083, "grad_norm": NaN, "learning_rate": 6.552750621614337e-05, "loss": 0.0, "step": 2435 }, { "epoch": 0.4097216382137751, "grad_norm": NaN, "learning_rate": 6.550195852622631e-05, "loss": 0.0, "step": 2436 }, { "epoch": 0.4098898326465394, "grad_norm": NaN, "learning_rate": 6.547640635796707e-05, "loss": 0.0, "step": 2437 }, { "epoch": 0.41005802707930367, "grad_norm": NaN, "learning_rate": 6.545084971874738e-05, "loss": 0.0, "step": 2438 }, { "epoch": 0.41022622151206795, "grad_norm": NaN, "learning_rate": 6.542528861595025e-05, "loss": 0.0, "step": 2439 }, { "epoch": 0.4103944159448322, "grad_norm": NaN, "learning_rate": 6.539972305696002e-05, "loss": 0.0, "step": 2440 }, { "epoch": 0.4105626103775965, "grad_norm": NaN, "learning_rate": 6.537415304916231e-05, "loss": 0.0, "step": 2441 }, { "epoch": 0.4107308048103608, "grad_norm": NaN, "learning_rate": 6.534857859994394e-05, "loss": 0.0, "step": 2442 }, { "epoch": 0.41089899924312506, "grad_norm": NaN, "learning_rate": 6.532299971669316e-05, "loss": 0.0, "step": 2443 }, { "epoch": 0.41106719367588934, "grad_norm": NaN, "learning_rate": 6.529741640679936e-05, "loss": 0.0, "step": 2444 }, { "epoch": 0.4112353881086536, "grad_norm": NaN, "learning_rate": 6.527182867765332e-05, "loss": 0.0, "step": 2445 }, { "epoch": 0.4114035825414179, "grad_norm": NaN, "learning_rate": 6.524623653664703e-05, "loss": 0.0, "step": 2446 }, { "epoch": 0.4115717769741822, "grad_norm": NaN, "learning_rate": 6.522063999117379e-05, "loss": 0.0, "step": 2447 }, { "epoch": 0.4117399714069464, "grad_norm": NaN, "learning_rate": 6.51950390486281e-05, "loss": 0.0, "step": 2448 }, { "epoch": 0.4119081658397107, "grad_norm": NaN, "learning_rate": 6.516943371640583e-05, "loss": 0.0, "step": 2449 }, { "epoch": 0.41207636027247496, "grad_norm": NaN, "learning_rate": 6.514382400190409e-05, "loss": 0.0, "step": 2450 }, { "epoch": 0.41224455470523924, "grad_norm": NaN, "learning_rate": 6.51182099125212e-05, "loss": 0.0, "step": 2451 }, { "epoch": 0.4124127491380035, "grad_norm": NaN, "learning_rate": 6.509259145565681e-05, "loss": 0.0, "step": 2452 }, { "epoch": 0.4125809435707678, "grad_norm": NaN, "learning_rate": 6.506696863871178e-05, "loss": 0.0, "step": 2453 }, { "epoch": 0.4127491380035321, "grad_norm": NaN, "learning_rate": 6.504134146908828e-05, "loss": 0.0, "step": 2454 }, { "epoch": 0.41291733243629636, "grad_norm": NaN, "learning_rate": 6.501570995418969e-05, "loss": 0.0, "step": 2455 }, { "epoch": 0.41308552686906064, "grad_norm": NaN, "learning_rate": 6.499007410142069e-05, "loss": 0.0, "step": 2456 }, { "epoch": 0.4132537213018249, "grad_norm": NaN, "learning_rate": 6.496443391818719e-05, "loss": 0.0, "step": 2457 }, { "epoch": 0.4134219157345892, "grad_norm": NaN, "learning_rate": 6.493878941189633e-05, "loss": 0.0, "step": 2458 }, { "epoch": 0.4135901101673535, "grad_norm": NaN, "learning_rate": 6.491314058995654e-05, "loss": 0.0, "step": 2459 }, { "epoch": 0.41375830460011775, "grad_norm": NaN, "learning_rate": 6.488748745977746e-05, "loss": 0.0, "step": 2460 }, { "epoch": 0.41392649903288203, "grad_norm": NaN, "learning_rate": 6.486183002877001e-05, "loss": 0.0, "step": 2461 }, { "epoch": 0.4140946934656463, "grad_norm": NaN, "learning_rate": 6.483616830434632e-05, "loss": 0.0, "step": 2462 }, { "epoch": 0.41426288789841054, "grad_norm": NaN, "learning_rate": 6.481050229391978e-05, "loss": 0.0, "step": 2463 }, { "epoch": 0.4144310823311748, "grad_norm": NaN, "learning_rate": 6.478483200490503e-05, "loss": 0.0, "step": 2464 }, { "epoch": 0.4145992767639391, "grad_norm": NaN, "learning_rate": 6.475915744471792e-05, "loss": 0.0, "step": 2465 }, { "epoch": 0.4147674711967034, "grad_norm": NaN, "learning_rate": 6.473347862077552e-05, "loss": 0.0, "step": 2466 }, { "epoch": 0.41493566562946765, "grad_norm": NaN, "learning_rate": 6.470779554049615e-05, "loss": 0.0, "step": 2467 }, { "epoch": 0.41510386006223193, "grad_norm": NaN, "learning_rate": 6.468210821129942e-05, "loss": 0.0, "step": 2468 }, { "epoch": 0.4152720544949962, "grad_norm": NaN, "learning_rate": 6.465641664060605e-05, "loss": 0.0, "step": 2469 }, { "epoch": 0.4154402489277605, "grad_norm": NaN, "learning_rate": 6.46307208358381e-05, "loss": 0.0, "step": 2470 }, { "epoch": 0.41560844336052477, "grad_norm": NaN, "learning_rate": 6.460502080441874e-05, "loss": 0.0, "step": 2471 }, { "epoch": 0.41577663779328905, "grad_norm": NaN, "learning_rate": 6.45793165537725e-05, "loss": 0.0, "step": 2472 }, { "epoch": 0.41594483222605333, "grad_norm": NaN, "learning_rate": 6.455360809132496e-05, "loss": 0.0, "step": 2473 }, { "epoch": 0.4161130266588176, "grad_norm": NaN, "learning_rate": 6.452789542450309e-05, "loss": 0.0, "step": 2474 }, { "epoch": 0.4162812210915819, "grad_norm": NaN, "learning_rate": 6.450217856073494e-05, "loss": 0.0, "step": 2475 }, { "epoch": 0.41644941552434617, "grad_norm": NaN, "learning_rate": 6.447645750744984e-05, "loss": 0.0, "step": 2476 }, { "epoch": 0.41661760995711045, "grad_norm": NaN, "learning_rate": 6.44507322720783e-05, "loss": 0.0, "step": 2477 }, { "epoch": 0.41678580438987467, "grad_norm": NaN, "learning_rate": 6.442500286205207e-05, "loss": 0.0, "step": 2478 }, { "epoch": 0.41695399882263895, "grad_norm": NaN, "learning_rate": 6.439926928480408e-05, "loss": 0.0, "step": 2479 }, { "epoch": 0.41712219325540323, "grad_norm": NaN, "learning_rate": 6.437353154776849e-05, "loss": 0.0, "step": 2480 }, { "epoch": 0.4172903876881675, "grad_norm": NaN, "learning_rate": 6.43477896583806e-05, "loss": 0.0, "step": 2481 }, { "epoch": 0.4174585821209318, "grad_norm": NaN, "learning_rate": 6.4322043624077e-05, "loss": 0.0, "step": 2482 }, { "epoch": 0.41762677655369607, "grad_norm": NaN, "learning_rate": 6.42962934522954e-05, "loss": 0.0, "step": 2483 }, { "epoch": 0.41779497098646035, "grad_norm": NaN, "learning_rate": 6.427053915047477e-05, "loss": 0.0, "step": 2484 }, { "epoch": 0.4179631654192246, "grad_norm": NaN, "learning_rate": 6.424478072605522e-05, "loss": 0.0, "step": 2485 }, { "epoch": 0.4181313598519889, "grad_norm": NaN, "learning_rate": 6.421901818647807e-05, "loss": 0.0, "step": 2486 }, { "epoch": 0.4182995542847532, "grad_norm": NaN, "learning_rate": 6.41932515391858e-05, "loss": 0.0, "step": 2487 }, { "epoch": 0.41846774871751746, "grad_norm": NaN, "learning_rate": 6.416748079162216e-05, "loss": 0.0, "step": 2488 }, { "epoch": 0.41863594315028174, "grad_norm": NaN, "learning_rate": 6.414170595123198e-05, "loss": 0.0, "step": 2489 }, { "epoch": 0.418804137583046, "grad_norm": NaN, "learning_rate": 6.411592702546136e-05, "loss": 0.0, "step": 2490 }, { "epoch": 0.4189723320158103, "grad_norm": NaN, "learning_rate": 6.409014402175754e-05, "loss": 0.0, "step": 2491 }, { "epoch": 0.4191405264485746, "grad_norm": NaN, "learning_rate": 6.406435694756892e-05, "loss": 0.0, "step": 2492 }, { "epoch": 0.4193087208813388, "grad_norm": NaN, "learning_rate": 6.403856581034511e-05, "loss": 0.0, "step": 2493 }, { "epoch": 0.4194769153141031, "grad_norm": NaN, "learning_rate": 6.401277061753689e-05, "loss": 0.0, "step": 2494 }, { "epoch": 0.41964510974686736, "grad_norm": NaN, "learning_rate": 6.398697137659618e-05, "loss": 0.0, "step": 2495 }, { "epoch": 0.41981330417963164, "grad_norm": NaN, "learning_rate": 6.39611680949761e-05, "loss": 0.0, "step": 2496 }, { "epoch": 0.4199814986123959, "grad_norm": NaN, "learning_rate": 6.393536078013091e-05, "loss": 0.0, "step": 2497 }, { "epoch": 0.4201496930451602, "grad_norm": NaN, "learning_rate": 6.390954943951612e-05, "loss": 0.0, "step": 2498 }, { "epoch": 0.4203178874779245, "grad_norm": NaN, "learning_rate": 6.388373408058827e-05, "loss": 0.0, "step": 2499 }, { "epoch": 0.42048608191068876, "grad_norm": NaN, "learning_rate": 6.385791471080514e-05, "loss": 0.0, "step": 2500 }, { "epoch": 0.42065427634345304, "grad_norm": NaN, "learning_rate": 6.383209133762569e-05, "loss": 0.0, "step": 2501 }, { "epoch": 0.4208224707762173, "grad_norm": NaN, "learning_rate": 6.380626396850997e-05, "loss": 0.0, "step": 2502 }, { "epoch": 0.4209906652089816, "grad_norm": NaN, "learning_rate": 6.378043261091922e-05, "loss": 0.0, "step": 2503 }, { "epoch": 0.4211588596417459, "grad_norm": NaN, "learning_rate": 6.375459727231585e-05, "loss": 0.0, "step": 2504 }, { "epoch": 0.42132705407451015, "grad_norm": NaN, "learning_rate": 6.37287579601634e-05, "loss": 0.0, "step": 2505 }, { "epoch": 0.42149524850727443, "grad_norm": NaN, "learning_rate": 6.370291468192652e-05, "loss": 0.0, "step": 2506 }, { "epoch": 0.42166344294003866, "grad_norm": NaN, "learning_rate": 6.367706744507109e-05, "loss": 0.0, "step": 2507 }, { "epoch": 0.42183163737280294, "grad_norm": NaN, "learning_rate": 6.365121625706405e-05, "loss": 0.0, "step": 2508 }, { "epoch": 0.4219998318055672, "grad_norm": NaN, "learning_rate": 6.362536112537354e-05, "loss": 0.0, "step": 2509 }, { "epoch": 0.4221680262383315, "grad_norm": NaN, "learning_rate": 6.359950205746881e-05, "loss": 0.0, "step": 2510 }, { "epoch": 0.4223362206710958, "grad_norm": NaN, "learning_rate": 6.357363906082028e-05, "loss": 0.0, "step": 2511 }, { "epoch": 0.42250441510386005, "grad_norm": NaN, "learning_rate": 6.354777214289944e-05, "loss": 0.0, "step": 2512 }, { "epoch": 0.42267260953662433, "grad_norm": NaN, "learning_rate": 6.352190131117899e-05, "loss": 0.0, "step": 2513 }, { "epoch": 0.4228408039693886, "grad_norm": NaN, "learning_rate": 6.349602657313268e-05, "loss": 0.0, "step": 2514 }, { "epoch": 0.4230089984021529, "grad_norm": NaN, "learning_rate": 6.347014793623547e-05, "loss": 0.0, "step": 2515 }, { "epoch": 0.42317719283491717, "grad_norm": NaN, "learning_rate": 6.34442654079634e-05, "loss": 0.0, "step": 2516 }, { "epoch": 0.42334538726768145, "grad_norm": NaN, "learning_rate": 6.341837899579363e-05, "loss": 0.0, "step": 2517 }, { "epoch": 0.42351358170044573, "grad_norm": NaN, "learning_rate": 6.339248870720447e-05, "loss": 0.0, "step": 2518 }, { "epoch": 0.42368177613321, "grad_norm": NaN, "learning_rate": 6.336659454967532e-05, "loss": 0.0, "step": 2519 }, { "epoch": 0.4238499705659743, "grad_norm": NaN, "learning_rate": 6.334069653068671e-05, "loss": 0.0, "step": 2520 }, { "epoch": 0.42401816499873857, "grad_norm": NaN, "learning_rate": 6.331479465772032e-05, "loss": 0.0, "step": 2521 }, { "epoch": 0.4241863594315028, "grad_norm": NaN, "learning_rate": 6.328888893825888e-05, "loss": 0.0, "step": 2522 }, { "epoch": 0.42435455386426707, "grad_norm": NaN, "learning_rate": 6.326297937978627e-05, "loss": 0.0, "step": 2523 }, { "epoch": 0.42452274829703135, "grad_norm": NaN, "learning_rate": 6.323706598978746e-05, "loss": 0.0, "step": 2524 }, { "epoch": 0.42469094272979563, "grad_norm": NaN, "learning_rate": 6.321114877574856e-05, "loss": 0.0, "step": 2525 }, { "epoch": 0.4248591371625599, "grad_norm": NaN, "learning_rate": 6.318522774515674e-05, "loss": 0.0, "step": 2526 }, { "epoch": 0.4250273315953242, "grad_norm": NaN, "learning_rate": 6.315930290550032e-05, "loss": 0.0, "step": 2527 }, { "epoch": 0.42519552602808847, "grad_norm": NaN, "learning_rate": 6.313337426426867e-05, "loss": 0.0, "step": 2528 }, { "epoch": 0.42536372046085275, "grad_norm": NaN, "learning_rate": 6.310744182895231e-05, "loss": 0.0, "step": 2529 }, { "epoch": 0.425531914893617, "grad_norm": NaN, "learning_rate": 6.308150560704281e-05, "loss": 0.0, "step": 2530 }, { "epoch": 0.4257001093263813, "grad_norm": NaN, "learning_rate": 6.305556560603286e-05, "loss": 0.0, "step": 2531 }, { "epoch": 0.4258683037591456, "grad_norm": NaN, "learning_rate": 6.302962183341626e-05, "loss": 0.0, "step": 2532 }, { "epoch": 0.42603649819190986, "grad_norm": NaN, "learning_rate": 6.300367429668783e-05, "loss": 0.0, "step": 2533 }, { "epoch": 0.42620469262467414, "grad_norm": NaN, "learning_rate": 6.297772300334355e-05, "loss": 0.0, "step": 2534 }, { "epoch": 0.4263728870574384, "grad_norm": NaN, "learning_rate": 6.295176796088045e-05, "loss": 0.0, "step": 2535 }, { "epoch": 0.4265410814902027, "grad_norm": NaN, "learning_rate": 6.292580917679665e-05, "loss": 0.0, "step": 2536 }, { "epoch": 0.4267092759229669, "grad_norm": NaN, "learning_rate": 6.289984665859136e-05, "loss": 0.0, "step": 2537 }, { "epoch": 0.4268774703557312, "grad_norm": NaN, "learning_rate": 6.287388041376486e-05, "loss": 0.0, "step": 2538 }, { "epoch": 0.4270456647884955, "grad_norm": NaN, "learning_rate": 6.284791044981851e-05, "loss": 0.0, "step": 2539 }, { "epoch": 0.42721385922125976, "grad_norm": NaN, "learning_rate": 6.282193677425474e-05, "loss": 0.0, "step": 2540 }, { "epoch": 0.42738205365402404, "grad_norm": NaN, "learning_rate": 6.279595939457705e-05, "loss": 0.0, "step": 2541 }, { "epoch": 0.4275502480867883, "grad_norm": NaN, "learning_rate": 6.276997831829e-05, "loss": 0.0, "step": 2542 }, { "epoch": 0.4277184425195526, "grad_norm": NaN, "learning_rate": 6.274399355289923e-05, "loss": 0.0, "step": 2543 }, { "epoch": 0.4278866369523169, "grad_norm": NaN, "learning_rate": 6.271800510591148e-05, "loss": 0.0, "step": 2544 }, { "epoch": 0.42805483138508116, "grad_norm": NaN, "learning_rate": 6.269201298483451e-05, "loss": 0.0, "step": 2545 }, { "epoch": 0.42822302581784544, "grad_norm": NaN, "learning_rate": 6.266601719717715e-05, "loss": 0.0, "step": 2546 }, { "epoch": 0.4283912202506097, "grad_norm": NaN, "learning_rate": 6.264001775044929e-05, "loss": 0.0, "step": 2547 }, { "epoch": 0.428559414683374, "grad_norm": NaN, "learning_rate": 6.261401465216188e-05, "loss": 0.0, "step": 2548 }, { "epoch": 0.4287276091161383, "grad_norm": NaN, "learning_rate": 6.25880079098269e-05, "loss": 0.0, "step": 2549 }, { "epoch": 0.42889580354890255, "grad_norm": NaN, "learning_rate": 6.256199753095745e-05, "loss": 0.0, "step": 2550 }, { "epoch": 0.42906399798166683, "grad_norm": NaN, "learning_rate": 6.25359835230676e-05, "loss": 0.0, "step": 2551 }, { "epoch": 0.42923219241443106, "grad_norm": NaN, "learning_rate": 6.250996589367255e-05, "loss": 0.0, "step": 2552 }, { "epoch": 0.42940038684719534, "grad_norm": NaN, "learning_rate": 6.248394465028844e-05, "loss": 0.0, "step": 2553 }, { "epoch": 0.4295685812799596, "grad_norm": NaN, "learning_rate": 6.245791980043257e-05, "loss": 0.0, "step": 2554 }, { "epoch": 0.4297367757127239, "grad_norm": NaN, "learning_rate": 6.24318913516232e-05, "loss": 0.0, "step": 2555 }, { "epoch": 0.4299049701454882, "grad_norm": NaN, "learning_rate": 6.240585931137966e-05, "loss": 0.0, "step": 2556 }, { "epoch": 0.43007316457825245, "grad_norm": NaN, "learning_rate": 6.237982368722232e-05, "loss": 0.0, "step": 2557 }, { "epoch": 0.43024135901101673, "grad_norm": NaN, "learning_rate": 6.235378448667257e-05, "loss": 0.0, "step": 2558 }, { "epoch": 0.430409553443781, "grad_norm": NaN, "learning_rate": 6.232774171725287e-05, "loss": 0.0, "step": 2559 }, { "epoch": 0.4305777478765453, "grad_norm": NaN, "learning_rate": 6.230169538648667e-05, "loss": 0.0, "step": 2560 }, { "epoch": 0.43074594230930957, "grad_norm": NaN, "learning_rate": 6.227564550189844e-05, "loss": 0.0, "step": 2561 }, { "epoch": 0.43091413674207385, "grad_norm": NaN, "learning_rate": 6.224959207101372e-05, "loss": 0.0, "step": 2562 }, { "epoch": 0.43108233117483813, "grad_norm": NaN, "learning_rate": 6.222353510135906e-05, "loss": 0.0, "step": 2563 }, { "epoch": 0.4312505256076024, "grad_norm": NaN, "learning_rate": 6.219747460046203e-05, "loss": 0.0, "step": 2564 }, { "epoch": 0.4314187200403667, "grad_norm": NaN, "learning_rate": 6.217141057585119e-05, "loss": 0.0, "step": 2565 }, { "epoch": 0.43158691447313097, "grad_norm": NaN, "learning_rate": 6.214534303505617e-05, "loss": 0.0, "step": 2566 }, { "epoch": 0.4317551089058952, "grad_norm": NaN, "learning_rate": 6.211927198560759e-05, "loss": 0.0, "step": 2567 }, { "epoch": 0.43192330333865947, "grad_norm": NaN, "learning_rate": 6.209319743503706e-05, "loss": 0.0, "step": 2568 }, { "epoch": 0.43209149777142375, "grad_norm": NaN, "learning_rate": 6.206711939087727e-05, "loss": 0.0, "step": 2569 }, { "epoch": 0.43225969220418803, "grad_norm": NaN, "learning_rate": 6.204103786066183e-05, "loss": 0.0, "step": 2570 }, { "epoch": 0.4324278866369523, "grad_norm": NaN, "learning_rate": 6.201495285192542e-05, "loss": 0.0, "step": 2571 }, { "epoch": 0.4325960810697166, "grad_norm": NaN, "learning_rate": 6.19888643722037e-05, "loss": 0.0, "step": 2572 }, { "epoch": 0.43276427550248087, "grad_norm": NaN, "learning_rate": 6.196277242903336e-05, "loss": 0.0, "step": 2573 }, { "epoch": 0.43293246993524515, "grad_norm": NaN, "learning_rate": 6.193667702995205e-05, "loss": 0.0, "step": 2574 }, { "epoch": 0.4331006643680094, "grad_norm": NaN, "learning_rate": 6.191057818249844e-05, "loss": 0.0, "step": 2575 }, { "epoch": 0.4332688588007737, "grad_norm": NaN, "learning_rate": 6.18844758942122e-05, "loss": 0.0, "step": 2576 }, { "epoch": 0.433437053233538, "grad_norm": NaN, "learning_rate": 6.185837017263399e-05, "loss": 0.0, "step": 2577 }, { "epoch": 0.43360524766630226, "grad_norm": NaN, "learning_rate": 6.183226102530547e-05, "loss": 0.0, "step": 2578 }, { "epoch": 0.43377344209906654, "grad_norm": NaN, "learning_rate": 6.180614845976926e-05, "loss": 0.0, "step": 2579 }, { "epoch": 0.4339416365318308, "grad_norm": NaN, "learning_rate": 6.178003248356898e-05, "loss": 0.0, "step": 2580 }, { "epoch": 0.43410983096459504, "grad_norm": NaN, "learning_rate": 6.175391310424928e-05, "loss": 0.0, "step": 2581 }, { "epoch": 0.4342780253973593, "grad_norm": NaN, "learning_rate": 6.172779032935573e-05, "loss": 0.0, "step": 2582 }, { "epoch": 0.4344462198301236, "grad_norm": NaN, "learning_rate": 6.17016641664349e-05, "loss": 0.0, "step": 2583 }, { "epoch": 0.4346144142628879, "grad_norm": NaN, "learning_rate": 6.167553462303438e-05, "loss": 0.0, "step": 2584 }, { "epoch": 0.43478260869565216, "grad_norm": NaN, "learning_rate": 6.164940170670266e-05, "loss": 0.0, "step": 2585 }, { "epoch": 0.43495080312841644, "grad_norm": NaN, "learning_rate": 6.162326542498928e-05, "loss": 0.0, "step": 2586 }, { "epoch": 0.4351189975611807, "grad_norm": NaN, "learning_rate": 6.159712578544472e-05, "loss": 0.0, "step": 2587 }, { "epoch": 0.435287191993945, "grad_norm": NaN, "learning_rate": 6.157098279562041e-05, "loss": 0.0, "step": 2588 }, { "epoch": 0.4354553864267093, "grad_norm": NaN, "learning_rate": 6.154483646306875e-05, "loss": 0.0, "step": 2589 }, { "epoch": 0.43562358085947356, "grad_norm": NaN, "learning_rate": 6.151868679534316e-05, "loss": 0.0, "step": 2590 }, { "epoch": 0.43579177529223784, "grad_norm": NaN, "learning_rate": 6.149253379999796e-05, "loss": 0.0, "step": 2591 }, { "epoch": 0.4359599697250021, "grad_norm": NaN, "learning_rate": 6.146637748458849e-05, "loss": 0.0, "step": 2592 }, { "epoch": 0.4361281641577664, "grad_norm": NaN, "learning_rate": 6.144021785667098e-05, "loss": 0.0, "step": 2593 }, { "epoch": 0.4362963585905307, "grad_norm": NaN, "learning_rate": 6.141405492380268e-05, "loss": 0.0, "step": 2594 }, { "epoch": 0.43646455302329495, "grad_norm": NaN, "learning_rate": 6.138788869354176e-05, "loss": 0.0, "step": 2595 }, { "epoch": 0.4366327474560592, "grad_norm": NaN, "learning_rate": 6.136171917344733e-05, "loss": 0.0, "step": 2596 }, { "epoch": 0.43680094188882346, "grad_norm": NaN, "learning_rate": 6.13355463710795e-05, "loss": 0.0, "step": 2597 }, { "epoch": 0.43696913632158774, "grad_norm": NaN, "learning_rate": 6.13093702939993e-05, "loss": 0.0, "step": 2598 }, { "epoch": 0.437137330754352, "grad_norm": NaN, "learning_rate": 6.128319094976868e-05, "loss": 0.0, "step": 2599 }, { "epoch": 0.4373055251871163, "grad_norm": NaN, "learning_rate": 6.125700834595057e-05, "loss": 0.0, "step": 2600 }, { "epoch": 0.4374737196198806, "grad_norm": NaN, "learning_rate": 6.123082249010885e-05, "loss": 0.0, "step": 2601 }, { "epoch": 0.43764191405264485, "grad_norm": NaN, "learning_rate": 6.120463338980829e-05, "loss": 0.0, "step": 2602 }, { "epoch": 0.43781010848540913, "grad_norm": NaN, "learning_rate": 6.117844105261465e-05, "loss": 0.0, "step": 2603 }, { "epoch": 0.4379783029181734, "grad_norm": NaN, "learning_rate": 6.115224548609459e-05, "loss": 0.0, "step": 2604 }, { "epoch": 0.4381464973509377, "grad_norm": NaN, "learning_rate": 6.112604669781572e-05, "loss": 0.0, "step": 2605 }, { "epoch": 0.43831469178370197, "grad_norm": NaN, "learning_rate": 6.109984469534659e-05, "loss": 0.0, "step": 2606 }, { "epoch": 0.43848288621646625, "grad_norm": NaN, "learning_rate": 6.107363948625665e-05, "loss": 0.0, "step": 2607 }, { "epoch": 0.43865108064923053, "grad_norm": NaN, "learning_rate": 6.104743107811629e-05, "loss": 0.0, "step": 2608 }, { "epoch": 0.4388192750819948, "grad_norm": NaN, "learning_rate": 6.102121947849683e-05, "loss": 0.0, "step": 2609 }, { "epoch": 0.4389874695147591, "grad_norm": NaN, "learning_rate": 6.09950046949705e-05, "loss": 0.0, "step": 2610 }, { "epoch": 0.4391556639475233, "grad_norm": NaN, "learning_rate": 6.0968786735110486e-05, "loss": 0.0, "step": 2611 }, { "epoch": 0.4393238583802876, "grad_norm": NaN, "learning_rate": 6.094256560649081e-05, "loss": 0.0, "step": 2612 }, { "epoch": 0.43949205281305187, "grad_norm": NaN, "learning_rate": 6.091634131668652e-05, "loss": 0.0, "step": 2613 }, { "epoch": 0.43966024724581615, "grad_norm": NaN, "learning_rate": 6.089011387327348e-05, "loss": 0.0, "step": 2614 }, { "epoch": 0.43982844167858043, "grad_norm": NaN, "learning_rate": 6.086388328382853e-05, "loss": 0.0, "step": 2615 }, { "epoch": 0.4399966361113447, "grad_norm": NaN, "learning_rate": 6.0837649555929376e-05, "loss": 0.0, "step": 2616 }, { "epoch": 0.440164830544109, "grad_norm": NaN, "learning_rate": 6.081141269715466e-05, "loss": 0.0, "step": 2617 }, { "epoch": 0.44033302497687327, "grad_norm": NaN, "learning_rate": 6.0785172715083895e-05, "loss": 0.0, "step": 2618 }, { "epoch": 0.44050121940963755, "grad_norm": NaN, "learning_rate": 6.0758929617297545e-05, "loss": 0.0, "step": 2619 }, { "epoch": 0.4406694138424018, "grad_norm": NaN, "learning_rate": 6.0732683411376935e-05, "loss": 0.0, "step": 2620 }, { "epoch": 0.4408376082751661, "grad_norm": NaN, "learning_rate": 6.07064341049043e-05, "loss": 0.0, "step": 2621 }, { "epoch": 0.4410058027079304, "grad_norm": NaN, "learning_rate": 6.068018170546276e-05, "loss": 0.0, "step": 2622 }, { "epoch": 0.44117399714069466, "grad_norm": NaN, "learning_rate": 6.065392622063637e-05, "loss": 0.0, "step": 2623 }, { "epoch": 0.44134219157345894, "grad_norm": NaN, "learning_rate": 6.0627667658010015e-05, "loss": 0.0, "step": 2624 }, { "epoch": 0.4415103860062232, "grad_norm": NaN, "learning_rate": 6.060140602516952e-05, "loss": 0.0, "step": 2625 }, { "epoch": 0.44167858043898744, "grad_norm": NaN, "learning_rate": 6.0575141329701545e-05, "loss": 0.0, "step": 2626 }, { "epoch": 0.4418467748717517, "grad_norm": NaN, "learning_rate": 6.054887357919371e-05, "loss": 0.0, "step": 2627 }, { "epoch": 0.442014969304516, "grad_norm": NaN, "learning_rate": 6.0522602781234426e-05, "loss": 0.0, "step": 2628 }, { "epoch": 0.4421831637372803, "grad_norm": NaN, "learning_rate": 6.049632894341308e-05, "loss": 0.0, "step": 2629 }, { "epoch": 0.44235135817004456, "grad_norm": NaN, "learning_rate": 6.047005207331986e-05, "loss": 0.0, "step": 2630 }, { "epoch": 0.44251955260280884, "grad_norm": NaN, "learning_rate": 6.0443772178545874e-05, "loss": 0.0, "step": 2631 }, { "epoch": 0.4426877470355731, "grad_norm": NaN, "learning_rate": 6.041748926668308e-05, "loss": 0.0, "step": 2632 }, { "epoch": 0.4428559414683374, "grad_norm": NaN, "learning_rate": 6.039120334532432e-05, "loss": 0.0, "step": 2633 }, { "epoch": 0.4430241359011017, "grad_norm": NaN, "learning_rate": 6.0364914422063304e-05, "loss": 0.0, "step": 2634 }, { "epoch": 0.44319233033386596, "grad_norm": NaN, "learning_rate": 6.0338622504494604e-05, "loss": 0.0, "step": 2635 }, { "epoch": 0.44336052476663024, "grad_norm": NaN, "learning_rate": 6.031232760021366e-05, "loss": 0.0, "step": 2636 }, { "epoch": 0.4435287191993945, "grad_norm": NaN, "learning_rate": 6.0286029716816774e-05, "loss": 0.0, "step": 2637 }, { "epoch": 0.4436969136321588, "grad_norm": NaN, "learning_rate": 6.025972886190111e-05, "loss": 0.0, "step": 2638 }, { "epoch": 0.4438651080649231, "grad_norm": NaN, "learning_rate": 6.023342504306471e-05, "loss": 0.0, "step": 2639 }, { "epoch": 0.4440333024976873, "grad_norm": NaN, "learning_rate": 6.0207118267906415e-05, "loss": 0.0, "step": 2640 }, { "epoch": 0.4442014969304516, "grad_norm": NaN, "learning_rate": 6.018080854402599e-05, "loss": 0.0, "step": 2641 }, { "epoch": 0.44436969136321586, "grad_norm": NaN, "learning_rate": 6.0154495879024e-05, "loss": 0.0, "step": 2642 }, { "epoch": 0.44453788579598014, "grad_norm": NaN, "learning_rate": 6.012818028050189e-05, "loss": 0.0, "step": 2643 }, { "epoch": 0.4447060802287444, "grad_norm": NaN, "learning_rate": 6.010186175606195e-05, "loss": 0.0, "step": 2644 }, { "epoch": 0.4448742746615087, "grad_norm": NaN, "learning_rate": 6.0075540313307296e-05, "loss": 0.0, "step": 2645 }, { "epoch": 0.445042469094273, "grad_norm": NaN, "learning_rate": 6.004921595984189e-05, "loss": 0.0, "step": 2646 }, { "epoch": 0.44521066352703725, "grad_norm": NaN, "learning_rate": 6.002288870327055e-05, "loss": 0.0, "step": 2647 }, { "epoch": 0.44537885795980153, "grad_norm": NaN, "learning_rate": 5.999655855119893e-05, "loss": 0.0, "step": 2648 }, { "epoch": 0.4455470523925658, "grad_norm": NaN, "learning_rate": 5.9970225511233504e-05, "loss": 0.0, "step": 2649 }, { "epoch": 0.4457152468253301, "grad_norm": NaN, "learning_rate": 5.994388959098162e-05, "loss": 0.0, "step": 2650 }, { "epoch": 0.44588344125809437, "grad_norm": NaN, "learning_rate": 5.9917550798051384e-05, "loss": 0.0, "step": 2651 }, { "epoch": 0.44605163569085865, "grad_norm": NaN, "learning_rate": 5.989120914005183e-05, "loss": 0.0, "step": 2652 }, { "epoch": 0.44621983012362293, "grad_norm": NaN, "learning_rate": 5.986486462459273e-05, "loss": 0.0, "step": 2653 }, { "epoch": 0.4463880245563872, "grad_norm": NaN, "learning_rate": 5.983851725928474e-05, "loss": 0.0, "step": 2654 }, { "epoch": 0.44655621898915143, "grad_norm": NaN, "learning_rate": 5.98121670517393e-05, "loss": 0.0, "step": 2655 }, { "epoch": 0.4467244134219157, "grad_norm": NaN, "learning_rate": 5.978581400956872e-05, "loss": 0.0, "step": 2656 }, { "epoch": 0.44689260785468, "grad_norm": NaN, "learning_rate": 5.975945814038606e-05, "loss": 0.0, "step": 2657 }, { "epoch": 0.44706080228744427, "grad_norm": NaN, "learning_rate": 5.973309945180526e-05, "loss": 0.0, "step": 2658 }, { "epoch": 0.44722899672020855, "grad_norm": NaN, "learning_rate": 5.970673795144105e-05, "loss": 0.0, "step": 2659 }, { "epoch": 0.44739719115297283, "grad_norm": NaN, "learning_rate": 5.968037364690897e-05, "loss": 0.0, "step": 2660 }, { "epoch": 0.4475653855857371, "grad_norm": NaN, "learning_rate": 5.965400654582536e-05, "loss": 0.0, "step": 2661 }, { "epoch": 0.4477335800185014, "grad_norm": NaN, "learning_rate": 5.962763665580741e-05, "loss": 0.0, "step": 2662 }, { "epoch": 0.44790177445126567, "grad_norm": NaN, "learning_rate": 5.960126398447304e-05, "loss": 0.0, "step": 2663 }, { "epoch": 0.44806996888402995, "grad_norm": NaN, "learning_rate": 5.957488853944106e-05, "loss": 0.0, "step": 2664 }, { "epoch": 0.4482381633167942, "grad_norm": NaN, "learning_rate": 5.954851032833103e-05, "loss": 0.0, "step": 2665 }, { "epoch": 0.4484063577495585, "grad_norm": NaN, "learning_rate": 5.9522129358763315e-05, "loss": 0.0, "step": 2666 }, { "epoch": 0.4485745521823228, "grad_norm": NaN, "learning_rate": 5.9495745638359066e-05, "loss": 0.0, "step": 2667 }, { "epoch": 0.44874274661508706, "grad_norm": NaN, "learning_rate": 5.946935917474028e-05, "loss": 0.0, "step": 2668 }, { "epoch": 0.44891094104785134, "grad_norm": NaN, "learning_rate": 5.944296997552967e-05, "loss": 0.0, "step": 2669 }, { "epoch": 0.44907913548061557, "grad_norm": NaN, "learning_rate": 5.941657804835081e-05, "loss": 0.0, "step": 2670 }, { "epoch": 0.44924732991337984, "grad_norm": NaN, "learning_rate": 5.939018340082804e-05, "loss": 0.0, "step": 2671 }, { "epoch": 0.4494155243461441, "grad_norm": NaN, "learning_rate": 5.9363786040586455e-05, "loss": 0.0, "step": 2672 }, { "epoch": 0.4495837187789084, "grad_norm": NaN, "learning_rate": 5.933738597525195e-05, "loss": 0.0, "step": 2673 }, { "epoch": 0.4497519132116727, "grad_norm": NaN, "learning_rate": 5.9310983212451234e-05, "loss": 0.0, "step": 2674 }, { "epoch": 0.44992010764443696, "grad_norm": NaN, "learning_rate": 5.9284577759811744e-05, "loss": 0.0, "step": 2675 }, { "epoch": 0.45008830207720124, "grad_norm": NaN, "learning_rate": 5.925816962496175e-05, "loss": 0.0, "step": 2676 }, { "epoch": 0.4502564965099655, "grad_norm": NaN, "learning_rate": 5.923175881553022e-05, "loss": 0.0, "step": 2677 }, { "epoch": 0.4504246909427298, "grad_norm": NaN, "learning_rate": 5.9205345339147e-05, "loss": 0.0, "step": 2678 }, { "epoch": 0.4505928853754941, "grad_norm": NaN, "learning_rate": 5.91789292034426e-05, "loss": 0.0, "step": 2679 }, { "epoch": 0.45076107980825836, "grad_norm": NaN, "learning_rate": 5.915251041604837e-05, "loss": 0.0, "step": 2680 }, { "epoch": 0.45092927424102264, "grad_norm": NaN, "learning_rate": 5.9126088984596394e-05, "loss": 0.0, "step": 2681 }, { "epoch": 0.4510974686737869, "grad_norm": NaN, "learning_rate": 5.9099664916719535e-05, "loss": 0.0, "step": 2682 }, { "epoch": 0.4512656631065512, "grad_norm": NaN, "learning_rate": 5.9073238220051394e-05, "loss": 0.0, "step": 2683 }, { "epoch": 0.4514338575393155, "grad_norm": NaN, "learning_rate": 5.904680890222636e-05, "loss": 0.0, "step": 2684 }, { "epoch": 0.4516020519720797, "grad_norm": NaN, "learning_rate": 5.902037697087957e-05, "loss": 0.0, "step": 2685 }, { "epoch": 0.451770246404844, "grad_norm": NaN, "learning_rate": 5.89939424336469e-05, "loss": 0.0, "step": 2686 }, { "epoch": 0.45193844083760826, "grad_norm": NaN, "learning_rate": 5.896750529816499e-05, "loss": 0.0, "step": 2687 }, { "epoch": 0.45210663527037254, "grad_norm": NaN, "learning_rate": 5.8941065572071255e-05, "loss": 0.0, "step": 2688 }, { "epoch": 0.4522748297031368, "grad_norm": NaN, "learning_rate": 5.891462326300381e-05, "loss": 0.0, "step": 2689 }, { "epoch": 0.4524430241359011, "grad_norm": NaN, "learning_rate": 5.8888178378601565e-05, "loss": 0.0, "step": 2690 }, { "epoch": 0.4526112185686654, "grad_norm": NaN, "learning_rate": 5.886173092650414e-05, "loss": 0.0, "step": 2691 }, { "epoch": 0.45277941300142965, "grad_norm": NaN, "learning_rate": 5.883528091435191e-05, "loss": 0.0, "step": 2692 }, { "epoch": 0.45294760743419393, "grad_norm": NaN, "learning_rate": 5.880882834978597e-05, "loss": 0.0, "step": 2693 }, { "epoch": 0.4531158018669582, "grad_norm": NaN, "learning_rate": 5.87823732404482e-05, "loss": 0.0, "step": 2694 }, { "epoch": 0.4532839962997225, "grad_norm": NaN, "learning_rate": 5.875591559398116e-05, "loss": 0.0, "step": 2695 }, { "epoch": 0.45345219073248677, "grad_norm": NaN, "learning_rate": 5.872945541802818e-05, "loss": 0.0, "step": 2696 }, { "epoch": 0.45362038516525105, "grad_norm": NaN, "learning_rate": 5.8702992720233296e-05, "loss": 0.0, "step": 2697 }, { "epoch": 0.45378857959801533, "grad_norm": NaN, "learning_rate": 5.867652750824131e-05, "loss": 0.0, "step": 2698 }, { "epoch": 0.4539567740307796, "grad_norm": NaN, "learning_rate": 5.865005978969771e-05, "loss": 0.0, "step": 2699 }, { "epoch": 0.45412496846354383, "grad_norm": NaN, "learning_rate": 5.862358957224872e-05, "loss": 0.0, "step": 2700 }, { "epoch": 0.4542931628963081, "grad_norm": NaN, "learning_rate": 5.859711686354129e-05, "loss": 0.0, "step": 2701 }, { "epoch": 0.4544613573290724, "grad_norm": NaN, "learning_rate": 5.8570641671223084e-05, "loss": 0.0, "step": 2702 }, { "epoch": 0.45462955176183667, "grad_norm": NaN, "learning_rate": 5.85441640029425e-05, "loss": 0.0, "step": 2703 }, { "epoch": 0.45479774619460095, "grad_norm": NaN, "learning_rate": 5.851768386634863e-05, "loss": 0.0, "step": 2704 }, { "epoch": 0.45496594062736523, "grad_norm": NaN, "learning_rate": 5.84912012690913e-05, "loss": 0.0, "step": 2705 }, { "epoch": 0.4551341350601295, "grad_norm": NaN, "learning_rate": 5.846471621882103e-05, "loss": 0.0, "step": 2706 }, { "epoch": 0.4553023294928938, "grad_norm": NaN, "learning_rate": 5.843822872318906e-05, "loss": 0.0, "step": 2707 }, { "epoch": 0.45547052392565807, "grad_norm": NaN, "learning_rate": 5.841173878984731e-05, "loss": 0.0, "step": 2708 }, { "epoch": 0.45563871835842235, "grad_norm": NaN, "learning_rate": 5.838524642644846e-05, "loss": 0.0, "step": 2709 }, { "epoch": 0.4558069127911866, "grad_norm": NaN, "learning_rate": 5.8358751640645835e-05, "loss": 0.0, "step": 2710 }, { "epoch": 0.4559751072239509, "grad_norm": NaN, "learning_rate": 5.833225444009348e-05, "loss": 0.0, "step": 2711 }, { "epoch": 0.4561433016567152, "grad_norm": NaN, "learning_rate": 5.830575483244613e-05, "loss": 0.0, "step": 2712 }, { "epoch": 0.45631149608947946, "grad_norm": NaN, "learning_rate": 5.827925282535926e-05, "loss": 0.0, "step": 2713 }, { "epoch": 0.4564796905222437, "grad_norm": NaN, "learning_rate": 5.8252748426488966e-05, "loss": 0.0, "step": 2714 }, { "epoch": 0.45664788495500797, "grad_norm": NaN, "learning_rate": 5.8226241643492096e-05, "loss": 0.0, "step": 2715 }, { "epoch": 0.45681607938777224, "grad_norm": NaN, "learning_rate": 5.819973248402615e-05, "loss": 0.0, "step": 2716 }, { "epoch": 0.4569842738205365, "grad_norm": NaN, "learning_rate": 5.8173220955749343e-05, "loss": 0.0, "step": 2717 }, { "epoch": 0.4571524682533008, "grad_norm": NaN, "learning_rate": 5.814670706632054e-05, "loss": 0.0, "step": 2718 }, { "epoch": 0.4573206626860651, "grad_norm": NaN, "learning_rate": 5.8120190823399324e-05, "loss": 0.0, "step": 2719 }, { "epoch": 0.45748885711882936, "grad_norm": NaN, "learning_rate": 5.80936722346459e-05, "loss": 0.0, "step": 2720 }, { "epoch": 0.45765705155159364, "grad_norm": NaN, "learning_rate": 5.806715130772125e-05, "loss": 0.0, "step": 2721 }, { "epoch": 0.4578252459843579, "grad_norm": NaN, "learning_rate": 5.804062805028693e-05, "loss": 0.0, "step": 2722 }, { "epoch": 0.4579934404171222, "grad_norm": NaN, "learning_rate": 5.8014102470005236e-05, "loss": 0.0, "step": 2723 }, { "epoch": 0.4581616348498865, "grad_norm": NaN, "learning_rate": 5.7987574574539096e-05, "loss": 0.0, "step": 2724 }, { "epoch": 0.45832982928265076, "grad_norm": NaN, "learning_rate": 5.796104437155213e-05, "loss": 0.0, "step": 2725 }, { "epoch": 0.45849802371541504, "grad_norm": NaN, "learning_rate": 5.79345118687086e-05, "loss": 0.0, "step": 2726 }, { "epoch": 0.4586662181481793, "grad_norm": NaN, "learning_rate": 5.790797707367348e-05, "loss": 0.0, "step": 2727 }, { "epoch": 0.4588344125809436, "grad_norm": NaN, "learning_rate": 5.788143999411236e-05, "loss": 0.0, "step": 2728 }, { "epoch": 0.4590026070137078, "grad_norm": NaN, "learning_rate": 5.78549006376915e-05, "loss": 0.0, "step": 2729 }, { "epoch": 0.4591708014464721, "grad_norm": NaN, "learning_rate": 5.7828359012077814e-05, "loss": 0.0, "step": 2730 }, { "epoch": 0.4593389958792364, "grad_norm": NaN, "learning_rate": 5.7801815124938905e-05, "loss": 0.0, "step": 2731 }, { "epoch": 0.45950719031200066, "grad_norm": NaN, "learning_rate": 5.777526898394298e-05, "loss": 0.0, "step": 2732 }, { "epoch": 0.45967538474476494, "grad_norm": NaN, "learning_rate": 5.774872059675894e-05, "loss": 0.0, "step": 2733 }, { "epoch": 0.4598435791775292, "grad_norm": NaN, "learning_rate": 5.772216997105631e-05, "loss": 0.0, "step": 2734 }, { "epoch": 0.4600117736102935, "grad_norm": NaN, "learning_rate": 5.769561711450527e-05, "loss": 0.0, "step": 2735 }, { "epoch": 0.4601799680430578, "grad_norm": NaN, "learning_rate": 5.7669062034776634e-05, "loss": 0.0, "step": 2736 }, { "epoch": 0.46034816247582205, "grad_norm": NaN, "learning_rate": 5.764250473954189e-05, "loss": 0.0, "step": 2737 }, { "epoch": 0.46051635690858633, "grad_norm": NaN, "learning_rate": 5.761594523647312e-05, "loss": 0.0, "step": 2738 }, { "epoch": 0.4606845513413506, "grad_norm": NaN, "learning_rate": 5.758938353324308e-05, "loss": 0.0, "step": 2739 }, { "epoch": 0.4608527457741149, "grad_norm": NaN, "learning_rate": 5.7562819637525124e-05, "loss": 0.0, "step": 2740 }, { "epoch": 0.46102094020687917, "grad_norm": NaN, "learning_rate": 5.7536253556993306e-05, "loss": 0.0, "step": 2741 }, { "epoch": 0.46118913463964345, "grad_norm": NaN, "learning_rate": 5.750968529932223e-05, "loss": 0.0, "step": 2742 }, { "epoch": 0.46135732907240773, "grad_norm": NaN, "learning_rate": 5.7483114872187193e-05, "loss": 0.0, "step": 2743 }, { "epoch": 0.46152552350517195, "grad_norm": NaN, "learning_rate": 5.745654228326407e-05, "loss": 0.0, "step": 2744 }, { "epoch": 0.46169371793793623, "grad_norm": NaN, "learning_rate": 5.742996754022942e-05, "loss": 0.0, "step": 2745 }, { "epoch": 0.4618619123707005, "grad_norm": NaN, "learning_rate": 5.7403390650760356e-05, "loss": 0.0, "step": 2746 }, { "epoch": 0.4620301068034648, "grad_norm": NaN, "learning_rate": 5.737681162253468e-05, "loss": 0.0, "step": 2747 }, { "epoch": 0.46219830123622907, "grad_norm": NaN, "learning_rate": 5.7350230463230725e-05, "loss": 0.0, "step": 2748 }, { "epoch": 0.46236649566899335, "grad_norm": NaN, "learning_rate": 5.732364718052752e-05, "loss": 0.0, "step": 2749 }, { "epoch": 0.46253469010175763, "grad_norm": NaN, "learning_rate": 5.729706178210468e-05, "loss": 0.0, "step": 2750 }, { "epoch": 0.4627028845345219, "grad_norm": NaN, "learning_rate": 5.7270474275642406e-05, "loss": 0.0, "step": 2751 }, { "epoch": 0.4628710789672862, "grad_norm": NaN, "learning_rate": 5.724388466882157e-05, "loss": 0.0, "step": 2752 }, { "epoch": 0.46303927340005047, "grad_norm": NaN, "learning_rate": 5.7217292969323576e-05, "loss": 0.0, "step": 2753 }, { "epoch": 0.46320746783281475, "grad_norm": NaN, "learning_rate": 5.719069918483049e-05, "loss": 0.0, "step": 2754 }, { "epoch": 0.463375662265579, "grad_norm": NaN, "learning_rate": 5.716410332302493e-05, "loss": 0.0, "step": 2755 }, { "epoch": 0.4635438566983433, "grad_norm": NaN, "learning_rate": 5.713750539159018e-05, "loss": 0.0, "step": 2756 }, { "epoch": 0.4637120511311076, "grad_norm": NaN, "learning_rate": 5.711090539821008e-05, "loss": 0.0, "step": 2757 }, { "epoch": 0.46388024556387186, "grad_norm": NaN, "learning_rate": 5.708430335056905e-05, "loss": 0.0, "step": 2758 }, { "epoch": 0.4640484399966361, "grad_norm": NaN, "learning_rate": 5.7057699256352114e-05, "loss": 0.0, "step": 2759 }, { "epoch": 0.46421663442940037, "grad_norm": NaN, "learning_rate": 5.7031093123244925e-05, "loss": 0.0, "step": 2760 }, { "epoch": 0.46438482886216464, "grad_norm": NaN, "learning_rate": 5.700448495893368e-05, "loss": 0.0, "step": 2761 }, { "epoch": 0.4645530232949289, "grad_norm": NaN, "learning_rate": 5.697787477110519e-05, "loss": 0.0, "step": 2762 }, { "epoch": 0.4647212177276932, "grad_norm": NaN, "learning_rate": 5.695126256744683e-05, "loss": 0.0, "step": 2763 }, { "epoch": 0.4648894121604575, "grad_norm": NaN, "learning_rate": 5.692464835564658e-05, "loss": 0.0, "step": 2764 }, { "epoch": 0.46505760659322176, "grad_norm": NaN, "learning_rate": 5.689803214339298e-05, "loss": 0.0, "step": 2765 }, { "epoch": 0.46522580102598604, "grad_norm": NaN, "learning_rate": 5.687141393837516e-05, "loss": 0.0, "step": 2766 }, { "epoch": 0.4653939954587503, "grad_norm": NaN, "learning_rate": 5.68447937482828e-05, "loss": 0.0, "step": 2767 }, { "epoch": 0.4655621898915146, "grad_norm": NaN, "learning_rate": 5.6818171580806214e-05, "loss": 0.0, "step": 2768 }, { "epoch": 0.4657303843242789, "grad_norm": NaN, "learning_rate": 5.6791547443636205e-05, "loss": 0.0, "step": 2769 }, { "epoch": 0.46589857875704316, "grad_norm": NaN, "learning_rate": 5.676492134446422e-05, "loss": 0.0, "step": 2770 }, { "epoch": 0.46606677318980744, "grad_norm": NaN, "learning_rate": 5.6738293290982224e-05, "loss": 0.0, "step": 2771 }, { "epoch": 0.4662349676225717, "grad_norm": NaN, "learning_rate": 5.6711663290882776e-05, "loss": 0.0, "step": 2772 }, { "epoch": 0.466403162055336, "grad_norm": NaN, "learning_rate": 5.668503135185897e-05, "loss": 0.0, "step": 2773 }, { "epoch": 0.4665713564881002, "grad_norm": NaN, "learning_rate": 5.66583974816045e-05, "loss": 0.0, "step": 2774 }, { "epoch": 0.4667395509208645, "grad_norm": NaN, "learning_rate": 5.6631761687813564e-05, "loss": 0.0, "step": 2775 }, { "epoch": 0.4669077453536288, "grad_norm": NaN, "learning_rate": 5.6605123978180963e-05, "loss": 0.0, "step": 2776 }, { "epoch": 0.46707593978639306, "grad_norm": NaN, "learning_rate": 5.657848436040202e-05, "loss": 0.0, "step": 2777 }, { "epoch": 0.46724413421915734, "grad_norm": NaN, "learning_rate": 5.655184284217263e-05, "loss": 0.0, "step": 2778 }, { "epoch": 0.4674123286519216, "grad_norm": NaN, "learning_rate": 5.6525199431189226e-05, "loss": 0.0, "step": 2779 }, { "epoch": 0.4675805230846859, "grad_norm": NaN, "learning_rate": 5.64985541351488e-05, "loss": 0.0, "step": 2780 }, { "epoch": 0.4677487175174502, "grad_norm": NaN, "learning_rate": 5.6471906961748856e-05, "loss": 0.0, "step": 2781 }, { "epoch": 0.46791691195021445, "grad_norm": NaN, "learning_rate": 5.64452579186875e-05, "loss": 0.0, "step": 2782 }, { "epoch": 0.46808510638297873, "grad_norm": NaN, "learning_rate": 5.6418607013663314e-05, "loss": 0.0, "step": 2783 }, { "epoch": 0.468253300815743, "grad_norm": NaN, "learning_rate": 5.6391954254375465e-05, "loss": 0.0, "step": 2784 }, { "epoch": 0.4684214952485073, "grad_norm": NaN, "learning_rate": 5.6365299648523605e-05, "loss": 0.0, "step": 2785 }, { "epoch": 0.46858968968127157, "grad_norm": NaN, "learning_rate": 5.6338643203807986e-05, "loss": 0.0, "step": 2786 }, { "epoch": 0.46875788411403585, "grad_norm": NaN, "learning_rate": 5.631198492792933e-05, "loss": 0.0, "step": 2787 }, { "epoch": 0.4689260785468001, "grad_norm": NaN, "learning_rate": 5.6285324828588934e-05, "loss": 0.0, "step": 2788 }, { "epoch": 0.46909427297956435, "grad_norm": NaN, "learning_rate": 5.625866291348859e-05, "loss": 0.0, "step": 2789 }, { "epoch": 0.46926246741232863, "grad_norm": NaN, "learning_rate": 5.6231999190330645e-05, "loss": 0.0, "step": 2790 }, { "epoch": 0.4694306618450929, "grad_norm": NaN, "learning_rate": 5.6205333666817925e-05, "loss": 0.0, "step": 2791 }, { "epoch": 0.4695988562778572, "grad_norm": NaN, "learning_rate": 5.617866635065382e-05, "loss": 0.0, "step": 2792 }, { "epoch": 0.46976705071062147, "grad_norm": NaN, "learning_rate": 5.615199724954221e-05, "loss": 0.0, "step": 2793 }, { "epoch": 0.46993524514338575, "grad_norm": NaN, "learning_rate": 5.6125326371187504e-05, "loss": 0.0, "step": 2794 }, { "epoch": 0.47010343957615003, "grad_norm": NaN, "learning_rate": 5.6098653723294604e-05, "loss": 0.0, "step": 2795 }, { "epoch": 0.4702716340089143, "grad_norm": NaN, "learning_rate": 5.607197931356896e-05, "loss": 0.0, "step": 2796 }, { "epoch": 0.4704398284416786, "grad_norm": NaN, "learning_rate": 5.60453031497165e-05, "loss": 0.0, "step": 2797 }, { "epoch": 0.47060802287444287, "grad_norm": NaN, "learning_rate": 5.6018625239443666e-05, "loss": 0.0, "step": 2798 }, { "epoch": 0.47077621730720715, "grad_norm": NaN, "learning_rate": 5.599194559045743e-05, "loss": 0.0, "step": 2799 }, { "epoch": 0.4709444117399714, "grad_norm": NaN, "learning_rate": 5.596526421046521e-05, "loss": 0.0, "step": 2800 }, { "epoch": 0.4711126061727357, "grad_norm": NaN, "learning_rate": 5.593858110717498e-05, "loss": 0.0, "step": 2801 }, { "epoch": 0.4712808006055, "grad_norm": NaN, "learning_rate": 5.5911896288295183e-05, "loss": 0.0, "step": 2802 }, { "epoch": 0.4714489950382642, "grad_norm": NaN, "learning_rate": 5.588520976153477e-05, "loss": 0.0, "step": 2803 }, { "epoch": 0.4716171894710285, "grad_norm": NaN, "learning_rate": 5.585852153460318e-05, "loss": 0.0, "step": 2804 }, { "epoch": 0.47178538390379277, "grad_norm": NaN, "learning_rate": 5.583183161521033e-05, "loss": 0.0, "step": 2805 }, { "epoch": 0.47195357833655704, "grad_norm": NaN, "learning_rate": 5.5805140011066636e-05, "loss": 0.0, "step": 2806 }, { "epoch": 0.4721217727693213, "grad_norm": NaN, "learning_rate": 5.5778446729883026e-05, "loss": 0.0, "step": 2807 }, { "epoch": 0.4722899672020856, "grad_norm": NaN, "learning_rate": 5.5751751779370874e-05, "loss": 0.0, "step": 2808 }, { "epoch": 0.4724581616348499, "grad_norm": NaN, "learning_rate": 5.572505516724207e-05, "loss": 0.0, "step": 2809 }, { "epoch": 0.47262635606761416, "grad_norm": NaN, "learning_rate": 5.5698356901208925e-05, "loss": 0.0, "step": 2810 }, { "epoch": 0.47279455050037844, "grad_norm": NaN, "learning_rate": 5.567165698898432e-05, "loss": 0.0, "step": 2811 }, { "epoch": 0.4729627449331427, "grad_norm": NaN, "learning_rate": 5.564495543828153e-05, "loss": 0.0, "step": 2812 }, { "epoch": 0.473130939365907, "grad_norm": NaN, "learning_rate": 5.561825225681435e-05, "loss": 0.0, "step": 2813 }, { "epoch": 0.4732991337986713, "grad_norm": NaN, "learning_rate": 5.5591547452297e-05, "loss": 0.0, "step": 2814 }, { "epoch": 0.47346732823143556, "grad_norm": NaN, "learning_rate": 5.5564841032444234e-05, "loss": 0.0, "step": 2815 }, { "epoch": 0.47363552266419984, "grad_norm": NaN, "learning_rate": 5.5538133004971216e-05, "loss": 0.0, "step": 2816 }, { "epoch": 0.4738037170969641, "grad_norm": NaN, "learning_rate": 5.551142337759362e-05, "loss": 0.0, "step": 2817 }, { "epoch": 0.47397191152972834, "grad_norm": NaN, "learning_rate": 5.5484712158027515e-05, "loss": 0.0, "step": 2818 }, { "epoch": 0.4741401059624926, "grad_norm": NaN, "learning_rate": 5.545799935398952e-05, "loss": 0.0, "step": 2819 }, { "epoch": 0.4743083003952569, "grad_norm": NaN, "learning_rate": 5.543128497319664e-05, "loss": 0.0, "step": 2820 }, { "epoch": 0.4744764948280212, "grad_norm": NaN, "learning_rate": 5.540456902336637e-05, "loss": 0.0, "step": 2821 }, { "epoch": 0.47464468926078546, "grad_norm": NaN, "learning_rate": 5.5377851512216635e-05, "loss": 0.0, "step": 2822 }, { "epoch": 0.47481288369354974, "grad_norm": NaN, "learning_rate": 5.535113244746585e-05, "loss": 0.0, "step": 2823 }, { "epoch": 0.474981078126314, "grad_norm": NaN, "learning_rate": 5.532441183683281e-05, "loss": 0.0, "step": 2824 }, { "epoch": 0.4751492725590783, "grad_norm": NaN, "learning_rate": 5.529768968803686e-05, "loss": 0.0, "step": 2825 }, { "epoch": 0.4753174669918426, "grad_norm": NaN, "learning_rate": 5.5270966008797664e-05, "loss": 0.0, "step": 2826 }, { "epoch": 0.47548566142460685, "grad_norm": NaN, "learning_rate": 5.5244240806835445e-05, "loss": 0.0, "step": 2827 }, { "epoch": 0.47565385585737113, "grad_norm": NaN, "learning_rate": 5.521751408987078e-05, "loss": 0.0, "step": 2828 }, { "epoch": 0.4758220502901354, "grad_norm": NaN, "learning_rate": 5.519078586562475e-05, "loss": 0.0, "step": 2829 }, { "epoch": 0.4759902447228997, "grad_norm": NaN, "learning_rate": 5.516405614181883e-05, "loss": 0.0, "step": 2830 }, { "epoch": 0.47615843915566397, "grad_norm": NaN, "learning_rate": 5.513732492617492e-05, "loss": 0.0, "step": 2831 }, { "epoch": 0.47632663358842825, "grad_norm": NaN, "learning_rate": 5.511059222641537e-05, "loss": 0.0, "step": 2832 }, { "epoch": 0.4764948280211925, "grad_norm": NaN, "learning_rate": 5.508385805026299e-05, "loss": 0.0, "step": 2833 }, { "epoch": 0.47666302245395675, "grad_norm": NaN, "learning_rate": 5.5057122405440945e-05, "loss": 0.0, "step": 2834 }, { "epoch": 0.47683121688672103, "grad_norm": NaN, "learning_rate": 5.5030385299672884e-05, "loss": 0.0, "step": 2835 }, { "epoch": 0.4769994113194853, "grad_norm": NaN, "learning_rate": 5.500364674068285e-05, "loss": 0.0, "step": 2836 }, { "epoch": 0.4771676057522496, "grad_norm": NaN, "learning_rate": 5.497690673619532e-05, "loss": 0.0, "step": 2837 }, { "epoch": 0.47733580018501387, "grad_norm": NaN, "learning_rate": 5.4950165293935177e-05, "loss": 0.0, "step": 2838 }, { "epoch": 0.47750399461777815, "grad_norm": NaN, "learning_rate": 5.4923422421627736e-05, "loss": 0.0, "step": 2839 }, { "epoch": 0.47767218905054243, "grad_norm": NaN, "learning_rate": 5.489667812699869e-05, "loss": 0.0, "step": 2840 }, { "epoch": 0.4778403834833067, "grad_norm": NaN, "learning_rate": 5.4869932417774196e-05, "loss": 0.0, "step": 2841 }, { "epoch": 0.478008577916071, "grad_norm": NaN, "learning_rate": 5.4843185301680755e-05, "loss": 0.0, "step": 2842 }, { "epoch": 0.47817677234883527, "grad_norm": NaN, "learning_rate": 5.4816436786445335e-05, "loss": 0.0, "step": 2843 }, { "epoch": 0.47834496678159955, "grad_norm": NaN, "learning_rate": 5.4789686879795263e-05, "loss": 0.0, "step": 2844 }, { "epoch": 0.4785131612143638, "grad_norm": NaN, "learning_rate": 5.476293558945832e-05, "loss": 0.0, "step": 2845 }, { "epoch": 0.4786813556471281, "grad_norm": NaN, "learning_rate": 5.4736182923162605e-05, "loss": 0.0, "step": 2846 }, { "epoch": 0.4788495500798924, "grad_norm": NaN, "learning_rate": 5.470942888863672e-05, "loss": 0.0, "step": 2847 }, { "epoch": 0.4790177445126566, "grad_norm": NaN, "learning_rate": 5.468267349360957e-05, "loss": 0.0, "step": 2848 }, { "epoch": 0.4791859389454209, "grad_norm": NaN, "learning_rate": 5.465591674581049e-05, "loss": 0.0, "step": 2849 }, { "epoch": 0.47935413337818517, "grad_norm": NaN, "learning_rate": 5.4629158652969216e-05, "loss": 0.0, "step": 2850 }, { "epoch": 0.47952232781094944, "grad_norm": NaN, "learning_rate": 5.460239922281586e-05, "loss": 0.0, "step": 2851 }, { "epoch": 0.4796905222437137, "grad_norm": NaN, "learning_rate": 5.457563846308091e-05, "loss": 0.0, "step": 2852 }, { "epoch": 0.479858716676478, "grad_norm": NaN, "learning_rate": 5.454887638149526e-05, "loss": 0.0, "step": 2853 }, { "epoch": 0.4800269111092423, "grad_norm": NaN, "learning_rate": 5.4522112985790175e-05, "loss": 0.0, "step": 2854 }, { "epoch": 0.48019510554200656, "grad_norm": NaN, "learning_rate": 5.44953482836973e-05, "loss": 0.0, "step": 2855 }, { "epoch": 0.48036329997477084, "grad_norm": NaN, "learning_rate": 5.446858228294865e-05, "loss": 0.0, "step": 2856 }, { "epoch": 0.4805314944075351, "grad_norm": NaN, "learning_rate": 5.444181499127663e-05, "loss": 0.0, "step": 2857 }, { "epoch": 0.4806996888402994, "grad_norm": NaN, "learning_rate": 5.441504641641403e-05, "loss": 0.0, "step": 2858 }, { "epoch": 0.4808678832730637, "grad_norm": NaN, "learning_rate": 5.4388276566093955e-05, "loss": 0.0, "step": 2859 }, { "epoch": 0.48103607770582796, "grad_norm": NaN, "learning_rate": 5.436150544804994e-05, "loss": 0.0, "step": 2860 }, { "epoch": 0.48120427213859224, "grad_norm": NaN, "learning_rate": 5.433473307001584e-05, "loss": 0.0, "step": 2861 }, { "epoch": 0.48137246657135646, "grad_norm": NaN, "learning_rate": 5.4307959439725916e-05, "loss": 0.0, "step": 2862 }, { "epoch": 0.48154066100412074, "grad_norm": NaN, "learning_rate": 5.4281184564914754e-05, "loss": 0.0, "step": 2863 }, { "epoch": 0.481708855436885, "grad_norm": NaN, "learning_rate": 5.425440845331733e-05, "loss": 0.0, "step": 2864 }, { "epoch": 0.4818770498696493, "grad_norm": NaN, "learning_rate": 5.4227631112668955e-05, "loss": 0.0, "step": 2865 }, { "epoch": 0.4820452443024136, "grad_norm": NaN, "learning_rate": 5.420085255070531e-05, "loss": 0.0, "step": 2866 }, { "epoch": 0.48221343873517786, "grad_norm": NaN, "learning_rate": 5.41740727751624e-05, "loss": 0.0, "step": 2867 }, { "epoch": 0.48238163316794214, "grad_norm": NaN, "learning_rate": 5.4147291793776636e-05, "loss": 0.0, "step": 2868 }, { "epoch": 0.4825498276007064, "grad_norm": NaN, "learning_rate": 5.412050961428471e-05, "loss": 0.0, "step": 2869 }, { "epoch": 0.4827180220334707, "grad_norm": NaN, "learning_rate": 5.409372624442373e-05, "loss": 0.0, "step": 2870 }, { "epoch": 0.482886216466235, "grad_norm": NaN, "learning_rate": 5.406694169193106e-05, "loss": 0.0, "step": 2871 }, { "epoch": 0.48305441089899925, "grad_norm": NaN, "learning_rate": 5.4040155964544506e-05, "loss": 0.0, "step": 2872 }, { "epoch": 0.48322260533176353, "grad_norm": NaN, "learning_rate": 5.401336907000214e-05, "loss": 0.0, "step": 2873 }, { "epoch": 0.4833907997645278, "grad_norm": NaN, "learning_rate": 5.39865810160424e-05, "loss": 0.0, "step": 2874 }, { "epoch": 0.4835589941972921, "grad_norm": NaN, "learning_rate": 5.395979181040406e-05, "loss": 0.0, "step": 2875 }, { "epoch": 0.48372718863005637, "grad_norm": NaN, "learning_rate": 5.3933001460826225e-05, "loss": 0.0, "step": 2876 }, { "epoch": 0.4838953830628206, "grad_norm": NaN, "learning_rate": 5.3906209975048317e-05, "loss": 0.0, "step": 2877 }, { "epoch": 0.4840635774955849, "grad_norm": NaN, "learning_rate": 5.38794173608101e-05, "loss": 0.0, "step": 2878 }, { "epoch": 0.48423177192834915, "grad_norm": NaN, "learning_rate": 5.385262362585165e-05, "loss": 0.0, "step": 2879 }, { "epoch": 0.48439996636111343, "grad_norm": NaN, "learning_rate": 5.38258287779134e-05, "loss": 0.0, "step": 2880 }, { "epoch": 0.4845681607938777, "grad_norm": NaN, "learning_rate": 5.379903282473605e-05, "loss": 0.0, "step": 2881 }, { "epoch": 0.484736355226642, "grad_norm": NaN, "learning_rate": 5.377223577406069e-05, "loss": 0.0, "step": 2882 }, { "epoch": 0.48490454965940627, "grad_norm": NaN, "learning_rate": 5.374543763362865e-05, "loss": 0.0, "step": 2883 }, { "epoch": 0.48507274409217055, "grad_norm": NaN, "learning_rate": 5.371863841118163e-05, "loss": 0.0, "step": 2884 }, { "epoch": 0.48524093852493483, "grad_norm": NaN, "learning_rate": 5.3691838114461626e-05, "loss": 0.0, "step": 2885 }, { "epoch": 0.4854091329576991, "grad_norm": NaN, "learning_rate": 5.366503675121095e-05, "loss": 0.0, "step": 2886 }, { "epoch": 0.4855773273904634, "grad_norm": NaN, "learning_rate": 5.363823432917221e-05, "loss": 0.0, "step": 2887 }, { "epoch": 0.48574552182322767, "grad_norm": NaN, "learning_rate": 5.361143085608832e-05, "loss": 0.0, "step": 2888 }, { "epoch": 0.48591371625599195, "grad_norm": NaN, "learning_rate": 5.3584626339702495e-05, "loss": 0.0, "step": 2889 }, { "epoch": 0.4860819106887562, "grad_norm": NaN, "learning_rate": 5.355782078775827e-05, "loss": 0.0, "step": 2890 }, { "epoch": 0.4862501051215205, "grad_norm": NaN, "learning_rate": 5.3531014207999475e-05, "loss": 0.0, "step": 2891 }, { "epoch": 0.4864182995542847, "grad_norm": NaN, "learning_rate": 5.3504206608170224e-05, "loss": 0.0, "step": 2892 }, { "epoch": 0.486586493987049, "grad_norm": NaN, "learning_rate": 5.347739799601493e-05, "loss": 0.0, "step": 2893 }, { "epoch": 0.4867546884198133, "grad_norm": NaN, "learning_rate": 5.345058837927831e-05, "loss": 0.0, "step": 2894 }, { "epoch": 0.48692288285257757, "grad_norm": NaN, "learning_rate": 5.342377776570535e-05, "loss": 0.0, "step": 2895 }, { "epoch": 0.48709107728534184, "grad_norm": NaN, "learning_rate": 5.339696616304135e-05, "loss": 0.0, "step": 2896 }, { "epoch": 0.4872592717181061, "grad_norm": NaN, "learning_rate": 5.3370153579031876e-05, "loss": 0.0, "step": 2897 }, { "epoch": 0.4874274661508704, "grad_norm": NaN, "learning_rate": 5.334334002142278e-05, "loss": 0.0, "step": 2898 }, { "epoch": 0.4875956605836347, "grad_norm": NaN, "learning_rate": 5.33165254979602e-05, "loss": 0.0, "step": 2899 }, { "epoch": 0.48776385501639896, "grad_norm": NaN, "learning_rate": 5.3289710016390535e-05, "loss": 0.0, "step": 2900 }, { "epoch": 0.48793204944916324, "grad_norm": NaN, "learning_rate": 5.326289358446051e-05, "loss": 0.0, "step": 2901 }, { "epoch": 0.4881002438819275, "grad_norm": NaN, "learning_rate": 5.323607620991706e-05, "loss": 0.0, "step": 2902 }, { "epoch": 0.4882684383146918, "grad_norm": NaN, "learning_rate": 5.320925790050746e-05, "loss": 0.0, "step": 2903 }, { "epoch": 0.4884366327474561, "grad_norm": NaN, "learning_rate": 5.318243866397917e-05, "loss": 0.0, "step": 2904 }, { "epoch": 0.48860482718022036, "grad_norm": NaN, "learning_rate": 5.315561850808002e-05, "loss": 0.0, "step": 2905 }, { "epoch": 0.48877302161298464, "grad_norm": NaN, "learning_rate": 5.312879744055802e-05, "loss": 0.0, "step": 2906 }, { "epoch": 0.48894121604574886, "grad_norm": NaN, "learning_rate": 5.310197546916149e-05, "loss": 0.0, "step": 2907 }, { "epoch": 0.48910941047851314, "grad_norm": NaN, "learning_rate": 5.3075152601638976e-05, "loss": 0.0, "step": 2908 }, { "epoch": 0.4892776049112774, "grad_norm": NaN, "learning_rate": 5.304832884573934e-05, "loss": 0.0, "step": 2909 }, { "epoch": 0.4894457993440417, "grad_norm": NaN, "learning_rate": 5.302150420921161e-05, "loss": 0.0, "step": 2910 }, { "epoch": 0.489613993776806, "grad_norm": NaN, "learning_rate": 5.2994678699805186e-05, "loss": 0.0, "step": 2911 }, { "epoch": 0.48978218820957026, "grad_norm": NaN, "learning_rate": 5.296785232526962e-05, "loss": 0.0, "step": 2912 }, { "epoch": 0.48995038264233454, "grad_norm": NaN, "learning_rate": 5.2941025093354756e-05, "loss": 0.0, "step": 2913 }, { "epoch": 0.4901185770750988, "grad_norm": NaN, "learning_rate": 5.291419701181068e-05, "loss": 0.0, "step": 2914 }, { "epoch": 0.4902867715078631, "grad_norm": NaN, "learning_rate": 5.288736808838774e-05, "loss": 0.0, "step": 2915 }, { "epoch": 0.4904549659406274, "grad_norm": NaN, "learning_rate": 5.2860538330836504e-05, "loss": 0.0, "step": 2916 }, { "epoch": 0.49062316037339165, "grad_norm": NaN, "learning_rate": 5.283370774690778e-05, "loss": 0.0, "step": 2917 }, { "epoch": 0.49079135480615593, "grad_norm": NaN, "learning_rate": 5.280687634435261e-05, "loss": 0.0, "step": 2918 }, { "epoch": 0.4909595492389202, "grad_norm": NaN, "learning_rate": 5.2780044130922325e-05, "loss": 0.0, "step": 2919 }, { "epoch": 0.4911277436716845, "grad_norm": NaN, "learning_rate": 5.2753211114368406e-05, "loss": 0.0, "step": 2920 }, { "epoch": 0.4912959381044487, "grad_norm": NaN, "learning_rate": 5.2726377302442656e-05, "loss": 0.0, "step": 2921 }, { "epoch": 0.491464132537213, "grad_norm": NaN, "learning_rate": 5.269954270289702e-05, "loss": 0.0, "step": 2922 }, { "epoch": 0.4916323269699773, "grad_norm": NaN, "learning_rate": 5.2672707323483747e-05, "loss": 0.0, "step": 2923 }, { "epoch": 0.49180052140274155, "grad_norm": NaN, "learning_rate": 5.2645871171955275e-05, "loss": 0.0, "step": 2924 }, { "epoch": 0.49196871583550583, "grad_norm": NaN, "learning_rate": 5.2619034256064245e-05, "loss": 0.0, "step": 2925 }, { "epoch": 0.4921369102682701, "grad_norm": NaN, "learning_rate": 5.259219658356355e-05, "loss": 0.0, "step": 2926 }, { "epoch": 0.4923051047010344, "grad_norm": NaN, "learning_rate": 5.256535816220629e-05, "loss": 0.0, "step": 2927 }, { "epoch": 0.49247329913379867, "grad_norm": NaN, "learning_rate": 5.25385189997458e-05, "loss": 0.0, "step": 2928 }, { "epoch": 0.49264149356656295, "grad_norm": NaN, "learning_rate": 5.251167910393561e-05, "loss": 0.0, "step": 2929 }, { "epoch": 0.49280968799932723, "grad_norm": NaN, "learning_rate": 5.248483848252944e-05, "loss": 0.0, "step": 2930 }, { "epoch": 0.4929778824320915, "grad_norm": NaN, "learning_rate": 5.245799714328128e-05, "loss": 0.0, "step": 2931 }, { "epoch": 0.4931460768648558, "grad_norm": NaN, "learning_rate": 5.2431155093945275e-05, "loss": 0.0, "step": 2932 }, { "epoch": 0.49331427129762007, "grad_norm": NaN, "learning_rate": 5.2404312342275784e-05, "loss": 0.0, "step": 2933 }, { "epoch": 0.49348246573038435, "grad_norm": NaN, "learning_rate": 5.2377468896027403e-05, "loss": 0.0, "step": 2934 }, { "epoch": 0.4936506601631486, "grad_norm": NaN, "learning_rate": 5.2350624762954884e-05, "loss": 0.0, "step": 2935 }, { "epoch": 0.49381885459591285, "grad_norm": NaN, "learning_rate": 5.232377995081319e-05, "loss": 0.0, "step": 2936 }, { "epoch": 0.4939870490286771, "grad_norm": NaN, "learning_rate": 5.229693446735751e-05, "loss": 0.0, "step": 2937 }, { "epoch": 0.4941552434614414, "grad_norm": NaN, "learning_rate": 5.227008832034317e-05, "loss": 0.0, "step": 2938 }, { "epoch": 0.4943234378942057, "grad_norm": NaN, "learning_rate": 5.2243241517525754e-05, "loss": 0.0, "step": 2939 }, { "epoch": 0.49449163232696997, "grad_norm": NaN, "learning_rate": 5.221639406666098e-05, "loss": 0.0, "step": 2940 }, { "epoch": 0.49465982675973424, "grad_norm": NaN, "learning_rate": 5.2189545975504795e-05, "loss": 0.0, "step": 2941 }, { "epoch": 0.4948280211924985, "grad_norm": NaN, "learning_rate": 5.21626972518133e-05, "loss": 0.0, "step": 2942 }, { "epoch": 0.4949962156252628, "grad_norm": NaN, "learning_rate": 5.2135847903342795e-05, "loss": 0.0, "step": 2943 }, { "epoch": 0.4951644100580271, "grad_norm": NaN, "learning_rate": 5.2108997937849743e-05, "loss": 0.0, "step": 2944 }, { "epoch": 0.49533260449079136, "grad_norm": NaN, "learning_rate": 5.208214736309082e-05, "loss": 0.0, "step": 2945 }, { "epoch": 0.49550079892355564, "grad_norm": NaN, "learning_rate": 5.205529618682283e-05, "loss": 0.0, "step": 2946 }, { "epoch": 0.4956689933563199, "grad_norm": NaN, "learning_rate": 5.2028444416802814e-05, "loss": 0.0, "step": 2947 }, { "epoch": 0.4958371877890842, "grad_norm": NaN, "learning_rate": 5.200159206078792e-05, "loss": 0.0, "step": 2948 }, { "epoch": 0.4960053822218485, "grad_norm": NaN, "learning_rate": 5.197473912653549e-05, "loss": 0.0, "step": 2949 }, { "epoch": 0.49617357665461276, "grad_norm": NaN, "learning_rate": 5.194788562180305e-05, "loss": 0.0, "step": 2950 }, { "epoch": 0.496341771087377, "grad_norm": NaN, "learning_rate": 5.192103155434825e-05, "loss": 0.0, "step": 2951 }, { "epoch": 0.49650996552014126, "grad_norm": NaN, "learning_rate": 5.189417693192898e-05, "loss": 0.0, "step": 2952 }, { "epoch": 0.49667815995290554, "grad_norm": NaN, "learning_rate": 5.186732176230321e-05, "loss": 0.0, "step": 2953 }, { "epoch": 0.4968463543856698, "grad_norm": NaN, "learning_rate": 5.184046605322909e-05, "loss": 0.0, "step": 2954 }, { "epoch": 0.4970145488184341, "grad_norm": NaN, "learning_rate": 5.1813609812464936e-05, "loss": 0.0, "step": 2955 }, { "epoch": 0.4971827432511984, "grad_norm": NaN, "learning_rate": 5.178675304776923e-05, "loss": 0.0, "step": 2956 }, { "epoch": 0.49735093768396266, "grad_norm": NaN, "learning_rate": 5.1759895766900566e-05, "loss": 0.0, "step": 2957 }, { "epoch": 0.49751913211672694, "grad_norm": NaN, "learning_rate": 5.173303797761775e-05, "loss": 0.0, "step": 2958 }, { "epoch": 0.4976873265494912, "grad_norm": NaN, "learning_rate": 5.170617968767965e-05, "loss": 0.0, "step": 2959 }, { "epoch": 0.4978555209822555, "grad_norm": NaN, "learning_rate": 5.1679320904845365e-05, "loss": 0.0, "step": 2960 }, { "epoch": 0.4980237154150198, "grad_norm": NaN, "learning_rate": 5.165246163687406e-05, "loss": 0.0, "step": 2961 }, { "epoch": 0.49819190984778405, "grad_norm": NaN, "learning_rate": 5.162560189152512e-05, "loss": 0.0, "step": 2962 }, { "epoch": 0.49836010428054833, "grad_norm": NaN, "learning_rate": 5.1598741676557995e-05, "loss": 0.0, "step": 2963 }, { "epoch": 0.4985282987133126, "grad_norm": NaN, "learning_rate": 5.157188099973231e-05, "loss": 0.0, "step": 2964 }, { "epoch": 0.4986964931460769, "grad_norm": NaN, "learning_rate": 5.15450198688078e-05, "loss": 0.0, "step": 2965 }, { "epoch": 0.4988646875788411, "grad_norm": NaN, "learning_rate": 5.151815829154436e-05, "loss": 0.0, "step": 2966 }, { "epoch": 0.4990328820116054, "grad_norm": NaN, "learning_rate": 5.149129627570198e-05, "loss": 0.0, "step": 2967 }, { "epoch": 0.4992010764443697, "grad_norm": NaN, "learning_rate": 5.1464433829040834e-05, "loss": 0.0, "step": 2968 }, { "epoch": 0.49936927087713395, "grad_norm": NaN, "learning_rate": 5.143757095932113e-05, "loss": 0.0, "step": 2969 }, { "epoch": 0.49953746530989823, "grad_norm": NaN, "learning_rate": 5.14107076743033e-05, "loss": 0.0, "step": 2970 }, { "epoch": 0.4997056597426625, "grad_norm": NaN, "learning_rate": 5.1383843981747824e-05, "loss": 0.0, "step": 2971 }, { "epoch": 0.4998738541754268, "grad_norm": NaN, "learning_rate": 5.1356979889415324e-05, "loss": 0.0, "step": 2972 }, { "epoch": 0.500042048608191, "grad_norm": NaN, "learning_rate": 5.1330115405066517e-05, "loss": 0.0, "step": 2973 }, { "epoch": 0.5002102430409553, "grad_norm": NaN, "learning_rate": 5.1303250536462277e-05, "loss": 0.0, "step": 2974 }, { "epoch": 0.5003784374737196, "grad_norm": NaN, "learning_rate": 5.127638529136355e-05, "loss": 0.0, "step": 2975 }, { "epoch": 0.5005466319064839, "grad_norm": NaN, "learning_rate": 5.124951967753141e-05, "loss": 0.0, "step": 2976 }, { "epoch": 0.5007148263392481, "grad_norm": NaN, "learning_rate": 5.122265370272703e-05, "loss": 0.0, "step": 2977 }, { "epoch": 0.5008830207720124, "grad_norm": NaN, "learning_rate": 5.119578737471169e-05, "loss": 0.0, "step": 2978 }, { "epoch": 0.5010512152047767, "grad_norm": NaN, "learning_rate": 5.116892070124677e-05, "loss": 0.0, "step": 2979 }, { "epoch": 0.501219409637541, "grad_norm": NaN, "learning_rate": 5.114205369009375e-05, "loss": 0.0, "step": 2980 }, { "epoch": 0.5013876040703052, "grad_norm": NaN, "learning_rate": 5.1115186349014224e-05, "loss": 0.0, "step": 2981 }, { "epoch": 0.5015557985030695, "grad_norm": NaN, "learning_rate": 5.108831868576984e-05, "loss": 0.0, "step": 2982 }, { "epoch": 0.5017239929358338, "grad_norm": NaN, "learning_rate": 5.1061450708122385e-05, "loss": 0.0, "step": 2983 }, { "epoch": 0.5018921873685981, "grad_norm": NaN, "learning_rate": 5.103458242383371e-05, "loss": 0.0, "step": 2984 }, { "epoch": 0.5020603818013624, "grad_norm": NaN, "learning_rate": 5.100771384066575e-05, "loss": 0.0, "step": 2985 }, { "epoch": 0.5022285762341266, "grad_norm": NaN, "learning_rate": 5.0980844966380545e-05, "loss": 0.0, "step": 2986 }, { "epoch": 0.5023967706668909, "grad_norm": NaN, "learning_rate": 5.0953975808740226e-05, "loss": 0.0, "step": 2987 }, { "epoch": 0.5025649650996552, "grad_norm": NaN, "learning_rate": 5.0927106375506984e-05, "loss": 0.0, "step": 2988 }, { "epoch": 0.5027331595324195, "grad_norm": NaN, "learning_rate": 5.090023667444309e-05, "loss": 0.0, "step": 2989 }, { "epoch": 0.5029013539651838, "grad_norm": NaN, "learning_rate": 5.087336671331091e-05, "loss": 0.0, "step": 2990 }, { "epoch": 0.503069548397948, "grad_norm": NaN, "learning_rate": 5.084649649987285e-05, "loss": 0.0, "step": 2991 }, { "epoch": 0.5032377428307123, "grad_norm": NaN, "learning_rate": 5.081962604189145e-05, "loss": 0.0, "step": 2992 }, { "epoch": 0.5034059372634766, "grad_norm": NaN, "learning_rate": 5.079275534712926e-05, "loss": 0.0, "step": 2993 }, { "epoch": 0.5035741316962409, "grad_norm": NaN, "learning_rate": 5.076588442334894e-05, "loss": 0.0, "step": 2994 }, { "epoch": 0.5037423261290052, "grad_norm": NaN, "learning_rate": 5.073901327831317e-05, "loss": 0.0, "step": 2995 }, { "epoch": 0.5039105205617694, "grad_norm": NaN, "learning_rate": 5.071214191978476e-05, "loss": 0.0, "step": 2996 }, { "epoch": 0.5040787149945337, "grad_norm": NaN, "learning_rate": 5.0685270355526506e-05, "loss": 0.0, "step": 2997 }, { "epoch": 0.504246909427298, "grad_norm": NaN, "learning_rate": 5.065839859330134e-05, "loss": 0.0, "step": 2998 }, { "epoch": 0.5044151038600623, "grad_norm": NaN, "learning_rate": 5.06315266408722e-05, "loss": 0.0, "step": 2999 }, { "epoch": 0.5045832982928266, "grad_norm": NaN, "learning_rate": 5.060465450600207e-05, "loss": 0.0, "step": 3000 }, { "epoch": 0.5047514927255908, "grad_norm": NaN, "learning_rate": 5.057778219645404e-05, "loss": 0.0, "step": 3001 }, { "epoch": 0.5049196871583551, "grad_norm": NaN, "learning_rate": 5.0550909719991194e-05, "loss": 0.0, "step": 3002 }, { "epoch": 0.5050878815911193, "grad_norm": NaN, "learning_rate": 5.052403708437669e-05, "loss": 0.0, "step": 3003 }, { "epoch": 0.5052560760238836, "grad_norm": NaN, "learning_rate": 5.0497164297373755e-05, "loss": 0.0, "step": 3004 }, { "epoch": 0.5054242704566478, "grad_norm": NaN, "learning_rate": 5.047029136674563e-05, "loss": 0.0, "step": 3005 }, { "epoch": 0.5055924648894121, "grad_norm": NaN, "learning_rate": 5.0443418300255577e-05, "loss": 0.0, "step": 3006 }, { "epoch": 0.5057606593221764, "grad_norm": NaN, "learning_rate": 5.041654510566696e-05, "loss": 0.0, "step": 3007 }, { "epoch": 0.5059288537549407, "grad_norm": NaN, "learning_rate": 5.038967179074312e-05, "loss": 0.0, "step": 3008 }, { "epoch": 0.506097048187705, "grad_norm": NaN, "learning_rate": 5.036279836324746e-05, "loss": 0.0, "step": 3009 }, { "epoch": 0.5062652426204692, "grad_norm": NaN, "learning_rate": 5.033592483094344e-05, "loss": 0.0, "step": 3010 }, { "epoch": 0.5064334370532335, "grad_norm": NaN, "learning_rate": 5.030905120159448e-05, "loss": 0.0, "step": 3011 }, { "epoch": 0.5066016314859978, "grad_norm": NaN, "learning_rate": 5.028217748296409e-05, "loss": 0.0, "step": 3012 }, { "epoch": 0.5067698259187621, "grad_norm": NaN, "learning_rate": 5.025530368281579e-05, "loss": 0.0, "step": 3013 }, { "epoch": 0.5069380203515264, "grad_norm": NaN, "learning_rate": 5.0228429808913105e-05, "loss": 0.0, "step": 3014 }, { "epoch": 0.5071062147842906, "grad_norm": NaN, "learning_rate": 5.0201555869019626e-05, "loss": 0.0, "step": 3015 }, { "epoch": 0.5072744092170549, "grad_norm": NaN, "learning_rate": 5.017468187089889e-05, "loss": 0.0, "step": 3016 }, { "epoch": 0.5074426036498192, "grad_norm": NaN, "learning_rate": 5.014780782231453e-05, "loss": 0.0, "step": 3017 }, { "epoch": 0.5076107980825835, "grad_norm": NaN, "learning_rate": 5.012093373103015e-05, "loss": 0.0, "step": 3018 }, { "epoch": 0.5077789925153477, "grad_norm": NaN, "learning_rate": 5.0094059604809375e-05, "loss": 0.0, "step": 3019 }, { "epoch": 0.507947186948112, "grad_norm": NaN, "learning_rate": 5.006718545141582e-05, "loss": 0.0, "step": 3020 }, { "epoch": 0.5081153813808763, "grad_norm": NaN, "learning_rate": 5.004031127861315e-05, "loss": 0.0, "step": 3021 }, { "epoch": 0.5082835758136406, "grad_norm": NaN, "learning_rate": 5.0013437094165e-05, "loss": 0.0, "step": 3022 }, { "epoch": 0.5084517702464049, "grad_norm": NaN, "learning_rate": 4.998656290583502e-05, "loss": 0.0, "step": 3023 }, { "epoch": 0.5086199646791691, "grad_norm": NaN, "learning_rate": 4.9959688721386874e-05, "loss": 0.0, "step": 3024 }, { "epoch": 0.5087881591119334, "grad_norm": NaN, "learning_rate": 4.993281454858418e-05, "loss": 0.0, "step": 3025 }, { "epoch": 0.5089563535446977, "grad_norm": NaN, "learning_rate": 4.990594039519063e-05, "loss": 0.0, "step": 3026 }, { "epoch": 0.509124547977462, "grad_norm": NaN, "learning_rate": 4.987906626896985e-05, "loss": 0.0, "step": 3027 }, { "epoch": 0.5092927424102263, "grad_norm": NaN, "learning_rate": 4.985219217768548e-05, "loss": 0.0, "step": 3028 }, { "epoch": 0.5094609368429905, "grad_norm": NaN, "learning_rate": 4.982531812910111e-05, "loss": 0.0, "step": 3029 }, { "epoch": 0.5096291312757548, "grad_norm": NaN, "learning_rate": 4.979844413098039e-05, "loss": 0.0, "step": 3030 }, { "epoch": 0.5097973257085191, "grad_norm": NaN, "learning_rate": 4.97715701910869e-05, "loss": 0.0, "step": 3031 }, { "epoch": 0.5099655201412833, "grad_norm": NaN, "learning_rate": 4.9744696317184234e-05, "loss": 0.0, "step": 3032 }, { "epoch": 0.5101337145740475, "grad_norm": NaN, "learning_rate": 4.971782251703591e-05, "loss": 0.0, "step": 3033 }, { "epoch": 0.5103019090068118, "grad_norm": NaN, "learning_rate": 4.969094879840553e-05, "loss": 0.0, "step": 3034 }, { "epoch": 0.5104701034395761, "grad_norm": NaN, "learning_rate": 4.966407516905657e-05, "loss": 0.0, "step": 3035 }, { "epoch": 0.5106382978723404, "grad_norm": NaN, "learning_rate": 4.963720163675255e-05, "loss": 0.0, "step": 3036 }, { "epoch": 0.5108064923051047, "grad_norm": NaN, "learning_rate": 4.9610328209256886e-05, "loss": 0.0, "step": 3037 }, { "epoch": 0.5109746867378689, "grad_norm": NaN, "learning_rate": 4.958345489433305e-05, "loss": 0.0, "step": 3038 }, { "epoch": 0.5111428811706332, "grad_norm": NaN, "learning_rate": 4.955658169974443e-05, "loss": 0.0, "step": 3039 }, { "epoch": 0.5113110756033975, "grad_norm": NaN, "learning_rate": 4.95297086332544e-05, "loss": 0.0, "step": 3040 }, { "epoch": 0.5114792700361618, "grad_norm": NaN, "learning_rate": 4.950283570262624e-05, "loss": 0.0, "step": 3041 }, { "epoch": 0.5116474644689261, "grad_norm": NaN, "learning_rate": 4.947596291562331e-05, "loss": 0.0, "step": 3042 }, { "epoch": 0.5118156589016903, "grad_norm": NaN, "learning_rate": 4.9449090280008824e-05, "loss": 0.0, "step": 3043 }, { "epoch": 0.5119838533344546, "grad_norm": NaN, "learning_rate": 4.942221780354599e-05, "loss": 0.0, "step": 3044 }, { "epoch": 0.5121520477672189, "grad_norm": NaN, "learning_rate": 4.9395345493997944e-05, "loss": 0.0, "step": 3045 }, { "epoch": 0.5123202421999832, "grad_norm": NaN, "learning_rate": 4.936847335912782e-05, "loss": 0.0, "step": 3046 }, { "epoch": 0.5124884366327475, "grad_norm": NaN, "learning_rate": 4.934160140669867e-05, "loss": 0.0, "step": 3047 }, { "epoch": 0.5126566310655117, "grad_norm": NaN, "learning_rate": 4.93147296444735e-05, "loss": 0.0, "step": 3048 }, { "epoch": 0.512824825498276, "grad_norm": NaN, "learning_rate": 4.9287858080215265e-05, "loss": 0.0, "step": 3049 }, { "epoch": 0.5129930199310403, "grad_norm": NaN, "learning_rate": 4.9260986721686826e-05, "loss": 0.0, "step": 3050 }, { "epoch": 0.5131612143638046, "grad_norm": NaN, "learning_rate": 4.923411557665108e-05, "loss": 0.0, "step": 3051 }, { "epoch": 0.5133294087965689, "grad_norm": NaN, "learning_rate": 4.920724465287075e-05, "loss": 0.0, "step": 3052 }, { "epoch": 0.5134976032293331, "grad_norm": NaN, "learning_rate": 4.9180373958108566e-05, "loss": 0.0, "step": 3053 }, { "epoch": 0.5136657976620974, "grad_norm": NaN, "learning_rate": 4.915350350012714e-05, "loss": 0.0, "step": 3054 }, { "epoch": 0.5138339920948617, "grad_norm": NaN, "learning_rate": 4.91266332866891e-05, "loss": 0.0, "step": 3055 }, { "epoch": 0.514002186527626, "grad_norm": NaN, "learning_rate": 4.9099763325556916e-05, "loss": 0.0, "step": 3056 }, { "epoch": 0.5141703809603902, "grad_norm": NaN, "learning_rate": 4.907289362449303e-05, "loss": 0.0, "step": 3057 }, { "epoch": 0.5143385753931545, "grad_norm": NaN, "learning_rate": 4.904602419125978e-05, "loss": 0.0, "step": 3058 }, { "epoch": 0.5145067698259188, "grad_norm": NaN, "learning_rate": 4.901915503361946e-05, "loss": 0.0, "step": 3059 }, { "epoch": 0.5146749642586831, "grad_norm": NaN, "learning_rate": 4.899228615933427e-05, "loss": 0.0, "step": 3060 }, { "epoch": 0.5148431586914474, "grad_norm": NaN, "learning_rate": 4.8965417576166316e-05, "loss": 0.0, "step": 3061 }, { "epoch": 0.5150113531242115, "grad_norm": NaN, "learning_rate": 4.893854929187762e-05, "loss": 0.0, "step": 3062 }, { "epoch": 0.5151795475569758, "grad_norm": NaN, "learning_rate": 4.8911681314230164e-05, "loss": 0.0, "step": 3063 }, { "epoch": 0.5153477419897401, "grad_norm": NaN, "learning_rate": 4.888481365098579e-05, "loss": 0.0, "step": 3064 }, { "epoch": 0.5155159364225044, "grad_norm": NaN, "learning_rate": 4.885794630990625e-05, "loss": 0.0, "step": 3065 }, { "epoch": 0.5156841308552687, "grad_norm": NaN, "learning_rate": 4.883107929875323e-05, "loss": 0.0, "step": 3066 }, { "epoch": 0.5158523252880329, "grad_norm": NaN, "learning_rate": 4.880421262528832e-05, "loss": 0.0, "step": 3067 }, { "epoch": 0.5160205197207972, "grad_norm": NaN, "learning_rate": 4.8777346297272986e-05, "loss": 0.0, "step": 3068 }, { "epoch": 0.5161887141535615, "grad_norm": NaN, "learning_rate": 4.875048032246861e-05, "loss": 0.0, "step": 3069 }, { "epoch": 0.5163569085863258, "grad_norm": NaN, "learning_rate": 4.872361470863646e-05, "loss": 0.0, "step": 3070 }, { "epoch": 0.51652510301909, "grad_norm": NaN, "learning_rate": 4.8696749463537735e-05, "loss": 0.0, "step": 3071 }, { "epoch": 0.5166932974518543, "grad_norm": NaN, "learning_rate": 4.8669884594933495e-05, "loss": 0.0, "step": 3072 }, { "epoch": 0.5168614918846186, "grad_norm": NaN, "learning_rate": 4.8643020110584694e-05, "loss": 0.0, "step": 3073 }, { "epoch": 0.5170296863173829, "grad_norm": NaN, "learning_rate": 4.861615601825219e-05, "loss": 0.0, "step": 3074 }, { "epoch": 0.5171978807501472, "grad_norm": NaN, "learning_rate": 4.858929232569671e-05, "loss": 0.0, "step": 3075 }, { "epoch": 0.5173660751829114, "grad_norm": NaN, "learning_rate": 4.856242904067887e-05, "loss": 0.0, "step": 3076 }, { "epoch": 0.5175342696156757, "grad_norm": NaN, "learning_rate": 4.853556617095919e-05, "loss": 0.0, "step": 3077 }, { "epoch": 0.51770246404844, "grad_norm": NaN, "learning_rate": 4.8508703724298016e-05, "loss": 0.0, "step": 3078 }, { "epoch": 0.5178706584812043, "grad_norm": NaN, "learning_rate": 4.848184170845565e-05, "loss": 0.0, "step": 3079 }, { "epoch": 0.5180388529139686, "grad_norm": NaN, "learning_rate": 4.845498013119221e-05, "loss": 0.0, "step": 3080 }, { "epoch": 0.5182070473467328, "grad_norm": NaN, "learning_rate": 4.842811900026771e-05, "loss": 0.0, "step": 3081 }, { "epoch": 0.5183752417794971, "grad_norm": NaN, "learning_rate": 4.8401258323442016e-05, "loss": 0.0, "step": 3082 }, { "epoch": 0.5185434362122614, "grad_norm": NaN, "learning_rate": 4.837439810847489e-05, "loss": 0.0, "step": 3083 }, { "epoch": 0.5187116306450257, "grad_norm": NaN, "learning_rate": 4.8347538363125947e-05, "loss": 0.0, "step": 3084 }, { "epoch": 0.51887982507779, "grad_norm": NaN, "learning_rate": 4.832067909515467e-05, "loss": 0.0, "step": 3085 }, { "epoch": 0.5190480195105542, "grad_norm": NaN, "learning_rate": 4.8293820312320356e-05, "loss": 0.0, "step": 3086 }, { "epoch": 0.5192162139433185, "grad_norm": NaN, "learning_rate": 4.826696202238227e-05, "loss": 0.0, "step": 3087 }, { "epoch": 0.5193844083760828, "grad_norm": NaN, "learning_rate": 4.8240104233099445e-05, "loss": 0.0, "step": 3088 }, { "epoch": 0.5195526028088471, "grad_norm": NaN, "learning_rate": 4.8213246952230794e-05, "loss": 0.0, "step": 3089 }, { "epoch": 0.5197207972416114, "grad_norm": NaN, "learning_rate": 4.818639018753506e-05, "loss": 0.0, "step": 3090 }, { "epoch": 0.5198889916743756, "grad_norm": NaN, "learning_rate": 4.8159533946770916e-05, "loss": 0.0, "step": 3091 }, { "epoch": 0.5200571861071398, "grad_norm": NaN, "learning_rate": 4.8132678237696805e-05, "loss": 0.0, "step": 3092 }, { "epoch": 0.5202253805399041, "grad_norm": NaN, "learning_rate": 4.810582306807103e-05, "loss": 0.0, "step": 3093 }, { "epoch": 0.5203935749726684, "grad_norm": NaN, "learning_rate": 4.807896844565174e-05, "loss": 0.0, "step": 3094 }, { "epoch": 0.5205617694054326, "grad_norm": NaN, "learning_rate": 4.8052114378196964e-05, "loss": 0.0, "step": 3095 }, { "epoch": 0.5207299638381969, "grad_norm": NaN, "learning_rate": 4.802526087346453e-05, "loss": 0.0, "step": 3096 }, { "epoch": 0.5208981582709612, "grad_norm": NaN, "learning_rate": 4.7998407939212104e-05, "loss": 0.0, "step": 3097 }, { "epoch": 0.5210663527037255, "grad_norm": NaN, "learning_rate": 4.797155558319721e-05, "loss": 0.0, "step": 3098 }, { "epoch": 0.5212345471364898, "grad_norm": NaN, "learning_rate": 4.7944703813177166e-05, "loss": 0.0, "step": 3099 }, { "epoch": 0.521402741569254, "grad_norm": NaN, "learning_rate": 4.7917852636909185e-05, "loss": 0.0, "step": 3100 }, { "epoch": 0.5215709360020183, "grad_norm": NaN, "learning_rate": 4.789100206215027e-05, "loss": 0.0, "step": 3101 }, { "epoch": 0.5217391304347826, "grad_norm": NaN, "learning_rate": 4.786415209665723e-05, "loss": 0.0, "step": 3102 }, { "epoch": 0.5219073248675469, "grad_norm": NaN, "learning_rate": 4.7837302748186705e-05, "loss": 0.0, "step": 3103 }, { "epoch": 0.5220755193003112, "grad_norm": NaN, "learning_rate": 4.781045402449522e-05, "loss": 0.0, "step": 3104 }, { "epoch": 0.5222437137330754, "grad_norm": NaN, "learning_rate": 4.778360593333903e-05, "loss": 0.0, "step": 3105 }, { "epoch": 0.5224119081658397, "grad_norm": NaN, "learning_rate": 4.775675848247427e-05, "loss": 0.0, "step": 3106 }, { "epoch": 0.522580102598604, "grad_norm": NaN, "learning_rate": 4.772991167965683e-05, "loss": 0.0, "step": 3107 }, { "epoch": 0.5227482970313683, "grad_norm": NaN, "learning_rate": 4.770306553264251e-05, "loss": 0.0, "step": 3108 }, { "epoch": 0.5229164914641325, "grad_norm": NaN, "learning_rate": 4.767622004918682e-05, "loss": 0.0, "step": 3109 }, { "epoch": 0.5230846858968968, "grad_norm": NaN, "learning_rate": 4.7649375237045135e-05, "loss": 0.0, "step": 3110 }, { "epoch": 0.5232528803296611, "grad_norm": NaN, "learning_rate": 4.762253110397261e-05, "loss": 0.0, "step": 3111 }, { "epoch": 0.5234210747624254, "grad_norm": NaN, "learning_rate": 4.759568765772422e-05, "loss": 0.0, "step": 3112 }, { "epoch": 0.5235892691951897, "grad_norm": NaN, "learning_rate": 4.756884490605474e-05, "loss": 0.0, "step": 3113 }, { "epoch": 0.523757463627954, "grad_norm": NaN, "learning_rate": 4.754200285671874e-05, "loss": 0.0, "step": 3114 }, { "epoch": 0.5239256580607182, "grad_norm": NaN, "learning_rate": 4.7515161517470556e-05, "loss": 0.0, "step": 3115 }, { "epoch": 0.5240938524934825, "grad_norm": NaN, "learning_rate": 4.748832089606441e-05, "loss": 0.0, "step": 3116 }, { "epoch": 0.5242620469262468, "grad_norm": NaN, "learning_rate": 4.746148100025421e-05, "loss": 0.0, "step": 3117 }, { "epoch": 0.5244302413590111, "grad_norm": NaN, "learning_rate": 4.743464183779372e-05, "loss": 0.0, "step": 3118 }, { "epoch": 0.5245984357917753, "grad_norm": NaN, "learning_rate": 4.7407803416436456e-05, "loss": 0.0, "step": 3119 }, { "epoch": 0.5247666302245396, "grad_norm": NaN, "learning_rate": 4.7380965743935766e-05, "loss": 0.0, "step": 3120 }, { "epoch": 0.5249348246573038, "grad_norm": NaN, "learning_rate": 4.735412882804474e-05, "loss": 0.0, "step": 3121 }, { "epoch": 0.5251030190900681, "grad_norm": NaN, "learning_rate": 4.732729267651626e-05, "loss": 0.0, "step": 3122 }, { "epoch": 0.5252712135228323, "grad_norm": NaN, "learning_rate": 4.730045729710298e-05, "loss": 0.0, "step": 3123 }, { "epoch": 0.5254394079555966, "grad_norm": NaN, "learning_rate": 4.7273622697557356e-05, "loss": 0.0, "step": 3124 }, { "epoch": 0.5256076023883609, "grad_norm": NaN, "learning_rate": 4.7246788885631605e-05, "loss": 0.0, "step": 3125 }, { "epoch": 0.5257757968211252, "grad_norm": NaN, "learning_rate": 4.72199558690777e-05, "loss": 0.0, "step": 3126 }, { "epoch": 0.5259439912538895, "grad_norm": NaN, "learning_rate": 4.719312365564739e-05, "loss": 0.0, "step": 3127 }, { "epoch": 0.5261121856866537, "grad_norm": NaN, "learning_rate": 4.716629225309224e-05, "loss": 0.0, "step": 3128 }, { "epoch": 0.526280380119418, "grad_norm": NaN, "learning_rate": 4.713946166916351e-05, "loss": 0.0, "step": 3129 }, { "epoch": 0.5264485745521823, "grad_norm": NaN, "learning_rate": 4.711263191161227e-05, "loss": 0.0, "step": 3130 }, { "epoch": 0.5266167689849466, "grad_norm": NaN, "learning_rate": 4.7085802988189315e-05, "loss": 0.0, "step": 3131 }, { "epoch": 0.5267849634177109, "grad_norm": NaN, "learning_rate": 4.7058974906645256e-05, "loss": 0.0, "step": 3132 }, { "epoch": 0.5269531578504751, "grad_norm": NaN, "learning_rate": 4.7032147674730395e-05, "loss": 0.0, "step": 3133 }, { "epoch": 0.5271213522832394, "grad_norm": NaN, "learning_rate": 4.700532130019483e-05, "loss": 0.0, "step": 3134 }, { "epoch": 0.5272895467160037, "grad_norm": NaN, "learning_rate": 4.697849579078838e-05, "loss": 0.0, "step": 3135 }, { "epoch": 0.527457741148768, "grad_norm": NaN, "learning_rate": 4.695167115426068e-05, "loss": 0.0, "step": 3136 }, { "epoch": 0.5276259355815323, "grad_norm": NaN, "learning_rate": 4.6924847398361035e-05, "loss": 0.0, "step": 3137 }, { "epoch": 0.5277941300142965, "grad_norm": NaN, "learning_rate": 4.689802453083853e-05, "loss": 0.0, "step": 3138 }, { "epoch": 0.5279623244470608, "grad_norm": NaN, "learning_rate": 4.687120255944199e-05, "loss": 0.0, "step": 3139 }, { "epoch": 0.5281305188798251, "grad_norm": NaN, "learning_rate": 4.684438149191999e-05, "loss": 0.0, "step": 3140 }, { "epoch": 0.5282987133125894, "grad_norm": NaN, "learning_rate": 4.681756133602084e-05, "loss": 0.0, "step": 3141 }, { "epoch": 0.5284669077453537, "grad_norm": NaN, "learning_rate": 4.6790742099492565e-05, "loss": 0.0, "step": 3142 }, { "epoch": 0.5286351021781179, "grad_norm": NaN, "learning_rate": 4.6763923790082935e-05, "loss": 0.0, "step": 3143 }, { "epoch": 0.5288032966108822, "grad_norm": NaN, "learning_rate": 4.67371064155395e-05, "loss": 0.0, "step": 3144 }, { "epoch": 0.5289714910436465, "grad_norm": NaN, "learning_rate": 4.671028998360947e-05, "loss": 0.0, "step": 3145 }, { "epoch": 0.5291396854764108, "grad_norm": NaN, "learning_rate": 4.6683474502039826e-05, "loss": 0.0, "step": 3146 }, { "epoch": 0.529307879909175, "grad_norm": NaN, "learning_rate": 4.665665997857723e-05, "loss": 0.0, "step": 3147 }, { "epoch": 0.5294760743419393, "grad_norm": NaN, "learning_rate": 4.6629846420968136e-05, "loss": 0.0, "step": 3148 }, { "epoch": 0.5296442687747036, "grad_norm": NaN, "learning_rate": 4.660303383695866e-05, "loss": 0.0, "step": 3149 }, { "epoch": 0.5298124632074679, "grad_norm": NaN, "learning_rate": 4.6576222234294656e-05, "loss": 0.0, "step": 3150 }, { "epoch": 0.5299806576402321, "grad_norm": NaN, "learning_rate": 4.654941162072171e-05, "loss": 0.0, "step": 3151 }, { "epoch": 0.5301488520729963, "grad_norm": NaN, "learning_rate": 4.652260200398507e-05, "loss": 0.0, "step": 3152 }, { "epoch": 0.5303170465057606, "grad_norm": NaN, "learning_rate": 4.649579339182978e-05, "loss": 0.0, "step": 3153 }, { "epoch": 0.5304852409385249, "grad_norm": NaN, "learning_rate": 4.646898579200054e-05, "loss": 0.0, "step": 3154 }, { "epoch": 0.5306534353712892, "grad_norm": NaN, "learning_rate": 4.644217921224175e-05, "loss": 0.0, "step": 3155 }, { "epoch": 0.5308216298040535, "grad_norm": NaN, "learning_rate": 4.641537366029751e-05, "loss": 0.0, "step": 3156 }, { "epoch": 0.5309898242368177, "grad_norm": NaN, "learning_rate": 4.63885691439117e-05, "loss": 0.0, "step": 3157 }, { "epoch": 0.531158018669582, "grad_norm": NaN, "learning_rate": 4.6361765670827805e-05, "loss": 0.0, "step": 3158 }, { "epoch": 0.5313262131023463, "grad_norm": NaN, "learning_rate": 4.633496324878906e-05, "loss": 0.0, "step": 3159 }, { "epoch": 0.5314944075351106, "grad_norm": NaN, "learning_rate": 4.630816188553837e-05, "loss": 0.0, "step": 3160 }, { "epoch": 0.5316626019678748, "grad_norm": NaN, "learning_rate": 4.6281361588818376e-05, "loss": 0.0, "step": 3161 }, { "epoch": 0.5318307964006391, "grad_norm": NaN, "learning_rate": 4.625456236637137e-05, "loss": 0.0, "step": 3162 }, { "epoch": 0.5319989908334034, "grad_norm": NaN, "learning_rate": 4.6227764225939337e-05, "loss": 0.0, "step": 3163 }, { "epoch": 0.5321671852661677, "grad_norm": NaN, "learning_rate": 4.620096717526395e-05, "loss": 0.0, "step": 3164 }, { "epoch": 0.532335379698932, "grad_norm": NaN, "learning_rate": 4.617417122208661e-05, "loss": 0.0, "step": 3165 }, { "epoch": 0.5325035741316962, "grad_norm": NaN, "learning_rate": 4.614737637414836e-05, "loss": 0.0, "step": 3166 }, { "epoch": 0.5326717685644605, "grad_norm": NaN, "learning_rate": 4.612058263918992e-05, "loss": 0.0, "step": 3167 }, { "epoch": 0.5328399629972248, "grad_norm": NaN, "learning_rate": 4.6093790024951695e-05, "loss": 0.0, "step": 3168 }, { "epoch": 0.5330081574299891, "grad_norm": NaN, "learning_rate": 4.606699853917379e-05, "loss": 0.0, "step": 3169 }, { "epoch": 0.5331763518627534, "grad_norm": NaN, "learning_rate": 4.6040208189595954e-05, "loss": 0.0, "step": 3170 }, { "epoch": 0.5333445462955176, "grad_norm": NaN, "learning_rate": 4.601341898395762e-05, "loss": 0.0, "step": 3171 }, { "epoch": 0.5335127407282819, "grad_norm": NaN, "learning_rate": 4.5986630929997866e-05, "loss": 0.0, "step": 3172 }, { "epoch": 0.5336809351610462, "grad_norm": NaN, "learning_rate": 4.59598440354555e-05, "loss": 0.0, "step": 3173 }, { "epoch": 0.5338491295938105, "grad_norm": NaN, "learning_rate": 4.593305830806895e-05, "loss": 0.0, "step": 3174 }, { "epoch": 0.5340173240265748, "grad_norm": NaN, "learning_rate": 4.5906273755576304e-05, "loss": 0.0, "step": 3175 }, { "epoch": 0.534185518459339, "grad_norm": NaN, "learning_rate": 4.58794903857153e-05, "loss": 0.0, "step": 3176 }, { "epoch": 0.5343537128921033, "grad_norm": NaN, "learning_rate": 4.585270820622338e-05, "loss": 0.0, "step": 3177 }, { "epoch": 0.5345219073248676, "grad_norm": NaN, "learning_rate": 4.582592722483761e-05, "loss": 0.0, "step": 3178 }, { "epoch": 0.5346901017576319, "grad_norm": NaN, "learning_rate": 4.579914744929472e-05, "loss": 0.0, "step": 3179 }, { "epoch": 0.534858296190396, "grad_norm": NaN, "learning_rate": 4.577236888733105e-05, "loss": 0.0, "step": 3180 }, { "epoch": 0.5350264906231603, "grad_norm": NaN, "learning_rate": 4.5745591546682674e-05, "loss": 0.0, "step": 3181 }, { "epoch": 0.5351946850559246, "grad_norm": NaN, "learning_rate": 4.571881543508526e-05, "loss": 0.0, "step": 3182 }, { "epoch": 0.5353628794886889, "grad_norm": NaN, "learning_rate": 4.569204056027411e-05, "loss": 0.0, "step": 3183 }, { "epoch": 0.5355310739214532, "grad_norm": NaN, "learning_rate": 4.566526692998416e-05, "loss": 0.0, "step": 3184 }, { "epoch": 0.5356992683542174, "grad_norm": NaN, "learning_rate": 4.563849455195007e-05, "loss": 0.0, "step": 3185 }, { "epoch": 0.5358674627869817, "grad_norm": NaN, "learning_rate": 4.5611723433906056e-05, "loss": 0.0, "step": 3186 }, { "epoch": 0.536035657219746, "grad_norm": NaN, "learning_rate": 4.5584953583585985e-05, "loss": 0.0, "step": 3187 }, { "epoch": 0.5362038516525103, "grad_norm": NaN, "learning_rate": 4.555818500872336e-05, "loss": 0.0, "step": 3188 }, { "epoch": 0.5363720460852746, "grad_norm": NaN, "learning_rate": 4.553141771705135e-05, "loss": 0.0, "step": 3189 }, { "epoch": 0.5365402405180388, "grad_norm": NaN, "learning_rate": 4.550465171630271e-05, "loss": 0.0, "step": 3190 }, { "epoch": 0.5367084349508031, "grad_norm": NaN, "learning_rate": 4.547788701420984e-05, "loss": 0.0, "step": 3191 }, { "epoch": 0.5368766293835674, "grad_norm": NaN, "learning_rate": 4.5451123618504744e-05, "loss": 0.0, "step": 3192 }, { "epoch": 0.5370448238163317, "grad_norm": NaN, "learning_rate": 4.54243615369191e-05, "loss": 0.0, "step": 3193 }, { "epoch": 0.537213018249096, "grad_norm": NaN, "learning_rate": 4.539760077718416e-05, "loss": 0.0, "step": 3194 }, { "epoch": 0.5373812126818602, "grad_norm": NaN, "learning_rate": 4.5370841347030796e-05, "loss": 0.0, "step": 3195 }, { "epoch": 0.5375494071146245, "grad_norm": NaN, "learning_rate": 4.534408325418952e-05, "loss": 0.0, "step": 3196 }, { "epoch": 0.5377176015473888, "grad_norm": NaN, "learning_rate": 4.531732650639045e-05, "loss": 0.0, "step": 3197 }, { "epoch": 0.5378857959801531, "grad_norm": NaN, "learning_rate": 4.5290571111363295e-05, "loss": 0.0, "step": 3198 }, { "epoch": 0.5380539904129173, "grad_norm": NaN, "learning_rate": 4.52638170768374e-05, "loss": 0.0, "step": 3199 }, { "epoch": 0.5382221848456816, "grad_norm": NaN, "learning_rate": 4.523706441054171e-05, "loss": 0.0, "step": 3200 }, { "epoch": 0.5383903792784459, "grad_norm": NaN, "learning_rate": 4.5210313120204735e-05, "loss": 0.0, "step": 3201 }, { "epoch": 0.5385585737112102, "grad_norm": NaN, "learning_rate": 4.5183563213554676e-05, "loss": 0.0, "step": 3202 }, { "epoch": 0.5387267681439745, "grad_norm": NaN, "learning_rate": 4.5156814698319264e-05, "loss": 0.0, "step": 3203 }, { "epoch": 0.5388949625767387, "grad_norm": NaN, "learning_rate": 4.513006758222583e-05, "loss": 0.0, "step": 3204 }, { "epoch": 0.539063157009503, "grad_norm": NaN, "learning_rate": 4.5103321873001314e-05, "loss": 0.0, "step": 3205 }, { "epoch": 0.5392313514422673, "grad_norm": NaN, "learning_rate": 4.507657757837228e-05, "loss": 0.0, "step": 3206 }, { "epoch": 0.5393995458750316, "grad_norm": NaN, "learning_rate": 4.5049834706064835e-05, "loss": 0.0, "step": 3207 }, { "epoch": 0.5395677403077959, "grad_norm": NaN, "learning_rate": 4.50230932638047e-05, "loss": 0.0, "step": 3208 }, { "epoch": 0.5397359347405601, "grad_norm": NaN, "learning_rate": 4.499635325931715e-05, "loss": 0.0, "step": 3209 }, { "epoch": 0.5399041291733243, "grad_norm": NaN, "learning_rate": 4.496961470032713e-05, "loss": 0.0, "step": 3210 }, { "epoch": 0.5400723236060886, "grad_norm": NaN, "learning_rate": 4.4942877594559067e-05, "loss": 0.0, "step": 3211 }, { "epoch": 0.5402405180388529, "grad_norm": NaN, "learning_rate": 4.4916141949737034e-05, "loss": 0.0, "step": 3212 }, { "epoch": 0.5404087124716171, "grad_norm": NaN, "learning_rate": 4.488940777358462e-05, "loss": 0.0, "step": 3213 }, { "epoch": 0.5405769069043814, "grad_norm": NaN, "learning_rate": 4.4862675073825086e-05, "loss": 0.0, "step": 3214 }, { "epoch": 0.5407451013371457, "grad_norm": NaN, "learning_rate": 4.483594385818118e-05, "loss": 0.0, "step": 3215 }, { "epoch": 0.54091329576991, "grad_norm": NaN, "learning_rate": 4.4809214134375254e-05, "loss": 0.0, "step": 3216 }, { "epoch": 0.5410814902026743, "grad_norm": NaN, "learning_rate": 4.478248591012921e-05, "loss": 0.0, "step": 3217 }, { "epoch": 0.5412496846354385, "grad_norm": NaN, "learning_rate": 4.4755759193164566e-05, "loss": 0.0, "step": 3218 }, { "epoch": 0.5414178790682028, "grad_norm": NaN, "learning_rate": 4.472903399120235e-05, "loss": 0.0, "step": 3219 }, { "epoch": 0.5415860735009671, "grad_norm": NaN, "learning_rate": 4.470231031196317e-05, "loss": 0.0, "step": 3220 }, { "epoch": 0.5417542679337314, "grad_norm": NaN, "learning_rate": 4.467558816316718e-05, "loss": 0.0, "step": 3221 }, { "epoch": 0.5419224623664957, "grad_norm": NaN, "learning_rate": 4.464886755253416e-05, "loss": 0.0, "step": 3222 }, { "epoch": 0.5420906567992599, "grad_norm": NaN, "learning_rate": 4.462214848778337e-05, "loss": 0.0, "step": 3223 }, { "epoch": 0.5422588512320242, "grad_norm": NaN, "learning_rate": 4.459543097663364e-05, "loss": 0.0, "step": 3224 }, { "epoch": 0.5424270456647885, "grad_norm": NaN, "learning_rate": 4.456871502680336e-05, "loss": 0.0, "step": 3225 }, { "epoch": 0.5425952400975528, "grad_norm": NaN, "learning_rate": 4.454200064601048e-05, "loss": 0.0, "step": 3226 }, { "epoch": 0.5427634345303171, "grad_norm": NaN, "learning_rate": 4.451528784197249e-05, "loss": 0.0, "step": 3227 }, { "epoch": 0.5429316289630813, "grad_norm": NaN, "learning_rate": 4.4488576622406404e-05, "loss": 0.0, "step": 3228 }, { "epoch": 0.5430998233958456, "grad_norm": NaN, "learning_rate": 4.4461866995028776e-05, "loss": 0.0, "step": 3229 }, { "epoch": 0.5432680178286099, "grad_norm": NaN, "learning_rate": 4.443515896755577e-05, "loss": 0.0, "step": 3230 }, { "epoch": 0.5434362122613742, "grad_norm": NaN, "learning_rate": 4.4408452547703005e-05, "loss": 0.0, "step": 3231 }, { "epoch": 0.5436044066941385, "grad_norm": NaN, "learning_rate": 4.4381747743185675e-05, "loss": 0.0, "step": 3232 }, { "epoch": 0.5437726011269027, "grad_norm": NaN, "learning_rate": 4.4355044561718476e-05, "loss": 0.0, "step": 3233 }, { "epoch": 0.543940795559667, "grad_norm": NaN, "learning_rate": 4.4328343011015695e-05, "loss": 0.0, "step": 3234 }, { "epoch": 0.5441089899924313, "grad_norm": NaN, "learning_rate": 4.4301643098791086e-05, "loss": 0.0, "step": 3235 }, { "epoch": 0.5442771844251956, "grad_norm": NaN, "learning_rate": 4.427494483275796e-05, "loss": 0.0, "step": 3236 }, { "epoch": 0.5444453788579598, "grad_norm": NaN, "learning_rate": 4.4248248220629124e-05, "loss": 0.0, "step": 3237 }, { "epoch": 0.5446135732907241, "grad_norm": NaN, "learning_rate": 4.422155327011698e-05, "loss": 0.0, "step": 3238 }, { "epoch": 0.5447817677234883, "grad_norm": NaN, "learning_rate": 4.4194859988933376e-05, "loss": 0.0, "step": 3239 }, { "epoch": 0.5449499621562526, "grad_norm": NaN, "learning_rate": 4.4168168384789694e-05, "loss": 0.0, "step": 3240 }, { "epoch": 0.5451181565890169, "grad_norm": NaN, "learning_rate": 4.4141478465396836e-05, "loss": 0.0, "step": 3241 }, { "epoch": 0.5452863510217811, "grad_norm": NaN, "learning_rate": 4.411479023846524e-05, "loss": 0.0, "step": 3242 }, { "epoch": 0.5454545454545454, "grad_norm": NaN, "learning_rate": 4.408810371170483e-05, "loss": 0.0, "step": 3243 }, { "epoch": 0.5456227398873097, "grad_norm": NaN, "learning_rate": 4.406141889282504e-05, "loss": 0.0, "step": 3244 }, { "epoch": 0.545790934320074, "grad_norm": NaN, "learning_rate": 4.4034735789534795e-05, "loss": 0.0, "step": 3245 }, { "epoch": 0.5459591287528383, "grad_norm": NaN, "learning_rate": 4.400805440954258e-05, "loss": 0.0, "step": 3246 }, { "epoch": 0.5461273231856025, "grad_norm": NaN, "learning_rate": 4.398137476055634e-05, "loss": 0.0, "step": 3247 }, { "epoch": 0.5462955176183668, "grad_norm": NaN, "learning_rate": 4.395469685028352e-05, "loss": 0.0, "step": 3248 }, { "epoch": 0.5464637120511311, "grad_norm": NaN, "learning_rate": 4.392802068643106e-05, "loss": 0.0, "step": 3249 }, { "epoch": 0.5466319064838954, "grad_norm": NaN, "learning_rate": 4.39013462767054e-05, "loss": 0.0, "step": 3250 }, { "epoch": 0.5468001009166596, "grad_norm": NaN, "learning_rate": 4.3874673628812515e-05, "loss": 0.0, "step": 3251 }, { "epoch": 0.5469682953494239, "grad_norm": NaN, "learning_rate": 4.38480027504578e-05, "loss": 0.0, "step": 3252 }, { "epoch": 0.5471364897821882, "grad_norm": NaN, "learning_rate": 4.382133364934619e-05, "loss": 0.0, "step": 3253 }, { "epoch": 0.5473046842149525, "grad_norm": NaN, "learning_rate": 4.379466633318208e-05, "loss": 0.0, "step": 3254 }, { "epoch": 0.5474728786477168, "grad_norm": NaN, "learning_rate": 4.376800080966936e-05, "loss": 0.0, "step": 3255 }, { "epoch": 0.547641073080481, "grad_norm": NaN, "learning_rate": 4.3741337086511414e-05, "loss": 0.0, "step": 3256 }, { "epoch": 0.5478092675132453, "grad_norm": NaN, "learning_rate": 4.371467517141108e-05, "loss": 0.0, "step": 3257 }, { "epoch": 0.5479774619460096, "grad_norm": NaN, "learning_rate": 4.368801507207067e-05, "loss": 0.0, "step": 3258 }, { "epoch": 0.5481456563787739, "grad_norm": NaN, "learning_rate": 4.3661356796192025e-05, "loss": 0.0, "step": 3259 }, { "epoch": 0.5483138508115382, "grad_norm": NaN, "learning_rate": 4.363470035147641e-05, "loss": 0.0, "step": 3260 }, { "epoch": 0.5484820452443024, "grad_norm": NaN, "learning_rate": 4.360804574562456e-05, "loss": 0.0, "step": 3261 }, { "epoch": 0.5486502396770667, "grad_norm": NaN, "learning_rate": 4.358139298633669e-05, "loss": 0.0, "step": 3262 }, { "epoch": 0.548818434109831, "grad_norm": NaN, "learning_rate": 4.355474208131251e-05, "loss": 0.0, "step": 3263 }, { "epoch": 0.5489866285425953, "grad_norm": NaN, "learning_rate": 4.352809303825115e-05, "loss": 0.0, "step": 3264 }, { "epoch": 0.5491548229753596, "grad_norm": NaN, "learning_rate": 4.350144586485122e-05, "loss": 0.0, "step": 3265 }, { "epoch": 0.5493230174081238, "grad_norm": NaN, "learning_rate": 4.347480056881077e-05, "loss": 0.0, "step": 3266 }, { "epoch": 0.5494912118408881, "grad_norm": NaN, "learning_rate": 4.3448157157827376e-05, "loss": 0.0, "step": 3267 }, { "epoch": 0.5496594062736524, "grad_norm": NaN, "learning_rate": 4.342151563959799e-05, "loss": 0.0, "step": 3268 }, { "epoch": 0.5498276007064166, "grad_norm": NaN, "learning_rate": 4.3394876021819055e-05, "loss": 0.0, "step": 3269 }, { "epoch": 0.5499957951391808, "grad_norm": NaN, "learning_rate": 4.336823831218644e-05, "loss": 0.0, "step": 3270 }, { "epoch": 0.5501639895719451, "grad_norm": NaN, "learning_rate": 4.334160251839551e-05, "loss": 0.0, "step": 3271 }, { "epoch": 0.5503321840047094, "grad_norm": NaN, "learning_rate": 4.331496864814103e-05, "loss": 0.0, "step": 3272 }, { "epoch": 0.5505003784374737, "grad_norm": NaN, "learning_rate": 4.328833670911724e-05, "loss": 0.0, "step": 3273 }, { "epoch": 0.550668572870238, "grad_norm": NaN, "learning_rate": 4.3261706709017774e-05, "loss": 0.0, "step": 3274 }, { "epoch": 0.5508367673030022, "grad_norm": NaN, "learning_rate": 4.323507865553579e-05, "loss": 0.0, "step": 3275 }, { "epoch": 0.5510049617357665, "grad_norm": NaN, "learning_rate": 4.3208452556363807e-05, "loss": 0.0, "step": 3276 }, { "epoch": 0.5511731561685308, "grad_norm": NaN, "learning_rate": 4.318182841919382e-05, "loss": 0.0, "step": 3277 }, { "epoch": 0.5513413506012951, "grad_norm": NaN, "learning_rate": 4.31552062517172e-05, "loss": 0.0, "step": 3278 }, { "epoch": 0.5515095450340594, "grad_norm": NaN, "learning_rate": 4.312858606162485e-05, "loss": 0.0, "step": 3279 }, { "epoch": 0.5516777394668236, "grad_norm": NaN, "learning_rate": 4.310196785660703e-05, "loss": 0.0, "step": 3280 }, { "epoch": 0.5518459338995879, "grad_norm": NaN, "learning_rate": 4.307535164435344e-05, "loss": 0.0, "step": 3281 }, { "epoch": 0.5520141283323522, "grad_norm": NaN, "learning_rate": 4.304873743255318e-05, "loss": 0.0, "step": 3282 }, { "epoch": 0.5521823227651165, "grad_norm": NaN, "learning_rate": 4.302212522889482e-05, "loss": 0.0, "step": 3283 }, { "epoch": 0.5523505171978808, "grad_norm": NaN, "learning_rate": 4.299551504106634e-05, "loss": 0.0, "step": 3284 }, { "epoch": 0.552518711630645, "grad_norm": NaN, "learning_rate": 4.29689068767551e-05, "loss": 0.0, "step": 3285 }, { "epoch": 0.5526869060634093, "grad_norm": NaN, "learning_rate": 4.294230074364789e-05, "loss": 0.0, "step": 3286 }, { "epoch": 0.5528551004961736, "grad_norm": NaN, "learning_rate": 4.2915696649430964e-05, "loss": 0.0, "step": 3287 }, { "epoch": 0.5530232949289379, "grad_norm": NaN, "learning_rate": 4.288909460178993e-05, "loss": 0.0, "step": 3288 }, { "epoch": 0.5531914893617021, "grad_norm": NaN, "learning_rate": 4.286249460840982e-05, "loss": 0.0, "step": 3289 }, { "epoch": 0.5533596837944664, "grad_norm": NaN, "learning_rate": 4.283589667697506e-05, "loss": 0.0, "step": 3290 }, { "epoch": 0.5535278782272307, "grad_norm": NaN, "learning_rate": 4.280930081516952e-05, "loss": 0.0, "step": 3291 }, { "epoch": 0.553696072659995, "grad_norm": NaN, "learning_rate": 4.278270703067644e-05, "loss": 0.0, "step": 3292 }, { "epoch": 0.5538642670927593, "grad_norm": NaN, "learning_rate": 4.2756115331178454e-05, "loss": 0.0, "step": 3293 }, { "epoch": 0.5540324615255235, "grad_norm": NaN, "learning_rate": 4.2729525724357585e-05, "loss": 0.0, "step": 3294 }, { "epoch": 0.5542006559582878, "grad_norm": NaN, "learning_rate": 4.2702938217895334e-05, "loss": 0.0, "step": 3295 }, { "epoch": 0.5543688503910521, "grad_norm": NaN, "learning_rate": 4.267635281947249e-05, "loss": 0.0, "step": 3296 }, { "epoch": 0.5545370448238164, "grad_norm": NaN, "learning_rate": 4.264976953676929e-05, "loss": 0.0, "step": 3297 }, { "epoch": 0.5547052392565807, "grad_norm": NaN, "learning_rate": 4.262318837746535e-05, "loss": 0.0, "step": 3298 }, { "epoch": 0.5548734336893448, "grad_norm": NaN, "learning_rate": 4.259660934923965e-05, "loss": 0.0, "step": 3299 }, { "epoch": 0.5550416281221091, "grad_norm": NaN, "learning_rate": 4.257003245977059e-05, "loss": 0.0, "step": 3300 }, { "epoch": 0.5552098225548734, "grad_norm": NaN, "learning_rate": 4.2543457716735935e-05, "loss": 0.0, "step": 3301 }, { "epoch": 0.5553780169876377, "grad_norm": NaN, "learning_rate": 4.251688512781283e-05, "loss": 0.0, "step": 3302 }, { "epoch": 0.555546211420402, "grad_norm": NaN, "learning_rate": 4.249031470067778e-05, "loss": 0.0, "step": 3303 }, { "epoch": 0.5557144058531662, "grad_norm": NaN, "learning_rate": 4.2463746443006706e-05, "loss": 0.0, "step": 3304 }, { "epoch": 0.5558826002859305, "grad_norm": NaN, "learning_rate": 4.243718036247488e-05, "loss": 0.0, "step": 3305 }, { "epoch": 0.5560507947186948, "grad_norm": NaN, "learning_rate": 4.241061646675695e-05, "loss": 0.0, "step": 3306 }, { "epoch": 0.5562189891514591, "grad_norm": NaN, "learning_rate": 4.238405476352689e-05, "loss": 0.0, "step": 3307 }, { "epoch": 0.5563871835842233, "grad_norm": NaN, "learning_rate": 4.235749526045813e-05, "loss": 0.0, "step": 3308 }, { "epoch": 0.5565553780169876, "grad_norm": NaN, "learning_rate": 4.233093796522338e-05, "loss": 0.0, "step": 3309 }, { "epoch": 0.5567235724497519, "grad_norm": NaN, "learning_rate": 4.2304382885494753e-05, "loss": 0.0, "step": 3310 }, { "epoch": 0.5568917668825162, "grad_norm": NaN, "learning_rate": 4.227783002894369e-05, "loss": 0.0, "step": 3311 }, { "epoch": 0.5570599613152805, "grad_norm": NaN, "learning_rate": 4.225127940324107e-05, "loss": 0.0, "step": 3312 }, { "epoch": 0.5572281557480447, "grad_norm": NaN, "learning_rate": 4.222473101605703e-05, "loss": 0.0, "step": 3313 }, { "epoch": 0.557396350180809, "grad_norm": NaN, "learning_rate": 4.219818487506112e-05, "loss": 0.0, "step": 3314 }, { "epoch": 0.5575645446135733, "grad_norm": NaN, "learning_rate": 4.2171640987922184e-05, "loss": 0.0, "step": 3315 }, { "epoch": 0.5577327390463376, "grad_norm": NaN, "learning_rate": 4.214509936230851e-05, "loss": 0.0, "step": 3316 }, { "epoch": 0.5579009334791019, "grad_norm": NaN, "learning_rate": 4.2118560005887656e-05, "loss": 0.0, "step": 3317 }, { "epoch": 0.5580691279118661, "grad_norm": NaN, "learning_rate": 4.209202292632653e-05, "loss": 0.0, "step": 3318 }, { "epoch": 0.5582373223446304, "grad_norm": NaN, "learning_rate": 4.20654881312914e-05, "loss": 0.0, "step": 3319 }, { "epoch": 0.5584055167773947, "grad_norm": NaN, "learning_rate": 4.203895562844789e-05, "loss": 0.0, "step": 3320 }, { "epoch": 0.558573711210159, "grad_norm": NaN, "learning_rate": 4.201242542546092e-05, "loss": 0.0, "step": 3321 }, { "epoch": 0.5587419056429233, "grad_norm": NaN, "learning_rate": 4.198589752999479e-05, "loss": 0.0, "step": 3322 }, { "epoch": 0.5589101000756875, "grad_norm": NaN, "learning_rate": 4.195937194971307e-05, "loss": 0.0, "step": 3323 }, { "epoch": 0.5590782945084518, "grad_norm": NaN, "learning_rate": 4.193284869227876e-05, "loss": 0.0, "step": 3324 }, { "epoch": 0.5592464889412161, "grad_norm": NaN, "learning_rate": 4.1906327765354105e-05, "loss": 0.0, "step": 3325 }, { "epoch": 0.5594146833739804, "grad_norm": NaN, "learning_rate": 4.18798091766007e-05, "loss": 0.0, "step": 3326 }, { "epoch": 0.5595828778067446, "grad_norm": NaN, "learning_rate": 4.185329293367947e-05, "loss": 0.0, "step": 3327 }, { "epoch": 0.5597510722395088, "grad_norm": NaN, "learning_rate": 4.182677904425067e-05, "loss": 0.0, "step": 3328 }, { "epoch": 0.5599192666722731, "grad_norm": NaN, "learning_rate": 4.180026751597386e-05, "loss": 0.0, "step": 3329 }, { "epoch": 0.5600874611050374, "grad_norm": NaN, "learning_rate": 4.177375835650792e-05, "loss": 0.0, "step": 3330 }, { "epoch": 0.5602556555378017, "grad_norm": NaN, "learning_rate": 4.174725157351103e-05, "loss": 0.0, "step": 3331 }, { "epoch": 0.5604238499705659, "grad_norm": NaN, "learning_rate": 4.172074717464075e-05, "loss": 0.0, "step": 3332 }, { "epoch": 0.5605920444033302, "grad_norm": NaN, "learning_rate": 4.169424516755387e-05, "loss": 0.0, "step": 3333 }, { "epoch": 0.5607602388360945, "grad_norm": NaN, "learning_rate": 4.166774555990654e-05, "loss": 0.0, "step": 3334 }, { "epoch": 0.5609284332688588, "grad_norm": NaN, "learning_rate": 4.164124835935418e-05, "loss": 0.0, "step": 3335 }, { "epoch": 0.561096627701623, "grad_norm": NaN, "learning_rate": 4.161475357355155e-05, "loss": 0.0, "step": 3336 }, { "epoch": 0.5612648221343873, "grad_norm": NaN, "learning_rate": 4.15882612101527e-05, "loss": 0.0, "step": 3337 }, { "epoch": 0.5614330165671516, "grad_norm": NaN, "learning_rate": 4.156177127681096e-05, "loss": 0.0, "step": 3338 }, { "epoch": 0.5616012109999159, "grad_norm": NaN, "learning_rate": 4.153528378117897e-05, "loss": 0.0, "step": 3339 }, { "epoch": 0.5617694054326802, "grad_norm": NaN, "learning_rate": 4.150879873090871e-05, "loss": 0.0, "step": 3340 }, { "epoch": 0.5619375998654444, "grad_norm": NaN, "learning_rate": 4.1482316133651375e-05, "loss": 0.0, "step": 3341 }, { "epoch": 0.5621057942982087, "grad_norm": NaN, "learning_rate": 4.1455835997057525e-05, "loss": 0.0, "step": 3342 }, { "epoch": 0.562273988730973, "grad_norm": NaN, "learning_rate": 4.142935832877692e-05, "loss": 0.0, "step": 3343 }, { "epoch": 0.5624421831637373, "grad_norm": NaN, "learning_rate": 4.1402883136458725e-05, "loss": 0.0, "step": 3344 }, { "epoch": 0.5626103775965016, "grad_norm": NaN, "learning_rate": 4.137641042775129e-05, "loss": 0.0, "step": 3345 }, { "epoch": 0.5627785720292658, "grad_norm": NaN, "learning_rate": 4.134994021030231e-05, "loss": 0.0, "step": 3346 }, { "epoch": 0.5629467664620301, "grad_norm": NaN, "learning_rate": 4.1323472491758694e-05, "loss": 0.0, "step": 3347 }, { "epoch": 0.5631149608947944, "grad_norm": NaN, "learning_rate": 4.12970072797667e-05, "loss": 0.0, "step": 3348 }, { "epoch": 0.5632831553275587, "grad_norm": NaN, "learning_rate": 4.1270544581971826e-05, "loss": 0.0, "step": 3349 }, { "epoch": 0.563451349760323, "grad_norm": NaN, "learning_rate": 4.124408440601885e-05, "loss": 0.0, "step": 3350 }, { "epoch": 0.5636195441930872, "grad_norm": NaN, "learning_rate": 4.121762675955182e-05, "loss": 0.0, "step": 3351 }, { "epoch": 0.5637877386258515, "grad_norm": NaN, "learning_rate": 4.119117165021402e-05, "loss": 0.0, "step": 3352 }, { "epoch": 0.5639559330586158, "grad_norm": NaN, "learning_rate": 4.11647190856481e-05, "loss": 0.0, "step": 3353 }, { "epoch": 0.5641241274913801, "grad_norm": NaN, "learning_rate": 4.1138269073495864e-05, "loss": 0.0, "step": 3354 }, { "epoch": 0.5642923219241444, "grad_norm": NaN, "learning_rate": 4.1111821621398446e-05, "loss": 0.0, "step": 3355 }, { "epoch": 0.5644605163569086, "grad_norm": NaN, "learning_rate": 4.108537673699619e-05, "loss": 0.0, "step": 3356 }, { "epoch": 0.5646287107896729, "grad_norm": NaN, "learning_rate": 4.105893442792876e-05, "loss": 0.0, "step": 3357 }, { "epoch": 0.5647969052224371, "grad_norm": NaN, "learning_rate": 4.103249470183502e-05, "loss": 0.0, "step": 3358 }, { "epoch": 0.5649650996552014, "grad_norm": NaN, "learning_rate": 4.1006057566353126e-05, "loss": 0.0, "step": 3359 }, { "epoch": 0.5651332940879656, "grad_norm": NaN, "learning_rate": 4.0979623029120436e-05, "loss": 0.0, "step": 3360 }, { "epoch": 0.5653014885207299, "grad_norm": NaN, "learning_rate": 4.0953191097773645e-05, "loss": 0.0, "step": 3361 }, { "epoch": 0.5654696829534942, "grad_norm": NaN, "learning_rate": 4.092676177994862e-05, "loss": 0.0, "step": 3362 }, { "epoch": 0.5656378773862585, "grad_norm": NaN, "learning_rate": 4.0900335083280484e-05, "loss": 0.0, "step": 3363 }, { "epoch": 0.5658060718190228, "grad_norm": NaN, "learning_rate": 4.087391101540361e-05, "loss": 0.0, "step": 3364 }, { "epoch": 0.565974266251787, "grad_norm": NaN, "learning_rate": 4.084748958395164e-05, "loss": 0.0, "step": 3365 }, { "epoch": 0.5661424606845513, "grad_norm": NaN, "learning_rate": 4.082107079655741e-05, "loss": 0.0, "step": 3366 }, { "epoch": 0.5663106551173156, "grad_norm": NaN, "learning_rate": 4.0794654660853024e-05, "loss": 0.0, "step": 3367 }, { "epoch": 0.5664788495500799, "grad_norm": NaN, "learning_rate": 4.0768241184469775e-05, "loss": 0.0, "step": 3368 }, { "epoch": 0.5666470439828442, "grad_norm": NaN, "learning_rate": 4.074183037503827e-05, "loss": 0.0, "step": 3369 }, { "epoch": 0.5668152384156084, "grad_norm": NaN, "learning_rate": 4.071542224018827e-05, "loss": 0.0, "step": 3370 }, { "epoch": 0.5669834328483727, "grad_norm": NaN, "learning_rate": 4.068901678754879e-05, "loss": 0.0, "step": 3371 }, { "epoch": 0.567151627281137, "grad_norm": NaN, "learning_rate": 4.066261402474805e-05, "loss": 0.0, "step": 3372 }, { "epoch": 0.5673198217139013, "grad_norm": NaN, "learning_rate": 4.063621395941356e-05, "loss": 0.0, "step": 3373 }, { "epoch": 0.5674880161466656, "grad_norm": NaN, "learning_rate": 4.0609816599171966e-05, "loss": 0.0, "step": 3374 }, { "epoch": 0.5676562105794298, "grad_norm": NaN, "learning_rate": 4.058342195164919e-05, "loss": 0.0, "step": 3375 }, { "epoch": 0.5678244050121941, "grad_norm": NaN, "learning_rate": 4.055703002447033e-05, "loss": 0.0, "step": 3376 }, { "epoch": 0.5679925994449584, "grad_norm": NaN, "learning_rate": 4.053064082525974e-05, "loss": 0.0, "step": 3377 }, { "epoch": 0.5681607938777227, "grad_norm": NaN, "learning_rate": 4.0504254361640946e-05, "loss": 0.0, "step": 3378 }, { "epoch": 0.568328988310487, "grad_norm": NaN, "learning_rate": 4.047787064123671e-05, "loss": 0.0, "step": 3379 }, { "epoch": 0.5684971827432512, "grad_norm": NaN, "learning_rate": 4.045148967166897e-05, "loss": 0.0, "step": 3380 }, { "epoch": 0.5686653771760155, "grad_norm": NaN, "learning_rate": 4.042511146055894e-05, "loss": 0.0, "step": 3381 }, { "epoch": 0.5688335716087798, "grad_norm": NaN, "learning_rate": 4.039873601552696e-05, "loss": 0.0, "step": 3382 }, { "epoch": 0.5690017660415441, "grad_norm": NaN, "learning_rate": 4.037236334419261e-05, "loss": 0.0, "step": 3383 }, { "epoch": 0.5691699604743083, "grad_norm": NaN, "learning_rate": 4.034599345417464e-05, "loss": 0.0, "step": 3384 }, { "epoch": 0.5693381549070726, "grad_norm": NaN, "learning_rate": 4.031962635309104e-05, "loss": 0.0, "step": 3385 }, { "epoch": 0.5695063493398369, "grad_norm": NaN, "learning_rate": 4.029326204855896e-05, "loss": 0.0, "step": 3386 }, { "epoch": 0.5696745437726011, "grad_norm": NaN, "learning_rate": 4.0266900548194755e-05, "loss": 0.0, "step": 3387 }, { "epoch": 0.5698427382053654, "grad_norm": NaN, "learning_rate": 4.024054185961394e-05, "loss": 0.0, "step": 3388 }, { "epoch": 0.5700109326381296, "grad_norm": NaN, "learning_rate": 4.02141859904313e-05, "loss": 0.0, "step": 3389 }, { "epoch": 0.5701791270708939, "grad_norm": NaN, "learning_rate": 4.0187832948260705e-05, "loss": 0.0, "step": 3390 }, { "epoch": 0.5703473215036582, "grad_norm": NaN, "learning_rate": 4.016148274071528e-05, "loss": 0.0, "step": 3391 }, { "epoch": 0.5705155159364225, "grad_norm": NaN, "learning_rate": 4.0135135375407276e-05, "loss": 0.0, "step": 3392 }, { "epoch": 0.5706837103691867, "grad_norm": NaN, "learning_rate": 4.0108790859948184e-05, "loss": 0.0, "step": 3393 }, { "epoch": 0.570851904801951, "grad_norm": NaN, "learning_rate": 4.008244920194862e-05, "loss": 0.0, "step": 3394 }, { "epoch": 0.5710200992347153, "grad_norm": NaN, "learning_rate": 4.005611040901841e-05, "loss": 0.0, "step": 3395 }, { "epoch": 0.5711882936674796, "grad_norm": NaN, "learning_rate": 4.0029774488766494e-05, "loss": 0.0, "step": 3396 }, { "epoch": 0.5713564881002439, "grad_norm": NaN, "learning_rate": 4.000344144880108e-05, "loss": 0.0, "step": 3397 }, { "epoch": 0.5715246825330081, "grad_norm": NaN, "learning_rate": 3.997711129672946e-05, "loss": 0.0, "step": 3398 }, { "epoch": 0.5716928769657724, "grad_norm": NaN, "learning_rate": 3.995078404015813e-05, "loss": 0.0, "step": 3399 }, { "epoch": 0.5718610713985367, "grad_norm": NaN, "learning_rate": 3.992445968669273e-05, "loss": 0.0, "step": 3400 }, { "epoch": 0.572029265831301, "grad_norm": NaN, "learning_rate": 3.989813824393806e-05, "loss": 0.0, "step": 3401 }, { "epoch": 0.5721974602640653, "grad_norm": NaN, "learning_rate": 3.9871819719498116e-05, "loss": 0.0, "step": 3402 }, { "epoch": 0.5723656546968295, "grad_norm": NaN, "learning_rate": 3.984550412097601e-05, "loss": 0.0, "step": 3403 }, { "epoch": 0.5725338491295938, "grad_norm": NaN, "learning_rate": 3.981919145597404e-05, "loss": 0.0, "step": 3404 }, { "epoch": 0.5727020435623581, "grad_norm": NaN, "learning_rate": 3.979288173209359e-05, "loss": 0.0, "step": 3405 }, { "epoch": 0.5728702379951224, "grad_norm": NaN, "learning_rate": 3.976657495693531e-05, "loss": 0.0, "step": 3406 }, { "epoch": 0.5730384324278867, "grad_norm": NaN, "learning_rate": 3.97402711380989e-05, "loss": 0.0, "step": 3407 }, { "epoch": 0.5732066268606509, "grad_norm": NaN, "learning_rate": 3.9713970283183244e-05, "loss": 0.0, "step": 3408 }, { "epoch": 0.5733748212934152, "grad_norm": NaN, "learning_rate": 3.9687672399786344e-05, "loss": 0.0, "step": 3409 }, { "epoch": 0.5735430157261795, "grad_norm": NaN, "learning_rate": 3.96613774955054e-05, "loss": 0.0, "step": 3410 }, { "epoch": 0.5737112101589438, "grad_norm": NaN, "learning_rate": 3.96350855779367e-05, "loss": 0.0, "step": 3411 }, { "epoch": 0.573879404591708, "grad_norm": NaN, "learning_rate": 3.960879665467569e-05, "loss": 0.0, "step": 3412 }, { "epoch": 0.5740475990244723, "grad_norm": NaN, "learning_rate": 3.958251073331693e-05, "loss": 0.0, "step": 3413 }, { "epoch": 0.5742157934572366, "grad_norm": NaN, "learning_rate": 3.955622782145413e-05, "loss": 0.0, "step": 3414 }, { "epoch": 0.5743839878900009, "grad_norm": NaN, "learning_rate": 3.9529947926680144e-05, "loss": 0.0, "step": 3415 }, { "epoch": 0.5745521823227652, "grad_norm": NaN, "learning_rate": 3.950367105658693e-05, "loss": 0.0, "step": 3416 }, { "epoch": 0.5747203767555293, "grad_norm": NaN, "learning_rate": 3.9477397218765565e-05, "loss": 0.0, "step": 3417 }, { "epoch": 0.5748885711882936, "grad_norm": NaN, "learning_rate": 3.9451126420806304e-05, "loss": 0.0, "step": 3418 }, { "epoch": 0.5750567656210579, "grad_norm": NaN, "learning_rate": 3.942485867029847e-05, "loss": 0.0, "step": 3419 }, { "epoch": 0.5752249600538222, "grad_norm": NaN, "learning_rate": 3.939859397483051e-05, "loss": 0.0, "step": 3420 }, { "epoch": 0.5753931544865865, "grad_norm": NaN, "learning_rate": 3.937233234198999e-05, "loss": 0.0, "step": 3421 }, { "epoch": 0.5755613489193507, "grad_norm": NaN, "learning_rate": 3.9346073779363646e-05, "loss": 0.0, "step": 3422 }, { "epoch": 0.575729543352115, "grad_norm": NaN, "learning_rate": 3.931981829453725e-05, "loss": 0.0, "step": 3423 }, { "epoch": 0.5758977377848793, "grad_norm": NaN, "learning_rate": 3.929356589509572e-05, "loss": 0.0, "step": 3424 }, { "epoch": 0.5760659322176436, "grad_norm": NaN, "learning_rate": 3.926731658862307e-05, "loss": 0.0, "step": 3425 }, { "epoch": 0.5762341266504079, "grad_norm": NaN, "learning_rate": 3.924107038270246e-05, "loss": 0.0, "step": 3426 }, { "epoch": 0.5764023210831721, "grad_norm": NaN, "learning_rate": 3.921482728491611e-05, "loss": 0.0, "step": 3427 }, { "epoch": 0.5765705155159364, "grad_norm": NaN, "learning_rate": 3.918858730284536e-05, "loss": 0.0, "step": 3428 }, { "epoch": 0.5767387099487007, "grad_norm": NaN, "learning_rate": 3.9162350444070636e-05, "loss": 0.0, "step": 3429 }, { "epoch": 0.576906904381465, "grad_norm": NaN, "learning_rate": 3.913611671617148e-05, "loss": 0.0, "step": 3430 }, { "epoch": 0.5770750988142292, "grad_norm": NaN, "learning_rate": 3.910988612672653e-05, "loss": 0.0, "step": 3431 }, { "epoch": 0.5772432932469935, "grad_norm": NaN, "learning_rate": 3.90836586833135e-05, "loss": 0.0, "step": 3432 }, { "epoch": 0.5774114876797578, "grad_norm": NaN, "learning_rate": 3.905743439350918e-05, "loss": 0.0, "step": 3433 }, { "epoch": 0.5775796821125221, "grad_norm": NaN, "learning_rate": 3.903121326488953e-05, "loss": 0.0, "step": 3434 }, { "epoch": 0.5777478765452864, "grad_norm": NaN, "learning_rate": 3.900499530502951e-05, "loss": 0.0, "step": 3435 }, { "epoch": 0.5779160709780506, "grad_norm": NaN, "learning_rate": 3.897878052150319e-05, "loss": 0.0, "step": 3436 }, { "epoch": 0.5780842654108149, "grad_norm": NaN, "learning_rate": 3.8952568921883713e-05, "loss": 0.0, "step": 3437 }, { "epoch": 0.5782524598435792, "grad_norm": NaN, "learning_rate": 3.8926360513743356e-05, "loss": 0.0, "step": 3438 }, { "epoch": 0.5784206542763435, "grad_norm": NaN, "learning_rate": 3.890015530465342e-05, "loss": 0.0, "step": 3439 }, { "epoch": 0.5785888487091078, "grad_norm": NaN, "learning_rate": 3.887395330218429e-05, "loss": 0.0, "step": 3440 }, { "epoch": 0.578757043141872, "grad_norm": NaN, "learning_rate": 3.884775451390542e-05, "loss": 0.0, "step": 3441 }, { "epoch": 0.5789252375746363, "grad_norm": NaN, "learning_rate": 3.882155894738536e-05, "loss": 0.0, "step": 3442 }, { "epoch": 0.5790934320074006, "grad_norm": NaN, "learning_rate": 3.8795366610191725e-05, "loss": 0.0, "step": 3443 }, { "epoch": 0.5792616264401649, "grad_norm": NaN, "learning_rate": 3.876917750989117e-05, "loss": 0.0, "step": 3444 }, { "epoch": 0.5794298208729292, "grad_norm": NaN, "learning_rate": 3.8742991654049426e-05, "loss": 0.0, "step": 3445 }, { "epoch": 0.5795980153056934, "grad_norm": NaN, "learning_rate": 3.871680905023133e-05, "loss": 0.0, "step": 3446 }, { "epoch": 0.5797662097384576, "grad_norm": NaN, "learning_rate": 3.869062970600071e-05, "loss": 0.0, "step": 3447 }, { "epoch": 0.5799344041712219, "grad_norm": NaN, "learning_rate": 3.866445362892051e-05, "loss": 0.0, "step": 3448 }, { "epoch": 0.5801025986039862, "grad_norm": NaN, "learning_rate": 3.863828082655268e-05, "loss": 0.0, "step": 3449 }, { "epoch": 0.5802707930367504, "grad_norm": NaN, "learning_rate": 3.8612111306458254e-05, "loss": 0.0, "step": 3450 }, { "epoch": 0.5804389874695147, "grad_norm": NaN, "learning_rate": 3.8585945076197325e-05, "loss": 0.0, "step": 3451 }, { "epoch": 0.580607181902279, "grad_norm": NaN, "learning_rate": 3.855978214332903e-05, "loss": 0.0, "step": 3452 }, { "epoch": 0.5807753763350433, "grad_norm": NaN, "learning_rate": 3.8533622515411525e-05, "loss": 0.0, "step": 3453 }, { "epoch": 0.5809435707678076, "grad_norm": NaN, "learning_rate": 3.8507466200002027e-05, "loss": 0.0, "step": 3454 }, { "epoch": 0.5811117652005718, "grad_norm": NaN, "learning_rate": 3.8481313204656844e-05, "loss": 0.0, "step": 3455 }, { "epoch": 0.5812799596333361, "grad_norm": NaN, "learning_rate": 3.845516353693126e-05, "loss": 0.0, "step": 3456 }, { "epoch": 0.5814481540661004, "grad_norm": NaN, "learning_rate": 3.842901720437962e-05, "loss": 0.0, "step": 3457 }, { "epoch": 0.5816163484988647, "grad_norm": NaN, "learning_rate": 3.8402874214555296e-05, "loss": 0.0, "step": 3458 }, { "epoch": 0.581784542931629, "grad_norm": NaN, "learning_rate": 3.837673457501072e-05, "loss": 0.0, "step": 3459 }, { "epoch": 0.5819527373643932, "grad_norm": NaN, "learning_rate": 3.835059829329735e-05, "loss": 0.0, "step": 3460 }, { "epoch": 0.5821209317971575, "grad_norm": NaN, "learning_rate": 3.832446537696564e-05, "loss": 0.0, "step": 3461 }, { "epoch": 0.5822891262299218, "grad_norm": NaN, "learning_rate": 3.82983358335651e-05, "loss": 0.0, "step": 3462 }, { "epoch": 0.5824573206626861, "grad_norm": NaN, "learning_rate": 3.827220967064428e-05, "loss": 0.0, "step": 3463 }, { "epoch": 0.5826255150954504, "grad_norm": NaN, "learning_rate": 3.824608689575073e-05, "loss": 0.0, "step": 3464 }, { "epoch": 0.5827937095282146, "grad_norm": NaN, "learning_rate": 3.8219967516431034e-05, "loss": 0.0, "step": 3465 }, { "epoch": 0.5829619039609789, "grad_norm": NaN, "learning_rate": 3.819385154023075e-05, "loss": 0.0, "step": 3466 }, { "epoch": 0.5831300983937432, "grad_norm": NaN, "learning_rate": 3.816773897469454e-05, "loss": 0.0, "step": 3467 }, { "epoch": 0.5832982928265075, "grad_norm": NaN, "learning_rate": 3.8141629827366015e-05, "loss": 0.0, "step": 3468 }, { "epoch": 0.5834664872592717, "grad_norm": NaN, "learning_rate": 3.811552410578781e-05, "loss": 0.0, "step": 3469 }, { "epoch": 0.583634681692036, "grad_norm": NaN, "learning_rate": 3.808942181750156e-05, "loss": 0.0, "step": 3470 }, { "epoch": 0.5838028761248003, "grad_norm": NaN, "learning_rate": 3.806332297004797e-05, "loss": 0.0, "step": 3471 }, { "epoch": 0.5839710705575646, "grad_norm": NaN, "learning_rate": 3.803722757096666e-05, "loss": 0.0, "step": 3472 }, { "epoch": 0.5841392649903289, "grad_norm": NaN, "learning_rate": 3.801113562779631e-05, "loss": 0.0, "step": 3473 }, { "epoch": 0.5843074594230931, "grad_norm": NaN, "learning_rate": 3.7985047148074585e-05, "loss": 0.0, "step": 3474 }, { "epoch": 0.5844756538558574, "grad_norm": NaN, "learning_rate": 3.7958962139338176e-05, "loss": 0.0, "step": 3475 }, { "epoch": 0.5846438482886216, "grad_norm": NaN, "learning_rate": 3.793288060912275e-05, "loss": 0.0, "step": 3476 }, { "epoch": 0.5848120427213859, "grad_norm": NaN, "learning_rate": 3.790680256496294e-05, "loss": 0.0, "step": 3477 }, { "epoch": 0.5849802371541502, "grad_norm": NaN, "learning_rate": 3.788072801439242e-05, "loss": 0.0, "step": 3478 }, { "epoch": 0.5851484315869144, "grad_norm": NaN, "learning_rate": 3.785465696494384e-05, "loss": 0.0, "step": 3479 }, { "epoch": 0.5853166260196787, "grad_norm": NaN, "learning_rate": 3.7828589424148816e-05, "loss": 0.0, "step": 3480 }, { "epoch": 0.585484820452443, "grad_norm": NaN, "learning_rate": 3.7802525399538e-05, "loss": 0.0, "step": 3481 }, { "epoch": 0.5856530148852073, "grad_norm": NaN, "learning_rate": 3.777646489864094e-05, "loss": 0.0, "step": 3482 }, { "epoch": 0.5858212093179715, "grad_norm": NaN, "learning_rate": 3.775040792898629e-05, "loss": 0.0, "step": 3483 }, { "epoch": 0.5859894037507358, "grad_norm": NaN, "learning_rate": 3.7724354498101575e-05, "loss": 0.0, "step": 3484 }, { "epoch": 0.5861575981835001, "grad_norm": NaN, "learning_rate": 3.769830461351336e-05, "loss": 0.0, "step": 3485 }, { "epoch": 0.5863257926162644, "grad_norm": NaN, "learning_rate": 3.767225828274714e-05, "loss": 0.0, "step": 3486 }, { "epoch": 0.5864939870490287, "grad_norm": NaN, "learning_rate": 3.764621551332744e-05, "loss": 0.0, "step": 3487 }, { "epoch": 0.5866621814817929, "grad_norm": NaN, "learning_rate": 3.76201763127777e-05, "loss": 0.0, "step": 3488 }, { "epoch": 0.5868303759145572, "grad_norm": NaN, "learning_rate": 3.759414068862036e-05, "loss": 0.0, "step": 3489 }, { "epoch": 0.5869985703473215, "grad_norm": NaN, "learning_rate": 3.756810864837681e-05, "loss": 0.0, "step": 3490 }, { "epoch": 0.5871667647800858, "grad_norm": NaN, "learning_rate": 3.754208019956744e-05, "loss": 0.0, "step": 3491 }, { "epoch": 0.5873349592128501, "grad_norm": NaN, "learning_rate": 3.751605534971157e-05, "loss": 0.0, "step": 3492 }, { "epoch": 0.5875031536456143, "grad_norm": NaN, "learning_rate": 3.749003410632748e-05, "loss": 0.0, "step": 3493 }, { "epoch": 0.5876713480783786, "grad_norm": NaN, "learning_rate": 3.7464016476932394e-05, "loss": 0.0, "step": 3494 }, { "epoch": 0.5878395425111429, "grad_norm": NaN, "learning_rate": 3.7438002469042565e-05, "loss": 0.0, "step": 3495 }, { "epoch": 0.5880077369439072, "grad_norm": NaN, "learning_rate": 3.7411992090173107e-05, "loss": 0.0, "step": 3496 }, { "epoch": 0.5881759313766715, "grad_norm": NaN, "learning_rate": 3.738598534783815e-05, "loss": 0.0, "step": 3497 }, { "epoch": 0.5883441258094357, "grad_norm": NaN, "learning_rate": 3.735998224955073e-05, "loss": 0.0, "step": 3498 }, { "epoch": 0.5885123202422, "grad_norm": NaN, "learning_rate": 3.733398280282285e-05, "loss": 0.0, "step": 3499 }, { "epoch": 0.5886805146749643, "grad_norm": NaN, "learning_rate": 3.730798701516549e-05, "loss": 0.0, "step": 3500 }, { "epoch": 0.5888487091077286, "grad_norm": NaN, "learning_rate": 3.7281994894088524e-05, "loss": 0.0, "step": 3501 }, { "epoch": 0.5890169035404929, "grad_norm": NaN, "learning_rate": 3.725600644710078e-05, "loss": 0.0, "step": 3502 }, { "epoch": 0.5891850979732571, "grad_norm": NaN, "learning_rate": 3.723002168171002e-05, "loss": 0.0, "step": 3503 }, { "epoch": 0.5893532924060214, "grad_norm": NaN, "learning_rate": 3.720404060542297e-05, "loss": 0.0, "step": 3504 }, { "epoch": 0.5895214868387857, "grad_norm": NaN, "learning_rate": 3.717806322574527e-05, "loss": 0.0, "step": 3505 }, { "epoch": 0.5896896812715499, "grad_norm": NaN, "learning_rate": 3.71520895501815e-05, "loss": 0.0, "step": 3506 }, { "epoch": 0.5898578757043141, "grad_norm": NaN, "learning_rate": 3.712611958623514e-05, "loss": 0.0, "step": 3507 }, { "epoch": 0.5900260701370784, "grad_norm": NaN, "learning_rate": 3.7100153341408636e-05, "loss": 0.0, "step": 3508 }, { "epoch": 0.5901942645698427, "grad_norm": NaN, "learning_rate": 3.707419082320336e-05, "loss": 0.0, "step": 3509 }, { "epoch": 0.590362459002607, "grad_norm": NaN, "learning_rate": 3.704823203911957e-05, "loss": 0.0, "step": 3510 }, { "epoch": 0.5905306534353713, "grad_norm": NaN, "learning_rate": 3.702227699665646e-05, "loss": 0.0, "step": 3511 }, { "epoch": 0.5906988478681355, "grad_norm": NaN, "learning_rate": 3.6996325703312176e-05, "loss": 0.0, "step": 3512 }, { "epoch": 0.5908670423008998, "grad_norm": NaN, "learning_rate": 3.6970378166583755e-05, "loss": 0.0, "step": 3513 }, { "epoch": 0.5910352367336641, "grad_norm": NaN, "learning_rate": 3.6944434393967144e-05, "loss": 0.0, "step": 3514 }, { "epoch": 0.5912034311664284, "grad_norm": NaN, "learning_rate": 3.691849439295719e-05, "loss": 0.0, "step": 3515 }, { "epoch": 0.5913716255991927, "grad_norm": NaN, "learning_rate": 3.68925581710477e-05, "loss": 0.0, "step": 3516 }, { "epoch": 0.5915398200319569, "grad_norm": NaN, "learning_rate": 3.686662573573134e-05, "loss": 0.0, "step": 3517 }, { "epoch": 0.5917080144647212, "grad_norm": NaN, "learning_rate": 3.6840697094499705e-05, "loss": 0.0, "step": 3518 }, { "epoch": 0.5918762088974855, "grad_norm": NaN, "learning_rate": 3.681477225484326e-05, "loss": 0.0, "step": 3519 }, { "epoch": 0.5920444033302498, "grad_norm": NaN, "learning_rate": 3.678885122425145e-05, "loss": 0.0, "step": 3520 }, { "epoch": 0.592212597763014, "grad_norm": NaN, "learning_rate": 3.676293401021256e-05, "loss": 0.0, "step": 3521 }, { "epoch": 0.5923807921957783, "grad_norm": NaN, "learning_rate": 3.673702062021376e-05, "loss": 0.0, "step": 3522 }, { "epoch": 0.5925489866285426, "grad_norm": NaN, "learning_rate": 3.671111106174113e-05, "loss": 0.0, "step": 3523 }, { "epoch": 0.5927171810613069, "grad_norm": NaN, "learning_rate": 3.668520534227969e-05, "loss": 0.0, "step": 3524 }, { "epoch": 0.5928853754940712, "grad_norm": NaN, "learning_rate": 3.66593034693133e-05, "loss": 0.0, "step": 3525 }, { "epoch": 0.5930535699268354, "grad_norm": NaN, "learning_rate": 3.66334054503247e-05, "loss": 0.0, "step": 3526 }, { "epoch": 0.5932217643595997, "grad_norm": NaN, "learning_rate": 3.660751129279554e-05, "loss": 0.0, "step": 3527 }, { "epoch": 0.593389958792364, "grad_norm": NaN, "learning_rate": 3.658162100420638e-05, "loss": 0.0, "step": 3528 }, { "epoch": 0.5935581532251283, "grad_norm": NaN, "learning_rate": 3.655573459203662e-05, "loss": 0.0, "step": 3529 }, { "epoch": 0.5937263476578926, "grad_norm": NaN, "learning_rate": 3.6529852063764545e-05, "loss": 0.0, "step": 3530 }, { "epoch": 0.5938945420906568, "grad_norm": NaN, "learning_rate": 3.6503973426867317e-05, "loss": 0.0, "step": 3531 }, { "epoch": 0.5940627365234211, "grad_norm": NaN, "learning_rate": 3.647809868882102e-05, "loss": 0.0, "step": 3532 }, { "epoch": 0.5942309309561854, "grad_norm": NaN, "learning_rate": 3.645222785710056e-05, "loss": 0.0, "step": 3533 }, { "epoch": 0.5943991253889497, "grad_norm": NaN, "learning_rate": 3.6426360939179735e-05, "loss": 0.0, "step": 3534 }, { "epoch": 0.5945673198217138, "grad_norm": NaN, "learning_rate": 3.6400497942531186e-05, "loss": 0.0, "step": 3535 }, { "epoch": 0.5947355142544781, "grad_norm": NaN, "learning_rate": 3.637463887462647e-05, "loss": 0.0, "step": 3536 }, { "epoch": 0.5949037086872424, "grad_norm": NaN, "learning_rate": 3.6348783742935966e-05, "loss": 0.0, "step": 3537 }, { "epoch": 0.5950719031200067, "grad_norm": NaN, "learning_rate": 3.632293255492894e-05, "loss": 0.0, "step": 3538 }, { "epoch": 0.595240097552771, "grad_norm": NaN, "learning_rate": 3.629708531807348e-05, "loss": 0.0, "step": 3539 }, { "epoch": 0.5954082919855352, "grad_norm": NaN, "learning_rate": 3.627124203983662e-05, "loss": 0.0, "step": 3540 }, { "epoch": 0.5955764864182995, "grad_norm": NaN, "learning_rate": 3.624540272768416e-05, "loss": 0.0, "step": 3541 }, { "epoch": 0.5957446808510638, "grad_norm": NaN, "learning_rate": 3.621956738908079e-05, "loss": 0.0, "step": 3542 }, { "epoch": 0.5959128752838281, "grad_norm": NaN, "learning_rate": 3.6193736031490046e-05, "loss": 0.0, "step": 3543 }, { "epoch": 0.5960810697165924, "grad_norm": NaN, "learning_rate": 3.616790866237433e-05, "loss": 0.0, "step": 3544 }, { "epoch": 0.5962492641493566, "grad_norm": NaN, "learning_rate": 3.614208528919487e-05, "loss": 0.0, "step": 3545 }, { "epoch": 0.5964174585821209, "grad_norm": NaN, "learning_rate": 3.611626591941175e-05, "loss": 0.0, "step": 3546 }, { "epoch": 0.5965856530148852, "grad_norm": NaN, "learning_rate": 3.609045056048389e-05, "loss": 0.0, "step": 3547 }, { "epoch": 0.5967538474476495, "grad_norm": NaN, "learning_rate": 3.606463921986908e-05, "loss": 0.0, "step": 3548 }, { "epoch": 0.5969220418804138, "grad_norm": NaN, "learning_rate": 3.6038831905023916e-05, "loss": 0.0, "step": 3549 }, { "epoch": 0.597090236313178, "grad_norm": NaN, "learning_rate": 3.601302862340384e-05, "loss": 0.0, "step": 3550 }, { "epoch": 0.5972584307459423, "grad_norm": NaN, "learning_rate": 3.598722938246314e-05, "loss": 0.0, "step": 3551 }, { "epoch": 0.5974266251787066, "grad_norm": NaN, "learning_rate": 3.5961434189654896e-05, "loss": 0.0, "step": 3552 }, { "epoch": 0.5975948196114709, "grad_norm": NaN, "learning_rate": 3.593564305243109e-05, "loss": 0.0, "step": 3553 }, { "epoch": 0.5977630140442352, "grad_norm": NaN, "learning_rate": 3.590985597824248e-05, "loss": 0.0, "step": 3554 }, { "epoch": 0.5979312084769994, "grad_norm": NaN, "learning_rate": 3.5884072974538654e-05, "loss": 0.0, "step": 3555 }, { "epoch": 0.5980994029097637, "grad_norm": NaN, "learning_rate": 3.5858294048768016e-05, "loss": 0.0, "step": 3556 }, { "epoch": 0.598267597342528, "grad_norm": NaN, "learning_rate": 3.5832519208377855e-05, "loss": 0.0, "step": 3557 }, { "epoch": 0.5984357917752923, "grad_norm": NaN, "learning_rate": 3.580674846081421e-05, "loss": 0.0, "step": 3558 }, { "epoch": 0.5986039862080565, "grad_norm": NaN, "learning_rate": 3.578098181352196e-05, "loss": 0.0, "step": 3559 }, { "epoch": 0.5987721806408208, "grad_norm": NaN, "learning_rate": 3.57552192739448e-05, "loss": 0.0, "step": 3560 }, { "epoch": 0.5989403750735851, "grad_norm": NaN, "learning_rate": 3.572946084952524e-05, "loss": 0.0, "step": 3561 }, { "epoch": 0.5991085695063494, "grad_norm": NaN, "learning_rate": 3.57037065477046e-05, "loss": 0.0, "step": 3562 }, { "epoch": 0.5992767639391137, "grad_norm": NaN, "learning_rate": 3.567795637592301e-05, "loss": 0.0, "step": 3563 }, { "epoch": 0.599444958371878, "grad_norm": NaN, "learning_rate": 3.56522103416194e-05, "loss": 0.0, "step": 3564 }, { "epoch": 0.5996131528046421, "grad_norm": NaN, "learning_rate": 3.562646845223153e-05, "loss": 0.0, "step": 3565 }, { "epoch": 0.5997813472374064, "grad_norm": NaN, "learning_rate": 3.560073071519593e-05, "loss": 0.0, "step": 3566 }, { "epoch": 0.5999495416701707, "grad_norm": NaN, "learning_rate": 3.557499713794795e-05, "loss": 0.0, "step": 3567 }, { "epoch": 0.600117736102935, "grad_norm": NaN, "learning_rate": 3.55492677279217e-05, "loss": 0.0, "step": 3568 }, { "epoch": 0.6002859305356992, "grad_norm": NaN, "learning_rate": 3.5523542492550175e-05, "loss": 0.0, "step": 3569 }, { "epoch": 0.6004541249684635, "grad_norm": NaN, "learning_rate": 3.5497821439265074e-05, "loss": 0.0, "step": 3570 }, { "epoch": 0.6006223194012278, "grad_norm": NaN, "learning_rate": 3.5472104575496925e-05, "loss": 0.0, "step": 3571 }, { "epoch": 0.6007905138339921, "grad_norm": NaN, "learning_rate": 3.5446391908675033e-05, "loss": 0.0, "step": 3572 }, { "epoch": 0.6009587082667563, "grad_norm": NaN, "learning_rate": 3.542068344622752e-05, "loss": 0.0, "step": 3573 }, { "epoch": 0.6011269026995206, "grad_norm": NaN, "learning_rate": 3.5394979195581254e-05, "loss": 0.0, "step": 3574 }, { "epoch": 0.6012950971322849, "grad_norm": NaN, "learning_rate": 3.536927916416193e-05, "loss": 0.0, "step": 3575 }, { "epoch": 0.6014632915650492, "grad_norm": NaN, "learning_rate": 3.534358335939394e-05, "loss": 0.0, "step": 3576 }, { "epoch": 0.6016314859978135, "grad_norm": NaN, "learning_rate": 3.531789178870059e-05, "loss": 0.0, "step": 3577 }, { "epoch": 0.6017996804305777, "grad_norm": NaN, "learning_rate": 3.529220445950385e-05, "loss": 0.0, "step": 3578 }, { "epoch": 0.601967874863342, "grad_norm": NaN, "learning_rate": 3.526652137922451e-05, "loss": 0.0, "step": 3579 }, { "epoch": 0.6021360692961063, "grad_norm": NaN, "learning_rate": 3.52408425552821e-05, "loss": 0.0, "step": 3580 }, { "epoch": 0.6023042637288706, "grad_norm": NaN, "learning_rate": 3.5215167995094975e-05, "loss": 0.0, "step": 3581 }, { "epoch": 0.6024724581616349, "grad_norm": NaN, "learning_rate": 3.518949770608022e-05, "loss": 0.0, "step": 3582 }, { "epoch": 0.6026406525943991, "grad_norm": NaN, "learning_rate": 3.5163831695653694e-05, "loss": 0.0, "step": 3583 }, { "epoch": 0.6028088470271634, "grad_norm": NaN, "learning_rate": 3.513816997123e-05, "loss": 0.0, "step": 3584 }, { "epoch": 0.6029770414599277, "grad_norm": NaN, "learning_rate": 3.511251254022255e-05, "loss": 0.0, "step": 3585 }, { "epoch": 0.603145235892692, "grad_norm": NaN, "learning_rate": 3.508685941004348e-05, "loss": 0.0, "step": 3586 }, { "epoch": 0.6033134303254563, "grad_norm": NaN, "learning_rate": 3.5061210588103686e-05, "loss": 0.0, "step": 3587 }, { "epoch": 0.6034816247582205, "grad_norm": NaN, "learning_rate": 3.503556608181282e-05, "loss": 0.0, "step": 3588 }, { "epoch": 0.6036498191909848, "grad_norm": NaN, "learning_rate": 3.5009925898579314e-05, "loss": 0.0, "step": 3589 }, { "epoch": 0.6038180136237491, "grad_norm": NaN, "learning_rate": 3.498429004581032e-05, "loss": 0.0, "step": 3590 }, { "epoch": 0.6039862080565134, "grad_norm": NaN, "learning_rate": 3.495865853091175e-05, "loss": 0.0, "step": 3591 }, { "epoch": 0.6041544024892777, "grad_norm": NaN, "learning_rate": 3.4933031361288226e-05, "loss": 0.0, "step": 3592 }, { "epoch": 0.6043225969220419, "grad_norm": NaN, "learning_rate": 3.490740854434321e-05, "loss": 0.0, "step": 3593 }, { "epoch": 0.6044907913548061, "grad_norm": NaN, "learning_rate": 3.488179008747881e-05, "loss": 0.0, "step": 3594 }, { "epoch": 0.6046589857875704, "grad_norm": NaN, "learning_rate": 3.485617599809593e-05, "loss": 0.0, "step": 3595 }, { "epoch": 0.6048271802203347, "grad_norm": NaN, "learning_rate": 3.483056628359416e-05, "loss": 0.0, "step": 3596 }, { "epoch": 0.6049953746530989, "grad_norm": NaN, "learning_rate": 3.48049609513719e-05, "loss": 0.0, "step": 3597 }, { "epoch": 0.6051635690858632, "grad_norm": NaN, "learning_rate": 3.477936000882623e-05, "loss": 0.0, "step": 3598 }, { "epoch": 0.6053317635186275, "grad_norm": NaN, "learning_rate": 3.475376346335298e-05, "loss": 0.0, "step": 3599 }, { "epoch": 0.6054999579513918, "grad_norm": NaN, "learning_rate": 3.4728171322346694e-05, "loss": 0.0, "step": 3600 }, { "epoch": 0.605668152384156, "grad_norm": NaN, "learning_rate": 3.470258359320064e-05, "loss": 0.0, "step": 3601 }, { "epoch": 0.6058363468169203, "grad_norm": NaN, "learning_rate": 3.467700028330686e-05, "loss": 0.0, "step": 3602 }, { "epoch": 0.6060045412496846, "grad_norm": NaN, "learning_rate": 3.465142140005607e-05, "loss": 0.0, "step": 3603 }, { "epoch": 0.6061727356824489, "grad_norm": NaN, "learning_rate": 3.4625846950837725e-05, "loss": 0.0, "step": 3604 }, { "epoch": 0.6063409301152132, "grad_norm": NaN, "learning_rate": 3.460027694303998e-05, "loss": 0.0, "step": 3605 }, { "epoch": 0.6065091245479775, "grad_norm": NaN, "learning_rate": 3.457471138404975e-05, "loss": 0.0, "step": 3606 }, { "epoch": 0.6066773189807417, "grad_norm": NaN, "learning_rate": 3.4549150281252636e-05, "loss": 0.0, "step": 3607 }, { "epoch": 0.606845513413506, "grad_norm": NaN, "learning_rate": 3.452359364203294e-05, "loss": 0.0, "step": 3608 }, { "epoch": 0.6070137078462703, "grad_norm": NaN, "learning_rate": 3.449804147377369e-05, "loss": 0.0, "step": 3609 }, { "epoch": 0.6071819022790346, "grad_norm": NaN, "learning_rate": 3.447249378385663e-05, "loss": 0.0, "step": 3610 }, { "epoch": 0.6073500967117988, "grad_norm": NaN, "learning_rate": 3.444695057966221e-05, "loss": 0.0, "step": 3611 }, { "epoch": 0.6075182911445631, "grad_norm": NaN, "learning_rate": 3.442141186856955e-05, "loss": 0.0, "step": 3612 }, { "epoch": 0.6076864855773274, "grad_norm": NaN, "learning_rate": 3.43958776579565e-05, "loss": 0.0, "step": 3613 }, { "epoch": 0.6078546800100917, "grad_norm": NaN, "learning_rate": 3.437034795519963e-05, "loss": 0.0, "step": 3614 }, { "epoch": 0.608022874442856, "grad_norm": NaN, "learning_rate": 3.434482276767418e-05, "loss": 0.0, "step": 3615 }, { "epoch": 0.6081910688756202, "grad_norm": NaN, "learning_rate": 3.431930210275409e-05, "loss": 0.0, "step": 3616 }, { "epoch": 0.6083592633083845, "grad_norm": NaN, "learning_rate": 3.4293785967811964e-05, "loss": 0.0, "step": 3617 }, { "epoch": 0.6085274577411488, "grad_norm": NaN, "learning_rate": 3.426827437021917e-05, "loss": 0.0, "step": 3618 }, { "epoch": 0.6086956521739131, "grad_norm": NaN, "learning_rate": 3.424276731734571e-05, "loss": 0.0, "step": 3619 }, { "epoch": 0.6088638466066774, "grad_norm": NaN, "learning_rate": 3.4217264816560276e-05, "loss": 0.0, "step": 3620 }, { "epoch": 0.6090320410394416, "grad_norm": NaN, "learning_rate": 3.4191766875230234e-05, "loss": 0.0, "step": 3621 }, { "epoch": 0.6092002354722059, "grad_norm": NaN, "learning_rate": 3.416627350072171e-05, "loss": 0.0, "step": 3622 }, { "epoch": 0.6093684299049702, "grad_norm": NaN, "learning_rate": 3.414078470039942e-05, "loss": 0.0, "step": 3623 }, { "epoch": 0.6095366243377344, "grad_norm": NaN, "learning_rate": 3.411530048162679e-05, "loss": 0.0, "step": 3624 }, { "epoch": 0.6097048187704986, "grad_norm": NaN, "learning_rate": 3.408982085176592e-05, "loss": 0.0, "step": 3625 }, { "epoch": 0.6098730132032629, "grad_norm": NaN, "learning_rate": 3.406434581817762e-05, "loss": 0.0, "step": 3626 }, { "epoch": 0.6100412076360272, "grad_norm": NaN, "learning_rate": 3.403887538822132e-05, "loss": 0.0, "step": 3627 }, { "epoch": 0.6102094020687915, "grad_norm": NaN, "learning_rate": 3.401340956925515e-05, "loss": 0.0, "step": 3628 }, { "epoch": 0.6103775965015558, "grad_norm": NaN, "learning_rate": 3.398794836863588e-05, "loss": 0.0, "step": 3629 }, { "epoch": 0.61054579093432, "grad_norm": NaN, "learning_rate": 3.3962491793719e-05, "loss": 0.0, "step": 3630 }, { "epoch": 0.6107139853670843, "grad_norm": NaN, "learning_rate": 3.39370398518586e-05, "loss": 0.0, "step": 3631 }, { "epoch": 0.6108821797998486, "grad_norm": NaN, "learning_rate": 3.391159255040748e-05, "loss": 0.0, "step": 3632 }, { "epoch": 0.6110503742326129, "grad_norm": NaN, "learning_rate": 3.388614989671705e-05, "loss": 0.0, "step": 3633 }, { "epoch": 0.6112185686653772, "grad_norm": NaN, "learning_rate": 3.386071189813744e-05, "loss": 0.0, "step": 3634 }, { "epoch": 0.6113867630981414, "grad_norm": NaN, "learning_rate": 3.38352785620174e-05, "loss": 0.0, "step": 3635 }, { "epoch": 0.6115549575309057, "grad_norm": NaN, "learning_rate": 3.380984989570433e-05, "loss": 0.0, "step": 3636 }, { "epoch": 0.61172315196367, "grad_norm": NaN, "learning_rate": 3.378442590654427e-05, "loss": 0.0, "step": 3637 }, { "epoch": 0.6118913463964343, "grad_norm": NaN, "learning_rate": 3.375900660188196e-05, "loss": 0.0, "step": 3638 }, { "epoch": 0.6120595408291986, "grad_norm": NaN, "learning_rate": 3.373359198906072e-05, "loss": 0.0, "step": 3639 }, { "epoch": 0.6122277352619628, "grad_norm": NaN, "learning_rate": 3.3708182075422566e-05, "loss": 0.0, "step": 3640 }, { "epoch": 0.6123959296947271, "grad_norm": NaN, "learning_rate": 3.368277686830812e-05, "loss": 0.0, "step": 3641 }, { "epoch": 0.6125641241274914, "grad_norm": NaN, "learning_rate": 3.3657376375056684e-05, "loss": 0.0, "step": 3642 }, { "epoch": 0.6127323185602557, "grad_norm": NaN, "learning_rate": 3.363198060300616e-05, "loss": 0.0, "step": 3643 }, { "epoch": 0.61290051299302, "grad_norm": NaN, "learning_rate": 3.360658955949312e-05, "loss": 0.0, "step": 3644 }, { "epoch": 0.6130687074257842, "grad_norm": NaN, "learning_rate": 3.358120325185272e-05, "loss": 0.0, "step": 3645 }, { "epoch": 0.6132369018585485, "grad_norm": NaN, "learning_rate": 3.355582168741882e-05, "loss": 0.0, "step": 3646 }, { "epoch": 0.6134050962913128, "grad_norm": NaN, "learning_rate": 3.353044487352384e-05, "loss": 0.0, "step": 3647 }, { "epoch": 0.6135732907240771, "grad_norm": NaN, "learning_rate": 3.3505072817498876e-05, "loss": 0.0, "step": 3648 }, { "epoch": 0.6137414851568413, "grad_norm": NaN, "learning_rate": 3.347970552667361e-05, "loss": 0.0, "step": 3649 }, { "epoch": 0.6139096795896056, "grad_norm": NaN, "learning_rate": 3.345434300837636e-05, "loss": 0.0, "step": 3650 }, { "epoch": 0.6140778740223699, "grad_norm": NaN, "learning_rate": 3.342898526993412e-05, "loss": 0.0, "step": 3651 }, { "epoch": 0.6142460684551342, "grad_norm": NaN, "learning_rate": 3.340363231867243e-05, "loss": 0.0, "step": 3652 }, { "epoch": 0.6144142628878985, "grad_norm": NaN, "learning_rate": 3.3378284161915465e-05, "loss": 0.0, "step": 3653 }, { "epoch": 0.6145824573206626, "grad_norm": NaN, "learning_rate": 3.335294080698603e-05, "loss": 0.0, "step": 3654 }, { "epoch": 0.6147506517534269, "grad_norm": NaN, "learning_rate": 3.332760226120554e-05, "loss": 0.0, "step": 3655 }, { "epoch": 0.6149188461861912, "grad_norm": NaN, "learning_rate": 3.330226853189402e-05, "loss": 0.0, "step": 3656 }, { "epoch": 0.6150870406189555, "grad_norm": NaN, "learning_rate": 3.32769396263701e-05, "loss": 0.0, "step": 3657 }, { "epoch": 0.6152552350517198, "grad_norm": NaN, "learning_rate": 3.325161555195099e-05, "loss": 0.0, "step": 3658 }, { "epoch": 0.615423429484484, "grad_norm": NaN, "learning_rate": 3.3226296315952566e-05, "loss": 0.0, "step": 3659 }, { "epoch": 0.6155916239172483, "grad_norm": NaN, "learning_rate": 3.3200981925689264e-05, "loss": 0.0, "step": 3660 }, { "epoch": 0.6157598183500126, "grad_norm": NaN, "learning_rate": 3.3175672388474136e-05, "loss": 0.0, "step": 3661 }, { "epoch": 0.6159280127827769, "grad_norm": NaN, "learning_rate": 3.315036771161878e-05, "loss": 0.0, "step": 3662 }, { "epoch": 0.6160962072155411, "grad_norm": NaN, "learning_rate": 3.3125067902433485e-05, "loss": 0.0, "step": 3663 }, { "epoch": 0.6162644016483054, "grad_norm": NaN, "learning_rate": 3.3099772968227066e-05, "loss": 0.0, "step": 3664 }, { "epoch": 0.6164325960810697, "grad_norm": NaN, "learning_rate": 3.307448291630695e-05, "loss": 0.0, "step": 3665 }, { "epoch": 0.616600790513834, "grad_norm": NaN, "learning_rate": 3.304919775397912e-05, "loss": 0.0, "step": 3666 }, { "epoch": 0.6167689849465983, "grad_norm": NaN, "learning_rate": 3.3023917488548215e-05, "loss": 0.0, "step": 3667 }, { "epoch": 0.6169371793793625, "grad_norm": NaN, "learning_rate": 3.29986421273174e-05, "loss": 0.0, "step": 3668 }, { "epoch": 0.6171053738121268, "grad_norm": NaN, "learning_rate": 3.297337167758846e-05, "loss": 0.0, "step": 3669 }, { "epoch": 0.6172735682448911, "grad_norm": NaN, "learning_rate": 3.29481061466617e-05, "loss": 0.0, "step": 3670 }, { "epoch": 0.6174417626776554, "grad_norm": NaN, "learning_rate": 3.292284554183611e-05, "loss": 0.0, "step": 3671 }, { "epoch": 0.6176099571104197, "grad_norm": NaN, "learning_rate": 3.2897589870409164e-05, "loss": 0.0, "step": 3672 }, { "epoch": 0.6177781515431839, "grad_norm": NaN, "learning_rate": 3.287233913967695e-05, "loss": 0.0, "step": 3673 }, { "epoch": 0.6179463459759482, "grad_norm": NaN, "learning_rate": 3.284709335693411e-05, "loss": 0.0, "step": 3674 }, { "epoch": 0.6181145404087125, "grad_norm": NaN, "learning_rate": 3.282185252947387e-05, "loss": 0.0, "step": 3675 }, { "epoch": 0.6182827348414768, "grad_norm": NaN, "learning_rate": 3.279661666458804e-05, "loss": 0.0, "step": 3676 }, { "epoch": 0.6184509292742411, "grad_norm": NaN, "learning_rate": 3.2771385769566975e-05, "loss": 0.0, "step": 3677 }, { "epoch": 0.6186191237070053, "grad_norm": NaN, "learning_rate": 3.274615985169955e-05, "loss": 0.0, "step": 3678 }, { "epoch": 0.6187873181397696, "grad_norm": NaN, "learning_rate": 3.272093891827332e-05, "loss": 0.0, "step": 3679 }, { "epoch": 0.6189555125725339, "grad_norm": NaN, "learning_rate": 3.26957229765743e-05, "loss": 0.0, "step": 3680 }, { "epoch": 0.6191237070052982, "grad_norm": NaN, "learning_rate": 3.26705120338871e-05, "loss": 0.0, "step": 3681 }, { "epoch": 0.6192919014380625, "grad_norm": NaN, "learning_rate": 3.2645306097494855e-05, "loss": 0.0, "step": 3682 }, { "epoch": 0.6194600958708266, "grad_norm": NaN, "learning_rate": 3.2620105174679305e-05, "loss": 0.0, "step": 3683 }, { "epoch": 0.6196282903035909, "grad_norm": NaN, "learning_rate": 3.259490927272071e-05, "loss": 0.0, "step": 3684 }, { "epoch": 0.6197964847363552, "grad_norm": NaN, "learning_rate": 3.256971839889787e-05, "loss": 0.0, "step": 3685 }, { "epoch": 0.6199646791691195, "grad_norm": NaN, "learning_rate": 3.254453256048813e-05, "loss": 0.0, "step": 3686 }, { "epoch": 0.6201328736018837, "grad_norm": NaN, "learning_rate": 3.251935176476745e-05, "loss": 0.0, "step": 3687 }, { "epoch": 0.620301068034648, "grad_norm": NaN, "learning_rate": 3.2494176019010244e-05, "loss": 0.0, "step": 3688 }, { "epoch": 0.6204692624674123, "grad_norm": NaN, "learning_rate": 3.246900533048952e-05, "loss": 0.0, "step": 3689 }, { "epoch": 0.6206374569001766, "grad_norm": NaN, "learning_rate": 3.244383970647677e-05, "loss": 0.0, "step": 3690 }, { "epoch": 0.6208056513329409, "grad_norm": NaN, "learning_rate": 3.241867915424211e-05, "loss": 0.0, "step": 3691 }, { "epoch": 0.6209738457657051, "grad_norm": NaN, "learning_rate": 3.2393523681054114e-05, "loss": 0.0, "step": 3692 }, { "epoch": 0.6211420401984694, "grad_norm": NaN, "learning_rate": 3.236837329417993e-05, "loss": 0.0, "step": 3693 }, { "epoch": 0.6213102346312337, "grad_norm": NaN, "learning_rate": 3.2343228000885206e-05, "loss": 0.0, "step": 3694 }, { "epoch": 0.621478429063998, "grad_norm": NaN, "learning_rate": 3.231808780843416e-05, "loss": 0.0, "step": 3695 }, { "epoch": 0.6216466234967623, "grad_norm": NaN, "learning_rate": 3.229295272408949e-05, "loss": 0.0, "step": 3696 }, { "epoch": 0.6218148179295265, "grad_norm": NaN, "learning_rate": 3.226782275511245e-05, "loss": 0.0, "step": 3697 }, { "epoch": 0.6219830123622908, "grad_norm": NaN, "learning_rate": 3.22426979087628e-05, "loss": 0.0, "step": 3698 }, { "epoch": 0.6221512067950551, "grad_norm": NaN, "learning_rate": 3.2217578192298815e-05, "loss": 0.0, "step": 3699 }, { "epoch": 0.6223194012278194, "grad_norm": NaN, "learning_rate": 3.219246361297733e-05, "loss": 0.0, "step": 3700 }, { "epoch": 0.6224875956605836, "grad_norm": NaN, "learning_rate": 3.216735417805366e-05, "loss": 0.0, "step": 3701 }, { "epoch": 0.6226557900933479, "grad_norm": NaN, "learning_rate": 3.214224989478163e-05, "loss": 0.0, "step": 3702 }, { "epoch": 0.6228239845261122, "grad_norm": NaN, "learning_rate": 3.2117150770413574e-05, "loss": 0.0, "step": 3703 }, { "epoch": 0.6229921789588765, "grad_norm": NaN, "learning_rate": 3.209205681220037e-05, "loss": 0.0, "step": 3704 }, { "epoch": 0.6231603733916408, "grad_norm": NaN, "learning_rate": 3.2066968027391374e-05, "loss": 0.0, "step": 3705 }, { "epoch": 0.623328567824405, "grad_norm": NaN, "learning_rate": 3.204188442323445e-05, "loss": 0.0, "step": 3706 }, { "epoch": 0.6234967622571693, "grad_norm": NaN, "learning_rate": 3.201680600697596e-05, "loss": 0.0, "step": 3707 }, { "epoch": 0.6236649566899336, "grad_norm": NaN, "learning_rate": 3.1991732785860805e-05, "loss": 0.0, "step": 3708 }, { "epoch": 0.6238331511226979, "grad_norm": NaN, "learning_rate": 3.196666476713235e-05, "loss": 0.0, "step": 3709 }, { "epoch": 0.6240013455554622, "grad_norm": NaN, "learning_rate": 3.194160195803245e-05, "loss": 0.0, "step": 3710 }, { "epoch": 0.6241695399882264, "grad_norm": NaN, "learning_rate": 3.1916544365801484e-05, "loss": 0.0, "step": 3711 }, { "epoch": 0.6243377344209907, "grad_norm": NaN, "learning_rate": 3.18914919976783e-05, "loss": 0.0, "step": 3712 }, { "epoch": 0.6245059288537549, "grad_norm": NaN, "learning_rate": 3.1866444860900256e-05, "loss": 0.0, "step": 3713 }, { "epoch": 0.6246741232865192, "grad_norm": NaN, "learning_rate": 3.184140296270318e-05, "loss": 0.0, "step": 3714 }, { "epoch": 0.6248423177192834, "grad_norm": NaN, "learning_rate": 3.1816366310321385e-05, "loss": 0.0, "step": 3715 }, { "epoch": 0.6250105121520477, "grad_norm": NaN, "learning_rate": 3.179133491098772e-05, "loss": 0.0, "step": 3716 }, { "epoch": 0.625178706584812, "grad_norm": NaN, "learning_rate": 3.176630877193344e-05, "loss": 0.0, "step": 3717 }, { "epoch": 0.6253469010175763, "grad_norm": NaN, "learning_rate": 3.174128790038833e-05, "loss": 0.0, "step": 3718 }, { "epoch": 0.6255150954503406, "grad_norm": NaN, "learning_rate": 3.171627230358063e-05, "loss": 0.0, "step": 3719 }, { "epoch": 0.6256832898831048, "grad_norm": NaN, "learning_rate": 3.169126198873708e-05, "loss": 0.0, "step": 3720 }, { "epoch": 0.6258514843158691, "grad_norm": NaN, "learning_rate": 3.1666256963082876e-05, "loss": 0.0, "step": 3721 }, { "epoch": 0.6260196787486334, "grad_norm": NaN, "learning_rate": 3.164125723384168e-05, "loss": 0.0, "step": 3722 }, { "epoch": 0.6261878731813977, "grad_norm": NaN, "learning_rate": 3.161626280823562e-05, "loss": 0.0, "step": 3723 }, { "epoch": 0.626356067614162, "grad_norm": NaN, "learning_rate": 3.1591273693485345e-05, "loss": 0.0, "step": 3724 }, { "epoch": 0.6265242620469262, "grad_norm": NaN, "learning_rate": 3.156628989680991e-05, "loss": 0.0, "step": 3725 }, { "epoch": 0.6266924564796905, "grad_norm": NaN, "learning_rate": 3.154131142542686e-05, "loss": 0.0, "step": 3726 }, { "epoch": 0.6268606509124548, "grad_norm": NaN, "learning_rate": 3.151633828655216e-05, "loss": 0.0, "step": 3727 }, { "epoch": 0.6270288453452191, "grad_norm": NaN, "learning_rate": 3.149137048740032e-05, "loss": 0.0, "step": 3728 }, { "epoch": 0.6271970397779834, "grad_norm": NaN, "learning_rate": 3.146640803518425e-05, "loss": 0.0, "step": 3729 }, { "epoch": 0.6273652342107476, "grad_norm": NaN, "learning_rate": 3.1441450937115305e-05, "loss": 0.0, "step": 3730 }, { "epoch": 0.6275334286435119, "grad_norm": NaN, "learning_rate": 3.14164992004033e-05, "loss": 0.0, "step": 3731 }, { "epoch": 0.6277016230762762, "grad_norm": NaN, "learning_rate": 3.139155283225654e-05, "loss": 0.0, "step": 3732 }, { "epoch": 0.6278698175090405, "grad_norm": NaN, "learning_rate": 3.136661183988175e-05, "loss": 0.0, "step": 3733 }, { "epoch": 0.6280380119418048, "grad_norm": NaN, "learning_rate": 3.134167623048409e-05, "loss": 0.0, "step": 3734 }, { "epoch": 0.628206206374569, "grad_norm": NaN, "learning_rate": 3.1316746011267155e-05, "loss": 0.0, "step": 3735 }, { "epoch": 0.6283744008073333, "grad_norm": NaN, "learning_rate": 3.129182118943306e-05, "loss": 0.0, "step": 3736 }, { "epoch": 0.6285425952400976, "grad_norm": NaN, "learning_rate": 3.126690177218228e-05, "loss": 0.0, "step": 3737 }, { "epoch": 0.6287107896728619, "grad_norm": NaN, "learning_rate": 3.1241987766713754e-05, "loss": 0.0, "step": 3738 }, { "epoch": 0.6288789841056261, "grad_norm": NaN, "learning_rate": 3.1217079180224846e-05, "loss": 0.0, "step": 3739 }, { "epoch": 0.6290471785383904, "grad_norm": NaN, "learning_rate": 3.119217601991139e-05, "loss": 0.0, "step": 3740 }, { "epoch": 0.6292153729711547, "grad_norm": NaN, "learning_rate": 3.1167278292967624e-05, "loss": 0.0, "step": 3741 }, { "epoch": 0.6293835674039189, "grad_norm": NaN, "learning_rate": 3.1142386006586234e-05, "loss": 0.0, "step": 3742 }, { "epoch": 0.6295517618366832, "grad_norm": NaN, "learning_rate": 3.111749916795828e-05, "loss": 0.0, "step": 3743 }, { "epoch": 0.6297199562694474, "grad_norm": NaN, "learning_rate": 3.1092617784273336e-05, "loss": 0.0, "step": 3744 }, { "epoch": 0.6298881507022117, "grad_norm": NaN, "learning_rate": 3.1067741862719346e-05, "loss": 0.0, "step": 3745 }, { "epoch": 0.630056345134976, "grad_norm": NaN, "learning_rate": 3.104287141048267e-05, "loss": 0.0, "step": 3746 }, { "epoch": 0.6302245395677403, "grad_norm": NaN, "learning_rate": 3.1018006434748113e-05, "loss": 0.0, "step": 3747 }, { "epoch": 0.6303927340005046, "grad_norm": NaN, "learning_rate": 3.099314694269889e-05, "loss": 0.0, "step": 3748 }, { "epoch": 0.6305609284332688, "grad_norm": NaN, "learning_rate": 3.096829294151662e-05, "loss": 0.0, "step": 3749 }, { "epoch": 0.6307291228660331, "grad_norm": NaN, "learning_rate": 3.0943444438381366e-05, "loss": 0.0, "step": 3750 }, { "epoch": 0.6308973172987974, "grad_norm": NaN, "learning_rate": 3.091860144047155e-05, "loss": 0.0, "step": 3751 }, { "epoch": 0.6310655117315617, "grad_norm": NaN, "learning_rate": 3.089376395496405e-05, "loss": 0.0, "step": 3752 }, { "epoch": 0.631233706164326, "grad_norm": NaN, "learning_rate": 3.0868931989034155e-05, "loss": 0.0, "step": 3753 }, { "epoch": 0.6314019005970902, "grad_norm": NaN, "learning_rate": 3.084410554985553e-05, "loss": 0.0, "step": 3754 }, { "epoch": 0.6315700950298545, "grad_norm": NaN, "learning_rate": 3.0819284644600246e-05, "loss": 0.0, "step": 3755 }, { "epoch": 0.6317382894626188, "grad_norm": NaN, "learning_rate": 3.079446928043878e-05, "loss": 0.0, "step": 3756 }, { "epoch": 0.6319064838953831, "grad_norm": NaN, "learning_rate": 3.076965946454005e-05, "loss": 0.0, "step": 3757 }, { "epoch": 0.6320746783281473, "grad_norm": NaN, "learning_rate": 3.07448552040713e-05, "loss": 0.0, "step": 3758 }, { "epoch": 0.6322428727609116, "grad_norm": NaN, "learning_rate": 3.072005650619821e-05, "loss": 0.0, "step": 3759 }, { "epoch": 0.6324110671936759, "grad_norm": NaN, "learning_rate": 3.0695263378084834e-05, "loss": 0.0, "step": 3760 }, { "epoch": 0.6325792616264402, "grad_norm": NaN, "learning_rate": 3.0670475826893664e-05, "loss": 0.0, "step": 3761 }, { "epoch": 0.6327474560592045, "grad_norm": NaN, "learning_rate": 3.064569385978552e-05, "loss": 0.0, "step": 3762 }, { "epoch": 0.6329156504919687, "grad_norm": NaN, "learning_rate": 3.0620917483919637e-05, "loss": 0.0, "step": 3763 }, { "epoch": 0.633083844924733, "grad_norm": NaN, "learning_rate": 3.0596146706453616e-05, "loss": 0.0, "step": 3764 }, { "epoch": 0.6332520393574973, "grad_norm": NaN, "learning_rate": 3.05713815345435e-05, "loss": 0.0, "step": 3765 }, { "epoch": 0.6334202337902616, "grad_norm": NaN, "learning_rate": 3.054662197534363e-05, "loss": 0.0, "step": 3766 }, { "epoch": 0.6335884282230259, "grad_norm": NaN, "learning_rate": 3.0521868036006804e-05, "loss": 0.0, "step": 3767 }, { "epoch": 0.6337566226557901, "grad_norm": NaN, "learning_rate": 3.0497119723684108e-05, "loss": 0.0, "step": 3768 }, { "epoch": 0.6339248170885544, "grad_norm": NaN, "learning_rate": 3.0472377045525097e-05, "loss": 0.0, "step": 3769 }, { "epoch": 0.6340930115213187, "grad_norm": NaN, "learning_rate": 3.0447640008677635e-05, "loss": 0.0, "step": 3770 }, { "epoch": 0.634261205954083, "grad_norm": NaN, "learning_rate": 3.0422908620287972e-05, "loss": 0.0, "step": 3771 }, { "epoch": 0.6344294003868471, "grad_norm": NaN, "learning_rate": 3.0398182887500716e-05, "loss": 0.0, "step": 3772 }, { "epoch": 0.6345975948196114, "grad_norm": NaN, "learning_rate": 3.0373462817458886e-05, "loss": 0.0, "step": 3773 }, { "epoch": 0.6347657892523757, "grad_norm": NaN, "learning_rate": 3.0348748417303823e-05, "loss": 0.0, "step": 3774 }, { "epoch": 0.63493398368514, "grad_norm": NaN, "learning_rate": 3.0324039694175233e-05, "loss": 0.0, "step": 3775 }, { "epoch": 0.6351021781179043, "grad_norm": NaN, "learning_rate": 3.0299336655211184e-05, "loss": 0.0, "step": 3776 }, { "epoch": 0.6352703725506685, "grad_norm": NaN, "learning_rate": 3.0274639307548125e-05, "loss": 0.0, "step": 3777 }, { "epoch": 0.6354385669834328, "grad_norm": NaN, "learning_rate": 3.0249947658320832e-05, "loss": 0.0, "step": 3778 }, { "epoch": 0.6356067614161971, "grad_norm": NaN, "learning_rate": 3.022526171466245e-05, "loss": 0.0, "step": 3779 }, { "epoch": 0.6357749558489614, "grad_norm": NaN, "learning_rate": 3.0200581483704448e-05, "loss": 0.0, "step": 3780 }, { "epoch": 0.6359431502817257, "grad_norm": NaN, "learning_rate": 3.0175906972576708e-05, "loss": 0.0, "step": 3781 }, { "epoch": 0.6361113447144899, "grad_norm": NaN, "learning_rate": 3.01512381884074e-05, "loss": 0.0, "step": 3782 }, { "epoch": 0.6362795391472542, "grad_norm": NaN, "learning_rate": 3.0126575138323064e-05, "loss": 0.0, "step": 3783 }, { "epoch": 0.6364477335800185, "grad_norm": NaN, "learning_rate": 3.010191782944855e-05, "loss": 0.0, "step": 3784 }, { "epoch": 0.6366159280127828, "grad_norm": NaN, "learning_rate": 3.0077266268907124e-05, "loss": 0.0, "step": 3785 }, { "epoch": 0.636784122445547, "grad_norm": NaN, "learning_rate": 3.005262046382033e-05, "loss": 0.0, "step": 3786 }, { "epoch": 0.6369523168783113, "grad_norm": NaN, "learning_rate": 3.0027980421308062e-05, "loss": 0.0, "step": 3787 }, { "epoch": 0.6371205113110756, "grad_norm": NaN, "learning_rate": 3.000334614848854e-05, "loss": 0.0, "step": 3788 }, { "epoch": 0.6372887057438399, "grad_norm": NaN, "learning_rate": 2.9978717652478344e-05, "loss": 0.0, "step": 3789 }, { "epoch": 0.6374569001766042, "grad_norm": NaN, "learning_rate": 2.9954094940392375e-05, "loss": 0.0, "step": 3790 }, { "epoch": 0.6376250946093684, "grad_norm": NaN, "learning_rate": 2.9929478019343842e-05, "loss": 0.0, "step": 3791 }, { "epoch": 0.6377932890421327, "grad_norm": NaN, "learning_rate": 2.9904866896444295e-05, "loss": 0.0, "step": 3792 }, { "epoch": 0.637961483474897, "grad_norm": NaN, "learning_rate": 2.9880261578803636e-05, "loss": 0.0, "step": 3793 }, { "epoch": 0.6381296779076613, "grad_norm": NaN, "learning_rate": 2.9855662073530055e-05, "loss": 0.0, "step": 3794 }, { "epoch": 0.6382978723404256, "grad_norm": NaN, "learning_rate": 2.9831068387730076e-05, "loss": 0.0, "step": 3795 }, { "epoch": 0.6384660667731898, "grad_norm": NaN, "learning_rate": 2.9806480528508517e-05, "loss": 0.0, "step": 3796 }, { "epoch": 0.6386342612059541, "grad_norm": NaN, "learning_rate": 2.978189850296857e-05, "loss": 0.0, "step": 3797 }, { "epoch": 0.6388024556387184, "grad_norm": NaN, "learning_rate": 2.975732231821169e-05, "loss": 0.0, "step": 3798 }, { "epoch": 0.6389706500714827, "grad_norm": NaN, "learning_rate": 2.9732751981337658e-05, "loss": 0.0, "step": 3799 }, { "epoch": 0.639138844504247, "grad_norm": NaN, "learning_rate": 2.9708187499444574e-05, "loss": 0.0, "step": 3800 }, { "epoch": 0.6393070389370111, "grad_norm": NaN, "learning_rate": 2.9683628879628816e-05, "loss": 0.0, "step": 3801 }, { "epoch": 0.6394752333697754, "grad_norm": NaN, "learning_rate": 2.965907612898514e-05, "loss": 0.0, "step": 3802 }, { "epoch": 0.6396434278025397, "grad_norm": NaN, "learning_rate": 2.9634529254606542e-05, "loss": 0.0, "step": 3803 }, { "epoch": 0.639811622235304, "grad_norm": NaN, "learning_rate": 2.960998826358433e-05, "loss": 0.0, "step": 3804 }, { "epoch": 0.6399798166680682, "grad_norm": NaN, "learning_rate": 2.9585453163008124e-05, "loss": 0.0, "step": 3805 }, { "epoch": 0.6401480111008325, "grad_norm": NaN, "learning_rate": 2.9560923959965854e-05, "loss": 0.0, "step": 3806 }, { "epoch": 0.6403162055335968, "grad_norm": NaN, "learning_rate": 2.9536400661543717e-05, "loss": 0.0, "step": 3807 }, { "epoch": 0.6404843999663611, "grad_norm": NaN, "learning_rate": 2.9511883274826224e-05, "loss": 0.0, "step": 3808 }, { "epoch": 0.6406525943991254, "grad_norm": NaN, "learning_rate": 2.948737180689615e-05, "loss": 0.0, "step": 3809 }, { "epoch": 0.6408207888318896, "grad_norm": NaN, "learning_rate": 2.946286626483463e-05, "loss": 0.0, "step": 3810 }, { "epoch": 0.6409889832646539, "grad_norm": NaN, "learning_rate": 2.9438366655721018e-05, "loss": 0.0, "step": 3811 }, { "epoch": 0.6411571776974182, "grad_norm": NaN, "learning_rate": 2.9413872986632972e-05, "loss": 0.0, "step": 3812 }, { "epoch": 0.6413253721301825, "grad_norm": NaN, "learning_rate": 2.938938526464644e-05, "loss": 0.0, "step": 3813 }, { "epoch": 0.6414935665629468, "grad_norm": NaN, "learning_rate": 2.936490349683566e-05, "loss": 0.0, "step": 3814 }, { "epoch": 0.641661760995711, "grad_norm": NaN, "learning_rate": 2.934042769027313e-05, "loss": 0.0, "step": 3815 }, { "epoch": 0.6418299554284753, "grad_norm": NaN, "learning_rate": 2.931595785202964e-05, "loss": 0.0, "step": 3816 }, { "epoch": 0.6419981498612396, "grad_norm": NaN, "learning_rate": 2.9291493989174234e-05, "loss": 0.0, "step": 3817 }, { "epoch": 0.6421663442940039, "grad_norm": NaN, "learning_rate": 2.9267036108774282e-05, "loss": 0.0, "step": 3818 }, { "epoch": 0.6423345387267682, "grad_norm": NaN, "learning_rate": 2.9242584217895373e-05, "loss": 0.0, "step": 3819 }, { "epoch": 0.6425027331595324, "grad_norm": NaN, "learning_rate": 2.9218138323601397e-05, "loss": 0.0, "step": 3820 }, { "epoch": 0.6426709275922967, "grad_norm": NaN, "learning_rate": 2.9193698432954463e-05, "loss": 0.0, "step": 3821 }, { "epoch": 0.642839122025061, "grad_norm": NaN, "learning_rate": 2.916926455301504e-05, "loss": 0.0, "step": 3822 }, { "epoch": 0.6430073164578253, "grad_norm": NaN, "learning_rate": 2.914483669084176e-05, "loss": 0.0, "step": 3823 }, { "epoch": 0.6431755108905896, "grad_norm": NaN, "learning_rate": 2.912041485349157e-05, "loss": 0.0, "step": 3824 }, { "epoch": 0.6433437053233538, "grad_norm": NaN, "learning_rate": 2.909599904801964e-05, "loss": 0.0, "step": 3825 }, { "epoch": 0.6435118997561181, "grad_norm": NaN, "learning_rate": 2.907158928147947e-05, "loss": 0.0, "step": 3826 }, { "epoch": 0.6436800941888824, "grad_norm": NaN, "learning_rate": 2.9047185560922745e-05, "loss": 0.0, "step": 3827 }, { "epoch": 0.6438482886216467, "grad_norm": NaN, "learning_rate": 2.902278789339943e-05, "loss": 0.0, "step": 3828 }, { "epoch": 0.644016483054411, "grad_norm": NaN, "learning_rate": 2.899839628595771e-05, "loss": 0.0, "step": 3829 }, { "epoch": 0.6441846774871752, "grad_norm": NaN, "learning_rate": 2.8974010745644087e-05, "loss": 0.0, "step": 3830 }, { "epoch": 0.6443528719199394, "grad_norm": NaN, "learning_rate": 2.8949631279503264e-05, "loss": 0.0, "step": 3831 }, { "epoch": 0.6445210663527037, "grad_norm": NaN, "learning_rate": 2.892525789457818e-05, "loss": 0.0, "step": 3832 }, { "epoch": 0.644689260785468, "grad_norm": NaN, "learning_rate": 2.8900890597910023e-05, "loss": 0.0, "step": 3833 }, { "epoch": 0.6448574552182322, "grad_norm": NaN, "learning_rate": 2.8876529396538264e-05, "loss": 0.0, "step": 3834 }, { "epoch": 0.6450256496509965, "grad_norm": NaN, "learning_rate": 2.8852174297500567e-05, "loss": 0.0, "step": 3835 }, { "epoch": 0.6451938440837608, "grad_norm": NaN, "learning_rate": 2.882782530783285e-05, "loss": 0.0, "step": 3836 }, { "epoch": 0.6453620385165251, "grad_norm": NaN, "learning_rate": 2.880348243456926e-05, "loss": 0.0, "step": 3837 }, { "epoch": 0.6455302329492894, "grad_norm": NaN, "learning_rate": 2.877914568474218e-05, "loss": 0.0, "step": 3838 }, { "epoch": 0.6456984273820536, "grad_norm": NaN, "learning_rate": 2.8754815065382225e-05, "loss": 0.0, "step": 3839 }, { "epoch": 0.6458666218148179, "grad_norm": NaN, "learning_rate": 2.8730490583518234e-05, "loss": 0.0, "step": 3840 }, { "epoch": 0.6460348162475822, "grad_norm": NaN, "learning_rate": 2.870617224617726e-05, "loss": 0.0, "step": 3841 }, { "epoch": 0.6462030106803465, "grad_norm": NaN, "learning_rate": 2.8681860060384648e-05, "loss": 0.0, "step": 3842 }, { "epoch": 0.6463712051131107, "grad_norm": NaN, "learning_rate": 2.865755403316388e-05, "loss": 0.0, "step": 3843 }, { "epoch": 0.646539399545875, "grad_norm": NaN, "learning_rate": 2.8633254171536704e-05, "loss": 0.0, "step": 3844 }, { "epoch": 0.6467075939786393, "grad_norm": NaN, "learning_rate": 2.8608960482523056e-05, "loss": 0.0, "step": 3845 }, { "epoch": 0.6468757884114036, "grad_norm": NaN, "learning_rate": 2.858467297314115e-05, "loss": 0.0, "step": 3846 }, { "epoch": 0.6470439828441679, "grad_norm": NaN, "learning_rate": 2.8560391650407358e-05, "loss": 0.0, "step": 3847 }, { "epoch": 0.6472121772769321, "grad_norm": NaN, "learning_rate": 2.8536116521336287e-05, "loss": 0.0, "step": 3848 }, { "epoch": 0.6473803717096964, "grad_norm": NaN, "learning_rate": 2.8511847592940738e-05, "loss": 0.0, "step": 3849 }, { "epoch": 0.6475485661424607, "grad_norm": NaN, "learning_rate": 2.848758487223172e-05, "loss": 0.0, "step": 3850 }, { "epoch": 0.647716760575225, "grad_norm": NaN, "learning_rate": 2.84633283662185e-05, "loss": 0.0, "step": 3851 }, { "epoch": 0.6478849550079893, "grad_norm": NaN, "learning_rate": 2.843907808190849e-05, "loss": 0.0, "step": 3852 }, { "epoch": 0.6480531494407535, "grad_norm": NaN, "learning_rate": 2.8414834026307323e-05, "loss": 0.0, "step": 3853 }, { "epoch": 0.6482213438735178, "grad_norm": NaN, "learning_rate": 2.8390596206418817e-05, "loss": 0.0, "step": 3854 }, { "epoch": 0.6483895383062821, "grad_norm": NaN, "learning_rate": 2.8366364629245057e-05, "loss": 0.0, "step": 3855 }, { "epoch": 0.6485577327390464, "grad_norm": NaN, "learning_rate": 2.8342139301786236e-05, "loss": 0.0, "step": 3856 }, { "epoch": 0.6487259271718107, "grad_norm": NaN, "learning_rate": 2.8317920231040796e-05, "loss": 0.0, "step": 3857 }, { "epoch": 0.6488941216045749, "grad_norm": NaN, "learning_rate": 2.829370742400534e-05, "loss": 0.0, "step": 3858 }, { "epoch": 0.6490623160373392, "grad_norm": NaN, "learning_rate": 2.8269500887674687e-05, "loss": 0.0, "step": 3859 }, { "epoch": 0.6492305104701035, "grad_norm": NaN, "learning_rate": 2.8245300629041838e-05, "loss": 0.0, "step": 3860 }, { "epoch": 0.6493987049028677, "grad_norm": NaN, "learning_rate": 2.822110665509796e-05, "loss": 0.0, "step": 3861 }, { "epoch": 0.6495668993356319, "grad_norm": NaN, "learning_rate": 2.819691897283242e-05, "loss": 0.0, "step": 3862 }, { "epoch": 0.6497350937683962, "grad_norm": NaN, "learning_rate": 2.81727375892328e-05, "loss": 0.0, "step": 3863 }, { "epoch": 0.6499032882011605, "grad_norm": NaN, "learning_rate": 2.814856251128481e-05, "loss": 0.0, "step": 3864 }, { "epoch": 0.6500714826339248, "grad_norm": NaN, "learning_rate": 2.8124393745972354e-05, "loss": 0.0, "step": 3865 }, { "epoch": 0.6502396770666891, "grad_norm": NaN, "learning_rate": 2.8100231300277514e-05, "loss": 0.0, "step": 3866 }, { "epoch": 0.6504078714994533, "grad_norm": NaN, "learning_rate": 2.8076075181180576e-05, "loss": 0.0, "step": 3867 }, { "epoch": 0.6505760659322176, "grad_norm": NaN, "learning_rate": 2.8051925395659955e-05, "loss": 0.0, "step": 3868 }, { "epoch": 0.6507442603649819, "grad_norm": NaN, "learning_rate": 2.8027781950692254e-05, "loss": 0.0, "step": 3869 }, { "epoch": 0.6509124547977462, "grad_norm": NaN, "learning_rate": 2.800364485325223e-05, "loss": 0.0, "step": 3870 }, { "epoch": 0.6510806492305105, "grad_norm": NaN, "learning_rate": 2.797951411031285e-05, "loss": 0.0, "step": 3871 }, { "epoch": 0.6512488436632747, "grad_norm": NaN, "learning_rate": 2.79553897288452e-05, "loss": 0.0, "step": 3872 }, { "epoch": 0.651417038096039, "grad_norm": NaN, "learning_rate": 2.793127171581854e-05, "loss": 0.0, "step": 3873 }, { "epoch": 0.6515852325288033, "grad_norm": NaN, "learning_rate": 2.7907160078200308e-05, "loss": 0.0, "step": 3874 }, { "epoch": 0.6517534269615676, "grad_norm": NaN, "learning_rate": 2.7883054822956068e-05, "loss": 0.0, "step": 3875 }, { "epoch": 0.6519216213943319, "grad_norm": NaN, "learning_rate": 2.7858955957049566e-05, "loss": 0.0, "step": 3876 }, { "epoch": 0.6520898158270961, "grad_norm": NaN, "learning_rate": 2.7834863487442693e-05, "loss": 0.0, "step": 3877 }, { "epoch": 0.6522580102598604, "grad_norm": NaN, "learning_rate": 2.781077742109548e-05, "loss": 0.0, "step": 3878 }, { "epoch": 0.6524262046926247, "grad_norm": NaN, "learning_rate": 2.7786697764966152e-05, "loss": 0.0, "step": 3879 }, { "epoch": 0.652594399125389, "grad_norm": NaN, "learning_rate": 2.7762624526011038e-05, "loss": 0.0, "step": 3880 }, { "epoch": 0.6527625935581532, "grad_norm": NaN, "learning_rate": 2.7738557711184622e-05, "loss": 0.0, "step": 3881 }, { "epoch": 0.6529307879909175, "grad_norm": NaN, "learning_rate": 2.7714497327439524e-05, "loss": 0.0, "step": 3882 }, { "epoch": 0.6530989824236818, "grad_norm": NaN, "learning_rate": 2.7690443381726554e-05, "loss": 0.0, "step": 3883 }, { "epoch": 0.6532671768564461, "grad_norm": NaN, "learning_rate": 2.766639588099461e-05, "loss": 0.0, "step": 3884 }, { "epoch": 0.6534353712892104, "grad_norm": NaN, "learning_rate": 2.7642354832190747e-05, "loss": 0.0, "step": 3885 }, { "epoch": 0.6536035657219746, "grad_norm": NaN, "learning_rate": 2.761832024226013e-05, "loss": 0.0, "step": 3886 }, { "epoch": 0.6537717601547389, "grad_norm": NaN, "learning_rate": 2.7594292118146137e-05, "loss": 0.0, "step": 3887 }, { "epoch": 0.6539399545875032, "grad_norm": NaN, "learning_rate": 2.757027046679018e-05, "loss": 0.0, "step": 3888 }, { "epoch": 0.6541081490202675, "grad_norm": NaN, "learning_rate": 2.754625529513186e-05, "loss": 0.0, "step": 3889 }, { "epoch": 0.6542763434530317, "grad_norm": NaN, "learning_rate": 2.752224661010887e-05, "loss": 0.0, "step": 3890 }, { "epoch": 0.6544445378857959, "grad_norm": NaN, "learning_rate": 2.7498244418657094e-05, "loss": 0.0, "step": 3891 }, { "epoch": 0.6546127323185602, "grad_norm": NaN, "learning_rate": 2.7474248727710467e-05, "loss": 0.0, "step": 3892 }, { "epoch": 0.6547809267513245, "grad_norm": NaN, "learning_rate": 2.745025954420108e-05, "loss": 0.0, "step": 3893 }, { "epoch": 0.6549491211840888, "grad_norm": NaN, "learning_rate": 2.7426276875059143e-05, "loss": 0.0, "step": 3894 }, { "epoch": 0.655117315616853, "grad_norm": NaN, "learning_rate": 2.740230072721297e-05, "loss": 0.0, "step": 3895 }, { "epoch": 0.6552855100496173, "grad_norm": NaN, "learning_rate": 2.7378331107589007e-05, "loss": 0.0, "step": 3896 }, { "epoch": 0.6554537044823816, "grad_norm": NaN, "learning_rate": 2.7354368023111822e-05, "loss": 0.0, "step": 3897 }, { "epoch": 0.6556218989151459, "grad_norm": NaN, "learning_rate": 2.7330411480704054e-05, "loss": 0.0, "step": 3898 }, { "epoch": 0.6557900933479102, "grad_norm": NaN, "learning_rate": 2.7306461487286484e-05, "loss": 0.0, "step": 3899 }, { "epoch": 0.6559582877806744, "grad_norm": NaN, "learning_rate": 2.7282518049778027e-05, "loss": 0.0, "step": 3900 }, { "epoch": 0.6561264822134387, "grad_norm": NaN, "learning_rate": 2.7258581175095654e-05, "loss": 0.0, "step": 3901 }, { "epoch": 0.656294676646203, "grad_norm": NaN, "learning_rate": 2.7234650870154465e-05, "loss": 0.0, "step": 3902 }, { "epoch": 0.6564628710789673, "grad_norm": NaN, "learning_rate": 2.721072714186763e-05, "loss": 0.0, "step": 3903 }, { "epoch": 0.6566310655117316, "grad_norm": NaN, "learning_rate": 2.718680999714649e-05, "loss": 0.0, "step": 3904 }, { "epoch": 0.6567992599444958, "grad_norm": NaN, "learning_rate": 2.7162899442900414e-05, "loss": 0.0, "step": 3905 }, { "epoch": 0.6569674543772601, "grad_norm": NaN, "learning_rate": 2.7138995486036906e-05, "loss": 0.0, "step": 3906 }, { "epoch": 0.6571356488100244, "grad_norm": NaN, "learning_rate": 2.711509813346152e-05, "loss": 0.0, "step": 3907 }, { "epoch": 0.6573038432427887, "grad_norm": NaN, "learning_rate": 2.7091207392077977e-05, "loss": 0.0, "step": 3908 }, { "epoch": 0.657472037675553, "grad_norm": NaN, "learning_rate": 2.7067323268788024e-05, "loss": 0.0, "step": 3909 }, { "epoch": 0.6576402321083172, "grad_norm": NaN, "learning_rate": 2.7043445770491526e-05, "loss": 0.0, "step": 3910 }, { "epoch": 0.6578084265410815, "grad_norm": NaN, "learning_rate": 2.7019574904086393e-05, "loss": 0.0, "step": 3911 }, { "epoch": 0.6579766209738458, "grad_norm": NaN, "learning_rate": 2.6995710676468695e-05, "loss": 0.0, "step": 3912 }, { "epoch": 0.6581448154066101, "grad_norm": NaN, "learning_rate": 2.697185309453252e-05, "loss": 0.0, "step": 3913 }, { "epoch": 0.6583130098393744, "grad_norm": NaN, "learning_rate": 2.694800216517005e-05, "loss": 0.0, "step": 3914 }, { "epoch": 0.6584812042721386, "grad_norm": NaN, "learning_rate": 2.6924157895271563e-05, "loss": 0.0, "step": 3915 }, { "epoch": 0.6586493987049029, "grad_norm": NaN, "learning_rate": 2.6900320291725396e-05, "loss": 0.0, "step": 3916 }, { "epoch": 0.6588175931376672, "grad_norm": NaN, "learning_rate": 2.687648936141796e-05, "loss": 0.0, "step": 3917 }, { "epoch": 0.6589857875704315, "grad_norm": NaN, "learning_rate": 2.6852665111233755e-05, "loss": 0.0, "step": 3918 }, { "epoch": 0.6591539820031957, "grad_norm": NaN, "learning_rate": 2.6828847548055313e-05, "loss": 0.0, "step": 3919 }, { "epoch": 0.6593221764359599, "grad_norm": NaN, "learning_rate": 2.6805036678763307e-05, "loss": 0.0, "step": 3920 }, { "epoch": 0.6594903708687242, "grad_norm": NaN, "learning_rate": 2.67812325102364e-05, "loss": 0.0, "step": 3921 }, { "epoch": 0.6596585653014885, "grad_norm": NaN, "learning_rate": 2.6757435049351353e-05, "loss": 0.0, "step": 3922 }, { "epoch": 0.6598267597342528, "grad_norm": NaN, "learning_rate": 2.6733644302982975e-05, "loss": 0.0, "step": 3923 }, { "epoch": 0.659994954167017, "grad_norm": NaN, "learning_rate": 2.6709860278004172e-05, "loss": 0.0, "step": 3924 }, { "epoch": 0.6601631485997813, "grad_norm": NaN, "learning_rate": 2.6686082981285876e-05, "loss": 0.0, "step": 3925 }, { "epoch": 0.6603313430325456, "grad_norm": NaN, "learning_rate": 2.6662312419697077e-05, "loss": 0.0, "step": 3926 }, { "epoch": 0.6604995374653099, "grad_norm": NaN, "learning_rate": 2.6638548600104805e-05, "loss": 0.0, "step": 3927 }, { "epoch": 0.6606677318980742, "grad_norm": NaN, "learning_rate": 2.6614791529374196e-05, "loss": 0.0, "step": 3928 }, { "epoch": 0.6608359263308384, "grad_norm": NaN, "learning_rate": 2.6591041214368385e-05, "loss": 0.0, "step": 3929 }, { "epoch": 0.6610041207636027, "grad_norm": NaN, "learning_rate": 2.6567297661948565e-05, "loss": 0.0, "step": 3930 }, { "epoch": 0.661172315196367, "grad_norm": NaN, "learning_rate": 2.6543560878973996e-05, "loss": 0.0, "step": 3931 }, { "epoch": 0.6613405096291313, "grad_norm": NaN, "learning_rate": 2.6519830872301965e-05, "loss": 0.0, "step": 3932 }, { "epoch": 0.6615087040618955, "grad_norm": NaN, "learning_rate": 2.6496107648787794e-05, "loss": 0.0, "step": 3933 }, { "epoch": 0.6616768984946598, "grad_norm": NaN, "learning_rate": 2.6472391215284868e-05, "loss": 0.0, "step": 3934 }, { "epoch": 0.6618450929274241, "grad_norm": NaN, "learning_rate": 2.644868157864458e-05, "loss": 0.0, "step": 3935 }, { "epoch": 0.6620132873601884, "grad_norm": NaN, "learning_rate": 2.642497874571641e-05, "loss": 0.0, "step": 3936 }, { "epoch": 0.6621814817929527, "grad_norm": NaN, "learning_rate": 2.6401282723347826e-05, "loss": 0.0, "step": 3937 }, { "epoch": 0.6623496762257169, "grad_norm": NaN, "learning_rate": 2.637759351838434e-05, "loss": 0.0, "step": 3938 }, { "epoch": 0.6625178706584812, "grad_norm": NaN, "learning_rate": 2.6353911137669483e-05, "loss": 0.0, "step": 3939 }, { "epoch": 0.6626860650912455, "grad_norm": NaN, "learning_rate": 2.6330235588044865e-05, "loss": 0.0, "step": 3940 }, { "epoch": 0.6628542595240098, "grad_norm": NaN, "learning_rate": 2.630656687635007e-05, "loss": 0.0, "step": 3941 }, { "epoch": 0.6630224539567741, "grad_norm": NaN, "learning_rate": 2.628290500942272e-05, "loss": 0.0, "step": 3942 }, { "epoch": 0.6631906483895383, "grad_norm": NaN, "learning_rate": 2.6259249994098455e-05, "loss": 0.0, "step": 3943 }, { "epoch": 0.6633588428223026, "grad_norm": NaN, "learning_rate": 2.6235601837210966e-05, "loss": 0.0, "step": 3944 }, { "epoch": 0.6635270372550669, "grad_norm": NaN, "learning_rate": 2.6211960545591936e-05, "loss": 0.0, "step": 3945 }, { "epoch": 0.6636952316878312, "grad_norm": NaN, "learning_rate": 2.618832612607106e-05, "loss": 0.0, "step": 3946 }, { "epoch": 0.6638634261205955, "grad_norm": NaN, "learning_rate": 2.6164698585476056e-05, "loss": 0.0, "step": 3947 }, { "epoch": 0.6640316205533597, "grad_norm": NaN, "learning_rate": 2.6141077930632646e-05, "loss": 0.0, "step": 3948 }, { "epoch": 0.6641998149861239, "grad_norm": NaN, "learning_rate": 2.6117464168364603e-05, "loss": 0.0, "step": 3949 }, { "epoch": 0.6643680094188882, "grad_norm": NaN, "learning_rate": 2.6093857305493664e-05, "loss": 0.0, "step": 3950 }, { "epoch": 0.6645362038516525, "grad_norm": NaN, "learning_rate": 2.607025734883959e-05, "loss": 0.0, "step": 3951 }, { "epoch": 0.6647043982844167, "grad_norm": NaN, "learning_rate": 2.604666430522013e-05, "loss": 0.0, "step": 3952 }, { "epoch": 0.664872592717181, "grad_norm": NaN, "learning_rate": 2.6023078181451067e-05, "loss": 0.0, "step": 3953 }, { "epoch": 0.6650407871499453, "grad_norm": NaN, "learning_rate": 2.5999498984346155e-05, "loss": 0.0, "step": 3954 }, { "epoch": 0.6652089815827096, "grad_norm": NaN, "learning_rate": 2.597592672071717e-05, "loss": 0.0, "step": 3955 }, { "epoch": 0.6653771760154739, "grad_norm": NaN, "learning_rate": 2.5952361397373848e-05, "loss": 0.0, "step": 3956 }, { "epoch": 0.6655453704482381, "grad_norm": NaN, "learning_rate": 2.592880302112399e-05, "loss": 0.0, "step": 3957 }, { "epoch": 0.6657135648810024, "grad_norm": NaN, "learning_rate": 2.5905251598773323e-05, "loss": 0.0, "step": 3958 }, { "epoch": 0.6658817593137667, "grad_norm": NaN, "learning_rate": 2.5881707137125594e-05, "loss": 0.0, "step": 3959 }, { "epoch": 0.666049953746531, "grad_norm": NaN, "learning_rate": 2.5858169642982515e-05, "loss": 0.0, "step": 3960 }, { "epoch": 0.6662181481792953, "grad_norm": NaN, "learning_rate": 2.5834639123143844e-05, "loss": 0.0, "step": 3961 }, { "epoch": 0.6663863426120595, "grad_norm": NaN, "learning_rate": 2.5811115584407253e-05, "loss": 0.0, "step": 3962 }, { "epoch": 0.6665545370448238, "grad_norm": NaN, "learning_rate": 2.578759903356845e-05, "loss": 0.0, "step": 3963 }, { "epoch": 0.6667227314775881, "grad_norm": NaN, "learning_rate": 2.5764089477421067e-05, "loss": 0.0, "step": 3964 }, { "epoch": 0.6668909259103524, "grad_norm": NaN, "learning_rate": 2.5740586922756792e-05, "loss": 0.0, "step": 3965 }, { "epoch": 0.6670591203431167, "grad_norm": NaN, "learning_rate": 2.5717091376365243e-05, "loss": 0.0, "step": 3966 }, { "epoch": 0.6672273147758809, "grad_norm": NaN, "learning_rate": 2.5693602845034003e-05, "loss": 0.0, "step": 3967 }, { "epoch": 0.6673955092086452, "grad_norm": NaN, "learning_rate": 2.5670121335548657e-05, "loss": 0.0, "step": 3968 }, { "epoch": 0.6675637036414095, "grad_norm": NaN, "learning_rate": 2.5646646854692746e-05, "loss": 0.0, "step": 3969 }, { "epoch": 0.6677318980741738, "grad_norm": NaN, "learning_rate": 2.562317940924779e-05, "loss": 0.0, "step": 3970 }, { "epoch": 0.667900092506938, "grad_norm": NaN, "learning_rate": 2.559971900599326e-05, "loss": 0.0, "step": 3971 }, { "epoch": 0.6680682869397023, "grad_norm": NaN, "learning_rate": 2.55762656517066e-05, "loss": 0.0, "step": 3972 }, { "epoch": 0.6682364813724666, "grad_norm": NaN, "learning_rate": 2.5552819353163244e-05, "loss": 0.0, "step": 3973 }, { "epoch": 0.6684046758052309, "grad_norm": NaN, "learning_rate": 2.5529380117136554e-05, "loss": 0.0, "step": 3974 }, { "epoch": 0.6685728702379952, "grad_norm": NaN, "learning_rate": 2.5505947950397863e-05, "loss": 0.0, "step": 3975 }, { "epoch": 0.6687410646707594, "grad_norm": NaN, "learning_rate": 2.548252285971644e-05, "loss": 0.0, "step": 3976 }, { "epoch": 0.6689092591035237, "grad_norm": NaN, "learning_rate": 2.545910485185957e-05, "loss": 0.0, "step": 3977 }, { "epoch": 0.669077453536288, "grad_norm": NaN, "learning_rate": 2.5435693933592432e-05, "loss": 0.0, "step": 3978 }, { "epoch": 0.6692456479690522, "grad_norm": NaN, "learning_rate": 2.5412290111678184e-05, "loss": 0.0, "step": 3979 }, { "epoch": 0.6694138424018165, "grad_norm": NaN, "learning_rate": 2.5388893392877904e-05, "loss": 0.0, "step": 3980 }, { "epoch": 0.6695820368345807, "grad_norm": NaN, "learning_rate": 2.5365503783950685e-05, "loss": 0.0, "step": 3981 }, { "epoch": 0.669750231267345, "grad_norm": NaN, "learning_rate": 2.5342121291653498e-05, "loss": 0.0, "step": 3982 }, { "epoch": 0.6699184257001093, "grad_norm": NaN, "learning_rate": 2.5318745922741282e-05, "loss": 0.0, "step": 3983 }, { "epoch": 0.6700866201328736, "grad_norm": NaN, "learning_rate": 2.529537768396691e-05, "loss": 0.0, "step": 3984 }, { "epoch": 0.6702548145656378, "grad_norm": NaN, "learning_rate": 2.5272016582081236e-05, "loss": 0.0, "step": 3985 }, { "epoch": 0.6704230089984021, "grad_norm": NaN, "learning_rate": 2.5248662623832997e-05, "loss": 0.0, "step": 3986 }, { "epoch": 0.6705912034311664, "grad_norm": NaN, "learning_rate": 2.5225315815968896e-05, "loss": 0.0, "step": 3987 }, { "epoch": 0.6707593978639307, "grad_norm": NaN, "learning_rate": 2.520197616523357e-05, "loss": 0.0, "step": 3988 }, { "epoch": 0.670927592296695, "grad_norm": NaN, "learning_rate": 2.5178643678369572e-05, "loss": 0.0, "step": 3989 }, { "epoch": 0.6710957867294592, "grad_norm": NaN, "learning_rate": 2.51553183621174e-05, "loss": 0.0, "step": 3990 }, { "epoch": 0.6712639811622235, "grad_norm": NaN, "learning_rate": 2.5132000223215478e-05, "loss": 0.0, "step": 3991 }, { "epoch": 0.6714321755949878, "grad_norm": NaN, "learning_rate": 2.5108689268400132e-05, "loss": 0.0, "step": 3992 }, { "epoch": 0.6716003700277521, "grad_norm": NaN, "learning_rate": 2.5085385504405678e-05, "loss": 0.0, "step": 3993 }, { "epoch": 0.6717685644605164, "grad_norm": NaN, "learning_rate": 2.506208893796429e-05, "loss": 0.0, "step": 3994 }, { "epoch": 0.6719367588932806, "grad_norm": NaN, "learning_rate": 2.5038799575806095e-05, "loss": 0.0, "step": 3995 }, { "epoch": 0.6721049533260449, "grad_norm": NaN, "learning_rate": 2.5015517424659095e-05, "loss": 0.0, "step": 3996 }, { "epoch": 0.6722731477588092, "grad_norm": NaN, "learning_rate": 2.499224249124929e-05, "loss": 0.0, "step": 3997 }, { "epoch": 0.6724413421915735, "grad_norm": NaN, "learning_rate": 2.4968974782300524e-05, "loss": 0.0, "step": 3998 }, { "epoch": 0.6726095366243378, "grad_norm": NaN, "learning_rate": 2.4945714304534585e-05, "loss": 0.0, "step": 3999 }, { "epoch": 0.672777731057102, "grad_norm": NaN, "learning_rate": 2.4922461064671156e-05, "loss": 0.0, "step": 4000 }, { "epoch": 0.6729459254898663, "grad_norm": NaN, "learning_rate": 2.4899215069427823e-05, "loss": 0.0, "step": 4001 }, { "epoch": 0.6731141199226306, "grad_norm": NaN, "learning_rate": 2.487597632552013e-05, "loss": 0.0, "step": 4002 }, { "epoch": 0.6732823143553949, "grad_norm": NaN, "learning_rate": 2.4852744839661467e-05, "loss": 0.0, "step": 4003 }, { "epoch": 0.6734505087881592, "grad_norm": NaN, "learning_rate": 2.4829520618563162e-05, "loss": 0.0, "step": 4004 }, { "epoch": 0.6736187032209234, "grad_norm": NaN, "learning_rate": 2.4806303668934422e-05, "loss": 0.0, "step": 4005 }, { "epoch": 0.6737868976536877, "grad_norm": NaN, "learning_rate": 2.4783093997482364e-05, "loss": 0.0, "step": 4006 }, { "epoch": 0.673955092086452, "grad_norm": NaN, "learning_rate": 2.4759891610912005e-05, "loss": 0.0, "step": 4007 }, { "epoch": 0.6741232865192163, "grad_norm": NaN, "learning_rate": 2.4736696515926256e-05, "loss": 0.0, "step": 4008 }, { "epoch": 0.6742914809519804, "grad_norm": NaN, "learning_rate": 2.4713508719225896e-05, "loss": 0.0, "step": 4009 }, { "epoch": 0.6744596753847447, "grad_norm": NaN, "learning_rate": 2.4690328227509668e-05, "loss": 0.0, "step": 4010 }, { "epoch": 0.674627869817509, "grad_norm": NaN, "learning_rate": 2.4667155047474127e-05, "loss": 0.0, "step": 4011 }, { "epoch": 0.6747960642502733, "grad_norm": NaN, "learning_rate": 2.464398918581375e-05, "loss": 0.0, "step": 4012 }, { "epoch": 0.6749642586830376, "grad_norm": NaN, "learning_rate": 2.4620830649220873e-05, "loss": 0.0, "step": 4013 }, { "epoch": 0.6751324531158018, "grad_norm": NaN, "learning_rate": 2.4597679444385772e-05, "loss": 0.0, "step": 4014 }, { "epoch": 0.6753006475485661, "grad_norm": NaN, "learning_rate": 2.4574535577996566e-05, "loss": 0.0, "step": 4015 }, { "epoch": 0.6754688419813304, "grad_norm": NaN, "learning_rate": 2.4551399056739245e-05, "loss": 0.0, "step": 4016 }, { "epoch": 0.6756370364140947, "grad_norm": NaN, "learning_rate": 2.4528269887297677e-05, "loss": 0.0, "step": 4017 }, { "epoch": 0.675805230846859, "grad_norm": NaN, "learning_rate": 2.4505148076353662e-05, "loss": 0.0, "step": 4018 }, { "epoch": 0.6759734252796232, "grad_norm": NaN, "learning_rate": 2.4482033630586804e-05, "loss": 0.0, "step": 4019 }, { "epoch": 0.6761416197123875, "grad_norm": NaN, "learning_rate": 2.4458926556674615e-05, "loss": 0.0, "step": 4020 }, { "epoch": 0.6763098141451518, "grad_norm": NaN, "learning_rate": 2.443582686129245e-05, "loss": 0.0, "step": 4021 }, { "epoch": 0.6764780085779161, "grad_norm": NaN, "learning_rate": 2.4412734551113585e-05, "loss": 0.0, "step": 4022 }, { "epoch": 0.6766462030106803, "grad_norm": NaN, "learning_rate": 2.4389649632809115e-05, "loss": 0.0, "step": 4023 }, { "epoch": 0.6768143974434446, "grad_norm": NaN, "learning_rate": 2.4366572113048014e-05, "loss": 0.0, "step": 4024 }, { "epoch": 0.6769825918762089, "grad_norm": NaN, "learning_rate": 2.434350199849712e-05, "loss": 0.0, "step": 4025 }, { "epoch": 0.6771507863089732, "grad_norm": NaN, "learning_rate": 2.432043929582113e-05, "loss": 0.0, "step": 4026 }, { "epoch": 0.6773189807417375, "grad_norm": NaN, "learning_rate": 2.4297384011682595e-05, "loss": 0.0, "step": 4027 }, { "epoch": 0.6774871751745017, "grad_norm": NaN, "learning_rate": 2.427433615274194e-05, "loss": 0.0, "step": 4028 }, { "epoch": 0.677655369607266, "grad_norm": NaN, "learning_rate": 2.4251295725657404e-05, "loss": 0.0, "step": 4029 }, { "epoch": 0.6778235640400303, "grad_norm": NaN, "learning_rate": 2.422826273708515e-05, "loss": 0.0, "step": 4030 }, { "epoch": 0.6779917584727946, "grad_norm": NaN, "learning_rate": 2.420523719367913e-05, "loss": 0.0, "step": 4031 }, { "epoch": 0.6781599529055589, "grad_norm": NaN, "learning_rate": 2.418221910209117e-05, "loss": 0.0, "step": 4032 }, { "epoch": 0.6783281473383231, "grad_norm": NaN, "learning_rate": 2.415920846897091e-05, "loss": 0.0, "step": 4033 }, { "epoch": 0.6784963417710874, "grad_norm": NaN, "learning_rate": 2.413620530096592e-05, "loss": 0.0, "step": 4034 }, { "epoch": 0.6786645362038517, "grad_norm": NaN, "learning_rate": 2.411320960472152e-05, "loss": 0.0, "step": 4035 }, { "epoch": 0.678832730636616, "grad_norm": NaN, "learning_rate": 2.409022138688092e-05, "loss": 0.0, "step": 4036 }, { "epoch": 0.6790009250693803, "grad_norm": NaN, "learning_rate": 2.4067240654085132e-05, "loss": 0.0, "step": 4037 }, { "epoch": 0.6791691195021444, "grad_norm": NaN, "learning_rate": 2.4044267412973078e-05, "loss": 0.0, "step": 4038 }, { "epoch": 0.6793373139349087, "grad_norm": NaN, "learning_rate": 2.4021301670181446e-05, "loss": 0.0, "step": 4039 }, { "epoch": 0.679505508367673, "grad_norm": NaN, "learning_rate": 2.3998343432344784e-05, "loss": 0.0, "step": 4040 }, { "epoch": 0.6796737028004373, "grad_norm": NaN, "learning_rate": 2.3975392706095446e-05, "loss": 0.0, "step": 4041 }, { "epoch": 0.6798418972332015, "grad_norm": NaN, "learning_rate": 2.3952449498063683e-05, "loss": 0.0, "step": 4042 }, { "epoch": 0.6800100916659658, "grad_norm": NaN, "learning_rate": 2.3929513814877514e-05, "loss": 0.0, "step": 4043 }, { "epoch": 0.6801782860987301, "grad_norm": NaN, "learning_rate": 2.39065856631628e-05, "loss": 0.0, "step": 4044 }, { "epoch": 0.6803464805314944, "grad_norm": NaN, "learning_rate": 2.388366504954322e-05, "loss": 0.0, "step": 4045 }, { "epoch": 0.6805146749642587, "grad_norm": NaN, "learning_rate": 2.386075198064029e-05, "loss": 0.0, "step": 4046 }, { "epoch": 0.6806828693970229, "grad_norm": NaN, "learning_rate": 2.383784646307334e-05, "loss": 0.0, "step": 4047 }, { "epoch": 0.6808510638297872, "grad_norm": NaN, "learning_rate": 2.3814948503459507e-05, "loss": 0.0, "step": 4048 }, { "epoch": 0.6810192582625515, "grad_norm": NaN, "learning_rate": 2.379205810841376e-05, "loss": 0.0, "step": 4049 }, { "epoch": 0.6811874526953158, "grad_norm": NaN, "learning_rate": 2.3769175284548868e-05, "loss": 0.0, "step": 4050 }, { "epoch": 0.68135564712808, "grad_norm": NaN, "learning_rate": 2.374630003847545e-05, "loss": 0.0, "step": 4051 }, { "epoch": 0.6815238415608443, "grad_norm": NaN, "learning_rate": 2.372343237680189e-05, "loss": 0.0, "step": 4052 }, { "epoch": 0.6816920359936086, "grad_norm": NaN, "learning_rate": 2.3700572306134412e-05, "loss": 0.0, "step": 4053 }, { "epoch": 0.6818602304263729, "grad_norm": NaN, "learning_rate": 2.3677719833076996e-05, "loss": 0.0, "step": 4054 }, { "epoch": 0.6820284248591372, "grad_norm": NaN, "learning_rate": 2.3654874964231518e-05, "loss": 0.0, "step": 4055 }, { "epoch": 0.6821966192919015, "grad_norm": NaN, "learning_rate": 2.3632037706197573e-05, "loss": 0.0, "step": 4056 }, { "epoch": 0.6823648137246657, "grad_norm": NaN, "learning_rate": 2.36092080655726e-05, "loss": 0.0, "step": 4057 }, { "epoch": 0.68253300815743, "grad_norm": NaN, "learning_rate": 2.358638604895181e-05, "loss": 0.0, "step": 4058 }, { "epoch": 0.6827012025901943, "grad_norm": NaN, "learning_rate": 2.3563571662928252e-05, "loss": 0.0, "step": 4059 }, { "epoch": 0.6828693970229586, "grad_norm": NaN, "learning_rate": 2.354076491409274e-05, "loss": 0.0, "step": 4060 }, { "epoch": 0.6830375914557228, "grad_norm": NaN, "learning_rate": 2.3517965809033882e-05, "loss": 0.0, "step": 4061 }, { "epoch": 0.6832057858884871, "grad_norm": NaN, "learning_rate": 2.3495174354338084e-05, "loss": 0.0, "step": 4062 }, { "epoch": 0.6833739803212514, "grad_norm": NaN, "learning_rate": 2.3472390556589546e-05, "loss": 0.0, "step": 4063 }, { "epoch": 0.6835421747540157, "grad_norm": NaN, "learning_rate": 2.3449614422370253e-05, "loss": 0.0, "step": 4064 }, { "epoch": 0.68371036918678, "grad_norm": NaN, "learning_rate": 2.3426845958259968e-05, "loss": 0.0, "step": 4065 }, { "epoch": 0.6838785636195442, "grad_norm": NaN, "learning_rate": 2.3404085170836233e-05, "loss": 0.0, "step": 4066 }, { "epoch": 0.6840467580523085, "grad_norm": NaN, "learning_rate": 2.3381332066674422e-05, "loss": 0.0, "step": 4067 }, { "epoch": 0.6842149524850727, "grad_norm": NaN, "learning_rate": 2.3358586652347624e-05, "loss": 0.0, "step": 4068 }, { "epoch": 0.684383146917837, "grad_norm": NaN, "learning_rate": 2.3335848934426746e-05, "loss": 0.0, "step": 4069 }, { "epoch": 0.6845513413506013, "grad_norm": NaN, "learning_rate": 2.3313118919480436e-05, "loss": 0.0, "step": 4070 }, { "epoch": 0.6847195357833655, "grad_norm": NaN, "learning_rate": 2.329039661407518e-05, "loss": 0.0, "step": 4071 }, { "epoch": 0.6848877302161298, "grad_norm": NaN, "learning_rate": 2.3267682024775173e-05, "loss": 0.0, "step": 4072 }, { "epoch": 0.6850559246488941, "grad_norm": NaN, "learning_rate": 2.3244975158142413e-05, "loss": 0.0, "step": 4073 }, { "epoch": 0.6852241190816584, "grad_norm": NaN, "learning_rate": 2.3222276020736634e-05, "loss": 0.0, "step": 4074 }, { "epoch": 0.6853923135144226, "grad_norm": NaN, "learning_rate": 2.3199584619115406e-05, "loss": 0.0, "step": 4075 }, { "epoch": 0.6855605079471869, "grad_norm": NaN, "learning_rate": 2.3176900959834004e-05, "loss": 0.0, "step": 4076 }, { "epoch": 0.6857287023799512, "grad_norm": NaN, "learning_rate": 2.315422504944547e-05, "loss": 0.0, "step": 4077 }, { "epoch": 0.6858968968127155, "grad_norm": NaN, "learning_rate": 2.3131556894500616e-05, "loss": 0.0, "step": 4078 }, { "epoch": 0.6860650912454798, "grad_norm": NaN, "learning_rate": 2.310889650154805e-05, "loss": 0.0, "step": 4079 }, { "epoch": 0.686233285678244, "grad_norm": NaN, "learning_rate": 2.308624387713409e-05, "loss": 0.0, "step": 4080 }, { "epoch": 0.6864014801110083, "grad_norm": NaN, "learning_rate": 2.3063599027802818e-05, "loss": 0.0, "step": 4081 }, { "epoch": 0.6865696745437726, "grad_norm": NaN, "learning_rate": 2.3040961960096086e-05, "loss": 0.0, "step": 4082 }, { "epoch": 0.6867378689765369, "grad_norm": NaN, "learning_rate": 2.3018332680553477e-05, "loss": 0.0, "step": 4083 }, { "epoch": 0.6869060634093012, "grad_norm": NaN, "learning_rate": 2.2995711195712354e-05, "loss": 0.0, "step": 4084 }, { "epoch": 0.6870742578420654, "grad_norm": NaN, "learning_rate": 2.2973097512107796e-05, "loss": 0.0, "step": 4085 }, { "epoch": 0.6872424522748297, "grad_norm": NaN, "learning_rate": 2.2950491636272632e-05, "loss": 0.0, "step": 4086 }, { "epoch": 0.687410646707594, "grad_norm": NaN, "learning_rate": 2.292789357473748e-05, "loss": 0.0, "step": 4087 }, { "epoch": 0.6875788411403583, "grad_norm": NaN, "learning_rate": 2.290530333403065e-05, "loss": 0.0, "step": 4088 }, { "epoch": 0.6877470355731226, "grad_norm": NaN, "learning_rate": 2.28827209206782e-05, "loss": 0.0, "step": 4089 }, { "epoch": 0.6879152300058868, "grad_norm": NaN, "learning_rate": 2.2860146341203937e-05, "loss": 0.0, "step": 4090 }, { "epoch": 0.6880834244386511, "grad_norm": NaN, "learning_rate": 2.2837579602129426e-05, "loss": 0.0, "step": 4091 }, { "epoch": 0.6882516188714154, "grad_norm": NaN, "learning_rate": 2.281502070997394e-05, "loss": 0.0, "step": 4092 }, { "epoch": 0.6884198133041797, "grad_norm": NaN, "learning_rate": 2.2792469671254475e-05, "loss": 0.0, "step": 4093 }, { "epoch": 0.688588007736944, "grad_norm": NaN, "learning_rate": 2.276992649248576e-05, "loss": 0.0, "step": 4094 }, { "epoch": 0.6887562021697082, "grad_norm": NaN, "learning_rate": 2.2747391180180307e-05, "loss": 0.0, "step": 4095 }, { "epoch": 0.6889243966024725, "grad_norm": NaN, "learning_rate": 2.2724863740848296e-05, "loss": 0.0, "step": 4096 }, { "epoch": 0.6890925910352367, "grad_norm": NaN, "learning_rate": 2.2702344180997647e-05, "loss": 0.0, "step": 4097 }, { "epoch": 0.689260785468001, "grad_norm": NaN, "learning_rate": 2.2679832507134018e-05, "loss": 0.0, "step": 4098 }, { "epoch": 0.6894289799007652, "grad_norm": NaN, "learning_rate": 2.2657328725760774e-05, "loss": 0.0, "step": 4099 }, { "epoch": 0.6895971743335295, "grad_norm": NaN, "learning_rate": 2.2634832843379006e-05, "loss": 0.0, "step": 4100 }, { "epoch": 0.6897653687662938, "grad_norm": NaN, "learning_rate": 2.2612344866487523e-05, "loss": 0.0, "step": 4101 }, { "epoch": 0.6899335631990581, "grad_norm": NaN, "learning_rate": 2.2589864801582843e-05, "loss": 0.0, "step": 4102 }, { "epoch": 0.6901017576318224, "grad_norm": NaN, "learning_rate": 2.2567392655159197e-05, "loss": 0.0, "step": 4103 }, { "epoch": 0.6902699520645866, "grad_norm": NaN, "learning_rate": 2.254492843370857e-05, "loss": 0.0, "step": 4104 }, { "epoch": 0.6904381464973509, "grad_norm": NaN, "learning_rate": 2.2522472143720597e-05, "loss": 0.0, "step": 4105 }, { "epoch": 0.6906063409301152, "grad_norm": NaN, "learning_rate": 2.2500023791682662e-05, "loss": 0.0, "step": 4106 }, { "epoch": 0.6907745353628795, "grad_norm": NaN, "learning_rate": 2.2477583384079816e-05, "loss": 0.0, "step": 4107 }, { "epoch": 0.6909427297956438, "grad_norm": NaN, "learning_rate": 2.245515092739488e-05, "loss": 0.0, "step": 4108 }, { "epoch": 0.691110924228408, "grad_norm": NaN, "learning_rate": 2.2432726428108318e-05, "loss": 0.0, "step": 4109 }, { "epoch": 0.6912791186611723, "grad_norm": NaN, "learning_rate": 2.2410309892698322e-05, "loss": 0.0, "step": 4110 }, { "epoch": 0.6914473130939366, "grad_norm": NaN, "learning_rate": 2.238790132764076e-05, "loss": 0.0, "step": 4111 }, { "epoch": 0.6916155075267009, "grad_norm": NaN, "learning_rate": 2.2365500739409244e-05, "loss": 0.0, "step": 4112 }, { "epoch": 0.6917837019594651, "grad_norm": NaN, "learning_rate": 2.234310813447505e-05, "loss": 0.0, "step": 4113 }, { "epoch": 0.6919518963922294, "grad_norm": NaN, "learning_rate": 2.2320723519307136e-05, "loss": 0.0, "step": 4114 }, { "epoch": 0.6921200908249937, "grad_norm": NaN, "learning_rate": 2.2298346900372146e-05, "loss": 0.0, "step": 4115 }, { "epoch": 0.692288285257758, "grad_norm": NaN, "learning_rate": 2.2275978284134486e-05, "loss": 0.0, "step": 4116 }, { "epoch": 0.6924564796905223, "grad_norm": NaN, "learning_rate": 2.2253617677056172e-05, "loss": 0.0, "step": 4117 }, { "epoch": 0.6926246741232865, "grad_norm": NaN, "learning_rate": 2.2231265085596938e-05, "loss": 0.0, "step": 4118 }, { "epoch": 0.6927928685560508, "grad_norm": NaN, "learning_rate": 2.220892051621419e-05, "loss": 0.0, "step": 4119 }, { "epoch": 0.6929610629888151, "grad_norm": NaN, "learning_rate": 2.2186583975363022e-05, "loss": 0.0, "step": 4120 }, { "epoch": 0.6931292574215794, "grad_norm": NaN, "learning_rate": 2.216425546949622e-05, "loss": 0.0, "step": 4121 }, { "epoch": 0.6932974518543437, "grad_norm": NaN, "learning_rate": 2.214193500506423e-05, "loss": 0.0, "step": 4122 }, { "epoch": 0.6934656462871079, "grad_norm": NaN, "learning_rate": 2.2119622588515176e-05, "loss": 0.0, "step": 4123 }, { "epoch": 0.6936338407198722, "grad_norm": NaN, "learning_rate": 2.2097318226294893e-05, "loss": 0.0, "step": 4124 }, { "epoch": 0.6938020351526365, "grad_norm": NaN, "learning_rate": 2.207502192484685e-05, "loss": 0.0, "step": 4125 }, { "epoch": 0.6939702295854008, "grad_norm": NaN, "learning_rate": 2.2052733690612193e-05, "loss": 0.0, "step": 4126 }, { "epoch": 0.694138424018165, "grad_norm": NaN, "learning_rate": 2.2030453530029725e-05, "loss": 0.0, "step": 4127 }, { "epoch": 0.6943066184509292, "grad_norm": NaN, "learning_rate": 2.2008181449535976e-05, "loss": 0.0, "step": 4128 }, { "epoch": 0.6944748128836935, "grad_norm": NaN, "learning_rate": 2.1985917455565082e-05, "loss": 0.0, "step": 4129 }, { "epoch": 0.6946430073164578, "grad_norm": NaN, "learning_rate": 2.1963661554548853e-05, "loss": 0.0, "step": 4130 }, { "epoch": 0.6948112017492221, "grad_norm": NaN, "learning_rate": 2.194141375291676e-05, "loss": 0.0, "step": 4131 }, { "epoch": 0.6949793961819863, "grad_norm": NaN, "learning_rate": 2.191917405709598e-05, "loss": 0.0, "step": 4132 }, { "epoch": 0.6951475906147506, "grad_norm": NaN, "learning_rate": 2.1896942473511294e-05, "loss": 0.0, "step": 4133 }, { "epoch": 0.6953157850475149, "grad_norm": NaN, "learning_rate": 2.1874719008585154e-05, "loss": 0.0, "step": 4134 }, { "epoch": 0.6954839794802792, "grad_norm": NaN, "learning_rate": 2.185250366873765e-05, "loss": 0.0, "step": 4135 }, { "epoch": 0.6956521739130435, "grad_norm": NaN, "learning_rate": 2.1830296460386584e-05, "loss": 0.0, "step": 4136 }, { "epoch": 0.6958203683458077, "grad_norm": NaN, "learning_rate": 2.1808097389947356e-05, "loss": 0.0, "step": 4137 }, { "epoch": 0.695988562778572, "grad_norm": NaN, "learning_rate": 2.1785906463833023e-05, "loss": 0.0, "step": 4138 }, { "epoch": 0.6961567572113363, "grad_norm": NaN, "learning_rate": 2.1763723688454298e-05, "loss": 0.0, "step": 4139 }, { "epoch": 0.6963249516441006, "grad_norm": NaN, "learning_rate": 2.174154907021953e-05, "loss": 0.0, "step": 4140 }, { "epoch": 0.6964931460768649, "grad_norm": NaN, "learning_rate": 2.171938261553472e-05, "loss": 0.0, "step": 4141 }, { "epoch": 0.6966613405096291, "grad_norm": NaN, "learning_rate": 2.1697224330803522e-05, "loss": 0.0, "step": 4142 }, { "epoch": 0.6968295349423934, "grad_norm": NaN, "learning_rate": 2.1675074222427183e-05, "loss": 0.0, "step": 4143 }, { "epoch": 0.6969977293751577, "grad_norm": NaN, "learning_rate": 2.1652932296804663e-05, "loss": 0.0, "step": 4144 }, { "epoch": 0.697165923807922, "grad_norm": NaN, "learning_rate": 2.1630798560332492e-05, "loss": 0.0, "step": 4145 }, { "epoch": 0.6973341182406863, "grad_norm": NaN, "learning_rate": 2.160867301940487e-05, "loss": 0.0, "step": 4146 }, { "epoch": 0.6975023126734505, "grad_norm": NaN, "learning_rate": 2.1586555680413606e-05, "loss": 0.0, "step": 4147 }, { "epoch": 0.6976705071062148, "grad_norm": NaN, "learning_rate": 2.156444654974814e-05, "loss": 0.0, "step": 4148 }, { "epoch": 0.6978387015389791, "grad_norm": NaN, "learning_rate": 2.154234563379559e-05, "loss": 0.0, "step": 4149 }, { "epoch": 0.6980068959717434, "grad_norm": NaN, "learning_rate": 2.1520252938940643e-05, "loss": 0.0, "step": 4150 }, { "epoch": 0.6981750904045076, "grad_norm": NaN, "learning_rate": 2.1498168471565627e-05, "loss": 0.0, "step": 4151 }, { "epoch": 0.6983432848372719, "grad_norm": NaN, "learning_rate": 2.147609223805049e-05, "loss": 0.0, "step": 4152 }, { "epoch": 0.6985114792700362, "grad_norm": NaN, "learning_rate": 2.145402424477283e-05, "loss": 0.0, "step": 4153 }, { "epoch": 0.6986796737028005, "grad_norm": NaN, "learning_rate": 2.143196449810783e-05, "loss": 0.0, "step": 4154 }, { "epoch": 0.6988478681355648, "grad_norm": NaN, "learning_rate": 2.140991300442831e-05, "loss": 0.0, "step": 4155 }, { "epoch": 0.6990160625683289, "grad_norm": NaN, "learning_rate": 2.13878697701047e-05, "loss": 0.0, "step": 4156 }, { "epoch": 0.6991842570010932, "grad_norm": NaN, "learning_rate": 2.1365834801505036e-05, "loss": 0.0, "step": 4157 }, { "epoch": 0.6993524514338575, "grad_norm": NaN, "learning_rate": 2.134380810499497e-05, "loss": 0.0, "step": 4158 }, { "epoch": 0.6995206458666218, "grad_norm": NaN, "learning_rate": 2.1321789686937787e-05, "loss": 0.0, "step": 4159 }, { "epoch": 0.699688840299386, "grad_norm": NaN, "learning_rate": 2.1299779553694323e-05, "loss": 0.0, "step": 4160 }, { "epoch": 0.6998570347321503, "grad_norm": NaN, "learning_rate": 2.1277777711623104e-05, "loss": 0.0, "step": 4161 }, { "epoch": 0.7000252291649146, "grad_norm": NaN, "learning_rate": 2.12557841670802e-05, "loss": 0.0, "step": 4162 }, { "epoch": 0.7001934235976789, "grad_norm": NaN, "learning_rate": 2.1233798926419295e-05, "loss": 0.0, "step": 4163 }, { "epoch": 0.7003616180304432, "grad_norm": NaN, "learning_rate": 2.121182199599166e-05, "loss": 0.0, "step": 4164 }, { "epoch": 0.7005298124632074, "grad_norm": NaN, "learning_rate": 2.1189853382146226e-05, "loss": 0.0, "step": 4165 }, { "epoch": 0.7006980068959717, "grad_norm": NaN, "learning_rate": 2.1167893091229452e-05, "loss": 0.0, "step": 4166 }, { "epoch": 0.700866201328736, "grad_norm": NaN, "learning_rate": 2.1145941129585435e-05, "loss": 0.0, "step": 4167 }, { "epoch": 0.7010343957615003, "grad_norm": NaN, "learning_rate": 2.1123997503555813e-05, "loss": 0.0, "step": 4168 }, { "epoch": 0.7012025901942646, "grad_norm": NaN, "learning_rate": 2.1102062219479906e-05, "loss": 0.0, "step": 4169 }, { "epoch": 0.7013707846270288, "grad_norm": NaN, "learning_rate": 2.1080135283694547e-05, "loss": 0.0, "step": 4170 }, { "epoch": 0.7015389790597931, "grad_norm": NaN, "learning_rate": 2.105821670253419e-05, "loss": 0.0, "step": 4171 }, { "epoch": 0.7017071734925574, "grad_norm": NaN, "learning_rate": 2.1036306482330837e-05, "loss": 0.0, "step": 4172 }, { "epoch": 0.7018753679253217, "grad_norm": NaN, "learning_rate": 2.1014404629414148e-05, "loss": 0.0, "step": 4173 }, { "epoch": 0.702043562358086, "grad_norm": NaN, "learning_rate": 2.09925111501113e-05, "loss": 0.0, "step": 4174 }, { "epoch": 0.7022117567908502, "grad_norm": NaN, "learning_rate": 2.0970626050747082e-05, "loss": 0.0, "step": 4175 }, { "epoch": 0.7023799512236145, "grad_norm": NaN, "learning_rate": 2.0948749337643852e-05, "loss": 0.0, "step": 4176 }, { "epoch": 0.7025481456563788, "grad_norm": NaN, "learning_rate": 2.0926881017121547e-05, "loss": 0.0, "step": 4177 }, { "epoch": 0.7027163400891431, "grad_norm": NaN, "learning_rate": 2.0905021095497674e-05, "loss": 0.0, "step": 4178 }, { "epoch": 0.7028845345219074, "grad_norm": NaN, "learning_rate": 2.0883169579087324e-05, "loss": 0.0, "step": 4179 }, { "epoch": 0.7030527289546716, "grad_norm": NaN, "learning_rate": 2.0861326474203143e-05, "loss": 0.0, "step": 4180 }, { "epoch": 0.7032209233874359, "grad_norm": NaN, "learning_rate": 2.0839491787155387e-05, "loss": 0.0, "step": 4181 }, { "epoch": 0.7033891178202002, "grad_norm": NaN, "learning_rate": 2.0817665524251835e-05, "loss": 0.0, "step": 4182 }, { "epoch": 0.7035573122529645, "grad_norm": NaN, "learning_rate": 2.0795847691797853e-05, "loss": 0.0, "step": 4183 }, { "epoch": 0.7037255066857288, "grad_norm": NaN, "learning_rate": 2.0774038296096347e-05, "loss": 0.0, "step": 4184 }, { "epoch": 0.703893701118493, "grad_norm": NaN, "learning_rate": 2.075223734344785e-05, "loss": 0.0, "step": 4185 }, { "epoch": 0.7040618955512572, "grad_norm": NaN, "learning_rate": 2.0730444840150392e-05, "loss": 0.0, "step": 4186 }, { "epoch": 0.7042300899840215, "grad_norm": NaN, "learning_rate": 2.0708660792499578e-05, "loss": 0.0, "step": 4187 }, { "epoch": 0.7043982844167858, "grad_norm": NaN, "learning_rate": 2.0686885206788565e-05, "loss": 0.0, "step": 4188 }, { "epoch": 0.70456647884955, "grad_norm": NaN, "learning_rate": 2.0665118089308107e-05, "loss": 0.0, "step": 4189 }, { "epoch": 0.7047346732823143, "grad_norm": NaN, "learning_rate": 2.0643359446346462e-05, "loss": 0.0, "step": 4190 }, { "epoch": 0.7049028677150786, "grad_norm": NaN, "learning_rate": 2.0621609284189458e-05, "loss": 0.0, "step": 4191 }, { "epoch": 0.7050710621478429, "grad_norm": NaN, "learning_rate": 2.0599867609120477e-05, "loss": 0.0, "step": 4192 }, { "epoch": 0.7052392565806072, "grad_norm": NaN, "learning_rate": 2.0578134427420443e-05, "loss": 0.0, "step": 4193 }, { "epoch": 0.7054074510133714, "grad_norm": NaN, "learning_rate": 2.0556409745367828e-05, "loss": 0.0, "step": 4194 }, { "epoch": 0.7055756454461357, "grad_norm": NaN, "learning_rate": 2.053469356923865e-05, "loss": 0.0, "step": 4195 }, { "epoch": 0.7057438398789, "grad_norm": NaN, "learning_rate": 2.0512985905306453e-05, "loss": 0.0, "step": 4196 }, { "epoch": 0.7059120343116643, "grad_norm": NaN, "learning_rate": 2.0491286759842377e-05, "loss": 0.0, "step": 4197 }, { "epoch": 0.7060802287444286, "grad_norm": NaN, "learning_rate": 2.046959613911503e-05, "loss": 0.0, "step": 4198 }, { "epoch": 0.7062484231771928, "grad_norm": NaN, "learning_rate": 2.044791404939061e-05, "loss": 0.0, "step": 4199 }, { "epoch": 0.7064166176099571, "grad_norm": NaN, "learning_rate": 2.0426240496932814e-05, "loss": 0.0, "step": 4200 }, { "epoch": 0.7065848120427214, "grad_norm": NaN, "learning_rate": 2.0404575488002873e-05, "loss": 0.0, "step": 4201 }, { "epoch": 0.7067530064754857, "grad_norm": NaN, "learning_rate": 2.0382919028859605e-05, "loss": 0.0, "step": 4202 }, { "epoch": 0.70692120090825, "grad_norm": NaN, "learning_rate": 2.0361271125759302e-05, "loss": 0.0, "step": 4203 }, { "epoch": 0.7070893953410142, "grad_norm": NaN, "learning_rate": 2.0339631784955792e-05, "loss": 0.0, "step": 4204 }, { "epoch": 0.7072575897737785, "grad_norm": NaN, "learning_rate": 2.0318001012700433e-05, "loss": 0.0, "step": 4205 }, { "epoch": 0.7074257842065428, "grad_norm": NaN, "learning_rate": 2.0296378815242135e-05, "loss": 0.0, "step": 4206 }, { "epoch": 0.7075939786393071, "grad_norm": NaN, "learning_rate": 2.0274765198827302e-05, "loss": 0.0, "step": 4207 }, { "epoch": 0.7077621730720713, "grad_norm": NaN, "learning_rate": 2.0253160169699858e-05, "loss": 0.0, "step": 4208 }, { "epoch": 0.7079303675048356, "grad_norm": NaN, "learning_rate": 2.0231563734101243e-05, "loss": 0.0, "step": 4209 }, { "epoch": 0.7080985619375999, "grad_norm": NaN, "learning_rate": 2.0209975898270454e-05, "loss": 0.0, "step": 4210 }, { "epoch": 0.7082667563703642, "grad_norm": NaN, "learning_rate": 2.0188396668443965e-05, "loss": 0.0, "step": 4211 }, { "epoch": 0.7084349508031285, "grad_norm": NaN, "learning_rate": 2.016682605085577e-05, "loss": 0.0, "step": 4212 }, { "epoch": 0.7086031452358927, "grad_norm": NaN, "learning_rate": 2.014526405173738e-05, "loss": 0.0, "step": 4213 }, { "epoch": 0.708771339668657, "grad_norm": NaN, "learning_rate": 2.0123710677317803e-05, "loss": 0.0, "step": 4214 }, { "epoch": 0.7089395341014213, "grad_norm": NaN, "learning_rate": 2.0102165933823585e-05, "loss": 0.0, "step": 4215 }, { "epoch": 0.7091077285341855, "grad_norm": NaN, "learning_rate": 2.0080629827478753e-05, "loss": 0.0, "step": 4216 }, { "epoch": 0.7092759229669497, "grad_norm": NaN, "learning_rate": 2.0059102364504828e-05, "loss": 0.0, "step": 4217 }, { "epoch": 0.709444117399714, "grad_norm": NaN, "learning_rate": 2.0037583551120892e-05, "loss": 0.0, "step": 4218 }, { "epoch": 0.7096123118324783, "grad_norm": NaN, "learning_rate": 2.0016073393543473e-05, "loss": 0.0, "step": 4219 }, { "epoch": 0.7097805062652426, "grad_norm": NaN, "learning_rate": 1.99945718979866e-05, "loss": 0.0, "step": 4220 }, { "epoch": 0.7099487006980069, "grad_norm": NaN, "learning_rate": 1.9973079070661822e-05, "loss": 0.0, "step": 4221 }, { "epoch": 0.7101168951307711, "grad_norm": NaN, "learning_rate": 1.9951594917778187e-05, "loss": 0.0, "step": 4222 }, { "epoch": 0.7102850895635354, "grad_norm": NaN, "learning_rate": 1.9930119445542227e-05, "loss": 0.0, "step": 4223 }, { "epoch": 0.7104532839962997, "grad_norm": NaN, "learning_rate": 1.990865266015795e-05, "loss": 0.0, "step": 4224 }, { "epoch": 0.710621478429064, "grad_norm": NaN, "learning_rate": 1.9887194567826868e-05, "loss": 0.0, "step": 4225 }, { "epoch": 0.7107896728618283, "grad_norm": NaN, "learning_rate": 1.9865745174748e-05, "loss": 0.0, "step": 4226 }, { "epoch": 0.7109578672945925, "grad_norm": NaN, "learning_rate": 1.9844304487117836e-05, "loss": 0.0, "step": 4227 }, { "epoch": 0.7111260617273568, "grad_norm": NaN, "learning_rate": 1.9822872511130336e-05, "loss": 0.0, "step": 4228 }, { "epoch": 0.7112942561601211, "grad_norm": NaN, "learning_rate": 1.980144925297695e-05, "loss": 0.0, "step": 4229 }, { "epoch": 0.7114624505928854, "grad_norm": NaN, "learning_rate": 1.978003471884665e-05, "loss": 0.0, "step": 4230 }, { "epoch": 0.7116306450256497, "grad_norm": NaN, "learning_rate": 1.9758628914925837e-05, "loss": 0.0, "step": 4231 }, { "epoch": 0.7117988394584139, "grad_norm": NaN, "learning_rate": 1.9737231847398408e-05, "loss": 0.0, "step": 4232 }, { "epoch": 0.7119670338911782, "grad_norm": NaN, "learning_rate": 1.9715843522445737e-05, "loss": 0.0, "step": 4233 }, { "epoch": 0.7121352283239425, "grad_norm": NaN, "learning_rate": 1.9694463946246667e-05, "loss": 0.0, "step": 4234 }, { "epoch": 0.7123034227567068, "grad_norm": NaN, "learning_rate": 1.967309312497752e-05, "loss": 0.0, "step": 4235 }, { "epoch": 0.712471617189471, "grad_norm": NaN, "learning_rate": 1.965173106481209e-05, "loss": 0.0, "step": 4236 }, { "epoch": 0.7126398116222353, "grad_norm": NaN, "learning_rate": 1.963037777192162e-05, "loss": 0.0, "step": 4237 }, { "epoch": 0.7128080060549996, "grad_norm": NaN, "learning_rate": 1.960903325247487e-05, "loss": 0.0, "step": 4238 }, { "epoch": 0.7129762004877639, "grad_norm": NaN, "learning_rate": 1.9587697512638015e-05, "loss": 0.0, "step": 4239 }, { "epoch": 0.7131443949205282, "grad_norm": NaN, "learning_rate": 1.956637055857471e-05, "loss": 0.0, "step": 4240 }, { "epoch": 0.7133125893532924, "grad_norm": NaN, "learning_rate": 1.9545052396446055e-05, "loss": 0.0, "step": 4241 }, { "epoch": 0.7134807837860567, "grad_norm": NaN, "learning_rate": 1.9523743032410667e-05, "loss": 0.0, "step": 4242 }, { "epoch": 0.713648978218821, "grad_norm": NaN, "learning_rate": 1.9502442472624567e-05, "loss": 0.0, "step": 4243 }, { "epoch": 0.7138171726515853, "grad_norm": NaN, "learning_rate": 1.9481150723241236e-05, "loss": 0.0, "step": 4244 }, { "epoch": 0.7139853670843495, "grad_norm": NaN, "learning_rate": 1.9459867790411617e-05, "loss": 0.0, "step": 4245 }, { "epoch": 0.7141535615171137, "grad_norm": NaN, "learning_rate": 1.9438593680284145e-05, "loss": 0.0, "step": 4246 }, { "epoch": 0.714321755949878, "grad_norm": NaN, "learning_rate": 1.9417328399004646e-05, "loss": 0.0, "step": 4247 }, { "epoch": 0.7144899503826423, "grad_norm": NaN, "learning_rate": 1.939607195271643e-05, "loss": 0.0, "step": 4248 }, { "epoch": 0.7146581448154066, "grad_norm": NaN, "learning_rate": 1.9374824347560243e-05, "loss": 0.0, "step": 4249 }, { "epoch": 0.7148263392481709, "grad_norm": NaN, "learning_rate": 1.9353585589674285e-05, "loss": 0.0, "step": 4250 }, { "epoch": 0.7149945336809351, "grad_norm": NaN, "learning_rate": 1.9332355685194182e-05, "loss": 0.0, "step": 4251 }, { "epoch": 0.7151627281136994, "grad_norm": NaN, "learning_rate": 1.931113464025303e-05, "loss": 0.0, "step": 4252 }, { "epoch": 0.7153309225464637, "grad_norm": NaN, "learning_rate": 1.928992246098134e-05, "loss": 0.0, "step": 4253 }, { "epoch": 0.715499116979228, "grad_norm": NaN, "learning_rate": 1.926871915350707e-05, "loss": 0.0, "step": 4254 }, { "epoch": 0.7156673114119922, "grad_norm": NaN, "learning_rate": 1.9247524723955635e-05, "loss": 0.0, "step": 4255 }, { "epoch": 0.7158355058447565, "grad_norm": NaN, "learning_rate": 1.9226339178449865e-05, "loss": 0.0, "step": 4256 }, { "epoch": 0.7160037002775208, "grad_norm": NaN, "learning_rate": 1.920516252311002e-05, "loss": 0.0, "step": 4257 }, { "epoch": 0.7161718947102851, "grad_norm": NaN, "learning_rate": 1.918399476405378e-05, "loss": 0.0, "step": 4258 }, { "epoch": 0.7163400891430494, "grad_norm": NaN, "learning_rate": 1.9162835907396314e-05, "loss": 0.0, "step": 4259 }, { "epoch": 0.7165082835758136, "grad_norm": NaN, "learning_rate": 1.914168595925016e-05, "loss": 0.0, "step": 4260 }, { "epoch": 0.7166764780085779, "grad_norm": NaN, "learning_rate": 1.9120544925725298e-05, "loss": 0.0, "step": 4261 }, { "epoch": 0.7168446724413422, "grad_norm": NaN, "learning_rate": 1.909941281292912e-05, "loss": 0.0, "step": 4262 }, { "epoch": 0.7170128668741065, "grad_norm": NaN, "learning_rate": 1.907828962696649e-05, "loss": 0.0, "step": 4263 }, { "epoch": 0.7171810613068708, "grad_norm": NaN, "learning_rate": 1.9057175373939645e-05, "loss": 0.0, "step": 4264 }, { "epoch": 0.717349255739635, "grad_norm": NaN, "learning_rate": 1.9036070059948252e-05, "loss": 0.0, "step": 4265 }, { "epoch": 0.7175174501723993, "grad_norm": NaN, "learning_rate": 1.9014973691089387e-05, "loss": 0.0, "step": 4266 }, { "epoch": 0.7176856446051636, "grad_norm": NaN, "learning_rate": 1.899388627345759e-05, "loss": 0.0, "step": 4267 }, { "epoch": 0.7178538390379279, "grad_norm": NaN, "learning_rate": 1.897280781314476e-05, "loss": 0.0, "step": 4268 }, { "epoch": 0.7180220334706922, "grad_norm": NaN, "learning_rate": 1.8951738316240226e-05, "loss": 0.0, "step": 4269 }, { "epoch": 0.7181902279034564, "grad_norm": NaN, "learning_rate": 1.893067778883073e-05, "loss": 0.0, "step": 4270 }, { "epoch": 0.7183584223362207, "grad_norm": NaN, "learning_rate": 1.8909626237000432e-05, "loss": 0.0, "step": 4271 }, { "epoch": 0.718526616768985, "grad_norm": NaN, "learning_rate": 1.8888583666830876e-05, "loss": 0.0, "step": 4272 }, { "epoch": 0.7186948112017493, "grad_norm": NaN, "learning_rate": 1.886755008440103e-05, "loss": 0.0, "step": 4273 }, { "epoch": 0.7188630056345136, "grad_norm": NaN, "learning_rate": 1.8846525495787247e-05, "loss": 0.0, "step": 4274 }, { "epoch": 0.7190312000672777, "grad_norm": NaN, "learning_rate": 1.8825509907063327e-05, "loss": 0.0, "step": 4275 }, { "epoch": 0.719199394500042, "grad_norm": NaN, "learning_rate": 1.8804503324300422e-05, "loss": 0.0, "step": 4276 }, { "epoch": 0.7193675889328063, "grad_norm": NaN, "learning_rate": 1.8783505753567094e-05, "loss": 0.0, "step": 4277 }, { "epoch": 0.7195357833655706, "grad_norm": NaN, "learning_rate": 1.8762517200929292e-05, "loss": 0.0, "step": 4278 }, { "epoch": 0.7197039777983348, "grad_norm": NaN, "learning_rate": 1.8741537672450405e-05, "loss": 0.0, "step": 4279 }, { "epoch": 0.7198721722310991, "grad_norm": NaN, "learning_rate": 1.872056717419117e-05, "loss": 0.0, "step": 4280 }, { "epoch": 0.7200403666638634, "grad_norm": NaN, "learning_rate": 1.869960571220974e-05, "loss": 0.0, "step": 4281 }, { "epoch": 0.7202085610966277, "grad_norm": NaN, "learning_rate": 1.8678653292561605e-05, "loss": 0.0, "step": 4282 }, { "epoch": 0.720376755529392, "grad_norm": NaN, "learning_rate": 1.8657709921299736e-05, "loss": 0.0, "step": 4283 }, { "epoch": 0.7205449499621562, "grad_norm": NaN, "learning_rate": 1.8636775604474416e-05, "loss": 0.0, "step": 4284 }, { "epoch": 0.7207131443949205, "grad_norm": NaN, "learning_rate": 1.861585034813334e-05, "loss": 0.0, "step": 4285 }, { "epoch": 0.7208813388276848, "grad_norm": NaN, "learning_rate": 1.859493415832157e-05, "loss": 0.0, "step": 4286 }, { "epoch": 0.7210495332604491, "grad_norm": NaN, "learning_rate": 1.857402704108157e-05, "loss": 0.0, "step": 4287 }, { "epoch": 0.7212177276932134, "grad_norm": NaN, "learning_rate": 1.8553129002453163e-05, "loss": 0.0, "step": 4288 }, { "epoch": 0.7213859221259776, "grad_norm": NaN, "learning_rate": 1.8532240048473566e-05, "loss": 0.0, "step": 4289 }, { "epoch": 0.7215541165587419, "grad_norm": NaN, "learning_rate": 1.851136018517734e-05, "loss": 0.0, "step": 4290 }, { "epoch": 0.7217223109915062, "grad_norm": NaN, "learning_rate": 1.8490489418596478e-05, "loss": 0.0, "step": 4291 }, { "epoch": 0.7218905054242705, "grad_norm": NaN, "learning_rate": 1.84696277547603e-05, "loss": 0.0, "step": 4292 }, { "epoch": 0.7220586998570347, "grad_norm": NaN, "learning_rate": 1.8448775199695502e-05, "loss": 0.0, "step": 4293 }, { "epoch": 0.722226894289799, "grad_norm": NaN, "learning_rate": 1.8427931759426137e-05, "loss": 0.0, "step": 4294 }, { "epoch": 0.7223950887225633, "grad_norm": NaN, "learning_rate": 1.8407097439973674e-05, "loss": 0.0, "step": 4295 }, { "epoch": 0.7225632831553276, "grad_norm": NaN, "learning_rate": 1.8386272247356904e-05, "loss": 0.0, "step": 4296 }, { "epoch": 0.7227314775880919, "grad_norm": NaN, "learning_rate": 1.836545618759199e-05, "loss": 0.0, "step": 4297 }, { "epoch": 0.7228996720208561, "grad_norm": NaN, "learning_rate": 1.8344649266692455e-05, "loss": 0.0, "step": 4298 }, { "epoch": 0.7230678664536204, "grad_norm": NaN, "learning_rate": 1.8323851490669163e-05, "loss": 0.0, "step": 4299 }, { "epoch": 0.7232360608863847, "grad_norm": NaN, "learning_rate": 1.8303062865530406e-05, "loss": 0.0, "step": 4300 }, { "epoch": 0.723404255319149, "grad_norm": NaN, "learning_rate": 1.828228339728176e-05, "loss": 0.0, "step": 4301 }, { "epoch": 0.7235724497519133, "grad_norm": NaN, "learning_rate": 1.826151309192618e-05, "loss": 0.0, "step": 4302 }, { "epoch": 0.7237406441846775, "grad_norm": NaN, "learning_rate": 1.8240751955463958e-05, "loss": 0.0, "step": 4303 }, { "epoch": 0.7239088386174417, "grad_norm": NaN, "learning_rate": 1.821999999389279e-05, "loss": 0.0, "step": 4304 }, { "epoch": 0.724077033050206, "grad_norm": NaN, "learning_rate": 1.8199257213207672e-05, "loss": 0.0, "step": 4305 }, { "epoch": 0.7242452274829703, "grad_norm": NaN, "learning_rate": 1.817852361940095e-05, "loss": 0.0, "step": 4306 }, { "epoch": 0.7244134219157345, "grad_norm": NaN, "learning_rate": 1.8157799218462335e-05, "loss": 0.0, "step": 4307 }, { "epoch": 0.7245816163484988, "grad_norm": NaN, "learning_rate": 1.8137084016378875e-05, "loss": 0.0, "step": 4308 }, { "epoch": 0.7247498107812631, "grad_norm": NaN, "learning_rate": 1.8116378019134957e-05, "loss": 0.0, "step": 4309 }, { "epoch": 0.7249180052140274, "grad_norm": NaN, "learning_rate": 1.809568123271231e-05, "loss": 0.0, "step": 4310 }, { "epoch": 0.7250861996467917, "grad_norm": NaN, "learning_rate": 1.8074993663089988e-05, "loss": 0.0, "step": 4311 }, { "epoch": 0.7252543940795559, "grad_norm": NaN, "learning_rate": 1.8054315316244442e-05, "loss": 0.0, "step": 4312 }, { "epoch": 0.7254225885123202, "grad_norm": NaN, "learning_rate": 1.8033646198149377e-05, "loss": 0.0, "step": 4313 }, { "epoch": 0.7255907829450845, "grad_norm": NaN, "learning_rate": 1.801298631477589e-05, "loss": 0.0, "step": 4314 }, { "epoch": 0.7257589773778488, "grad_norm": NaN, "learning_rate": 1.7992335672092362e-05, "loss": 0.0, "step": 4315 }, { "epoch": 0.7259271718106131, "grad_norm": NaN, "learning_rate": 1.7971694276064564e-05, "loss": 0.0, "step": 4316 }, { "epoch": 0.7260953662433773, "grad_norm": NaN, "learning_rate": 1.7951062132655562e-05, "loss": 0.0, "step": 4317 }, { "epoch": 0.7262635606761416, "grad_norm": NaN, "learning_rate": 1.7930439247825737e-05, "loss": 0.0, "step": 4318 }, { "epoch": 0.7264317551089059, "grad_norm": NaN, "learning_rate": 1.7909825627532796e-05, "loss": 0.0, "step": 4319 }, { "epoch": 0.7265999495416702, "grad_norm": NaN, "learning_rate": 1.7889221277731816e-05, "loss": 0.0, "step": 4320 }, { "epoch": 0.7267681439744345, "grad_norm": NaN, "learning_rate": 1.786862620437515e-05, "loss": 0.0, "step": 4321 }, { "epoch": 0.7269363384071987, "grad_norm": NaN, "learning_rate": 1.784804041341248e-05, "loss": 0.0, "step": 4322 }, { "epoch": 0.727104532839963, "grad_norm": NaN, "learning_rate": 1.782746391079081e-05, "loss": 0.0, "step": 4323 }, { "epoch": 0.7272727272727273, "grad_norm": NaN, "learning_rate": 1.780689670245447e-05, "loss": 0.0, "step": 4324 }, { "epoch": 0.7274409217054916, "grad_norm": NaN, "learning_rate": 1.7786338794345082e-05, "loss": 0.0, "step": 4325 }, { "epoch": 0.7276091161382559, "grad_norm": NaN, "learning_rate": 1.7765790192401604e-05, "loss": 0.0, "step": 4326 }, { "epoch": 0.7277773105710201, "grad_norm": NaN, "learning_rate": 1.7745250902560278e-05, "loss": 0.0, "step": 4327 }, { "epoch": 0.7279455050037844, "grad_norm": NaN, "learning_rate": 1.772472093075471e-05, "loss": 0.0, "step": 4328 }, { "epoch": 0.7281136994365487, "grad_norm": NaN, "learning_rate": 1.7704200282915766e-05, "loss": 0.0, "step": 4329 }, { "epoch": 0.728281893869313, "grad_norm": NaN, "learning_rate": 1.7683688964971625e-05, "loss": 0.0, "step": 4330 }, { "epoch": 0.7284500883020772, "grad_norm": NaN, "learning_rate": 1.7663186982847757e-05, "loss": 0.0, "step": 4331 }, { "epoch": 0.7286182827348415, "grad_norm": NaN, "learning_rate": 1.7642694342466998e-05, "loss": 0.0, "step": 4332 }, { "epoch": 0.7287864771676058, "grad_norm": NaN, "learning_rate": 1.7622211049749422e-05, "loss": 0.0, "step": 4333 }, { "epoch": 0.72895467160037, "grad_norm": NaN, "learning_rate": 1.7601737110612416e-05, "loss": 0.0, "step": 4334 }, { "epoch": 0.7291228660331343, "grad_norm": NaN, "learning_rate": 1.7581272530970667e-05, "loss": 0.0, "step": 4335 }, { "epoch": 0.7292910604658985, "grad_norm": NaN, "learning_rate": 1.7560817316736183e-05, "loss": 0.0, "step": 4336 }, { "epoch": 0.7294592548986628, "grad_norm": NaN, "learning_rate": 1.754037147381824e-05, "loss": 0.0, "step": 4337 }, { "epoch": 0.7296274493314271, "grad_norm": NaN, "learning_rate": 1.7519935008123412e-05, "loss": 0.0, "step": 4338 }, { "epoch": 0.7297956437641914, "grad_norm": NaN, "learning_rate": 1.749950792555554e-05, "loss": 0.0, "step": 4339 }, { "epoch": 0.7299638381969556, "grad_norm": NaN, "learning_rate": 1.7479090232015822e-05, "loss": 0.0, "step": 4340 }, { "epoch": 0.7301320326297199, "grad_norm": NaN, "learning_rate": 1.7458681933402682e-05, "loss": 0.0, "step": 4341 }, { "epoch": 0.7303002270624842, "grad_norm": NaN, "learning_rate": 1.7438283035611846e-05, "loss": 0.0, "step": 4342 }, { "epoch": 0.7304684214952485, "grad_norm": NaN, "learning_rate": 1.741789354453633e-05, "loss": 0.0, "step": 4343 }, { "epoch": 0.7306366159280128, "grad_norm": NaN, "learning_rate": 1.7397513466066428e-05, "loss": 0.0, "step": 4344 }, { "epoch": 0.730804810360777, "grad_norm": NaN, "learning_rate": 1.7377142806089717e-05, "loss": 0.0, "step": 4345 }, { "epoch": 0.7309730047935413, "grad_norm": NaN, "learning_rate": 1.7356781570491053e-05, "loss": 0.0, "step": 4346 }, { "epoch": 0.7311411992263056, "grad_norm": NaN, "learning_rate": 1.7336429765152568e-05, "loss": 0.0, "step": 4347 }, { "epoch": 0.7313093936590699, "grad_norm": NaN, "learning_rate": 1.7316087395953655e-05, "loss": 0.0, "step": 4348 }, { "epoch": 0.7314775880918342, "grad_norm": NaN, "learning_rate": 1.7295754468771024e-05, "loss": 0.0, "step": 4349 }, { "epoch": 0.7316457825245984, "grad_norm": NaN, "learning_rate": 1.7275430989478626e-05, "loss": 0.0, "step": 4350 }, { "epoch": 0.7318139769573627, "grad_norm": NaN, "learning_rate": 1.7255116963947676e-05, "loss": 0.0, "step": 4351 }, { "epoch": 0.731982171390127, "grad_norm": NaN, "learning_rate": 1.7234812398046653e-05, "loss": 0.0, "step": 4352 }, { "epoch": 0.7321503658228913, "grad_norm": NaN, "learning_rate": 1.7214517297641353e-05, "loss": 0.0, "step": 4353 }, { "epoch": 0.7323185602556556, "grad_norm": NaN, "learning_rate": 1.719423166859479e-05, "loss": 0.0, "step": 4354 }, { "epoch": 0.7324867546884198, "grad_norm": NaN, "learning_rate": 1.7173955516767255e-05, "loss": 0.0, "step": 4355 }, { "epoch": 0.7326549491211841, "grad_norm": NaN, "learning_rate": 1.7153688848016277e-05, "loss": 0.0, "step": 4356 }, { "epoch": 0.7328231435539484, "grad_norm": NaN, "learning_rate": 1.713343166819671e-05, "loss": 0.0, "step": 4357 }, { "epoch": 0.7329913379867127, "grad_norm": NaN, "learning_rate": 1.7113183983160602e-05, "loss": 0.0, "step": 4358 }, { "epoch": 0.733159532419477, "grad_norm": NaN, "learning_rate": 1.7092945798757294e-05, "loss": 0.0, "step": 4359 }, { "epoch": 0.7333277268522412, "grad_norm": NaN, "learning_rate": 1.707271712083335e-05, "loss": 0.0, "step": 4360 }, { "epoch": 0.7334959212850055, "grad_norm": NaN, "learning_rate": 1.7052497955232633e-05, "loss": 0.0, "step": 4361 }, { "epoch": 0.7336641157177698, "grad_norm": NaN, "learning_rate": 1.7032288307796225e-05, "loss": 0.0, "step": 4362 }, { "epoch": 0.733832310150534, "grad_norm": NaN, "learning_rate": 1.7012088184362467e-05, "loss": 0.0, "step": 4363 }, { "epoch": 0.7340005045832982, "grad_norm": NaN, "learning_rate": 1.699189759076695e-05, "loss": 0.0, "step": 4364 }, { "epoch": 0.7341686990160625, "grad_norm": NaN, "learning_rate": 1.6971716532842504e-05, "loss": 0.0, "step": 4365 }, { "epoch": 0.7343368934488268, "grad_norm": NaN, "learning_rate": 1.6951545016419207e-05, "loss": 0.0, "step": 4366 }, { "epoch": 0.7345050878815911, "grad_norm": NaN, "learning_rate": 1.69313830473244e-05, "loss": 0.0, "step": 4367 }, { "epoch": 0.7346732823143554, "grad_norm": NaN, "learning_rate": 1.6911230631382618e-05, "loss": 0.0, "step": 4368 }, { "epoch": 0.7348414767471196, "grad_norm": NaN, "learning_rate": 1.6891087774415714e-05, "loss": 0.0, "step": 4369 }, { "epoch": 0.7350096711798839, "grad_norm": NaN, "learning_rate": 1.6870954482242707e-05, "loss": 0.0, "step": 4370 }, { "epoch": 0.7351778656126482, "grad_norm": NaN, "learning_rate": 1.6850830760679887e-05, "loss": 0.0, "step": 4371 }, { "epoch": 0.7353460600454125, "grad_norm": NaN, "learning_rate": 1.683071661554075e-05, "loss": 0.0, "step": 4372 }, { "epoch": 0.7355142544781768, "grad_norm": NaN, "learning_rate": 1.6810612052636078e-05, "loss": 0.0, "step": 4373 }, { "epoch": 0.735682448910941, "grad_norm": NaN, "learning_rate": 1.6790517077773845e-05, "loss": 0.0, "step": 4374 }, { "epoch": 0.7358506433437053, "grad_norm": NaN, "learning_rate": 1.677043169675926e-05, "loss": 0.0, "step": 4375 }, { "epoch": 0.7360188377764696, "grad_norm": NaN, "learning_rate": 1.675035591539475e-05, "loss": 0.0, "step": 4376 }, { "epoch": 0.7361870322092339, "grad_norm": NaN, "learning_rate": 1.6730289739480015e-05, "loss": 0.0, "step": 4377 }, { "epoch": 0.7363552266419982, "grad_norm": NaN, "learning_rate": 1.671023317481193e-05, "loss": 0.0, "step": 4378 }, { "epoch": 0.7365234210747624, "grad_norm": NaN, "learning_rate": 1.6690186227184618e-05, "loss": 0.0, "step": 4379 }, { "epoch": 0.7366916155075267, "grad_norm": NaN, "learning_rate": 1.667014890238941e-05, "loss": 0.0, "step": 4380 }, { "epoch": 0.736859809940291, "grad_norm": NaN, "learning_rate": 1.665012120621487e-05, "loss": 0.0, "step": 4381 }, { "epoch": 0.7370280043730553, "grad_norm": NaN, "learning_rate": 1.6630103144446775e-05, "loss": 0.0, "step": 4382 }, { "epoch": 0.7371961988058195, "grad_norm": NaN, "learning_rate": 1.6610094722868114e-05, "loss": 0.0, "step": 4383 }, { "epoch": 0.7373643932385838, "grad_norm": NaN, "learning_rate": 1.659009594725908e-05, "loss": 0.0, "step": 4384 }, { "epoch": 0.7375325876713481, "grad_norm": NaN, "learning_rate": 1.6570106823397142e-05, "loss": 0.0, "step": 4385 }, { "epoch": 0.7377007821041124, "grad_norm": NaN, "learning_rate": 1.6550127357056893e-05, "loss": 0.0, "step": 4386 }, { "epoch": 0.7378689765368767, "grad_norm": NaN, "learning_rate": 1.65301575540102e-05, "loss": 0.0, "step": 4387 }, { "epoch": 0.7380371709696409, "grad_norm": NaN, "learning_rate": 1.6510197420026086e-05, "loss": 0.0, "step": 4388 }, { "epoch": 0.7382053654024052, "grad_norm": NaN, "learning_rate": 1.6490246960870847e-05, "loss": 0.0, "step": 4389 }, { "epoch": 0.7383735598351695, "grad_norm": NaN, "learning_rate": 1.647030618230793e-05, "loss": 0.0, "step": 4390 }, { "epoch": 0.7385417542679338, "grad_norm": NaN, "learning_rate": 1.6450375090098003e-05, "loss": 0.0, "step": 4391 }, { "epoch": 0.7387099487006981, "grad_norm": NaN, "learning_rate": 1.643045368999892e-05, "loss": 0.0, "step": 4392 }, { "epoch": 0.7388781431334622, "grad_norm": NaN, "learning_rate": 1.641054198776578e-05, "loss": 0.0, "step": 4393 }, { "epoch": 0.7390463375662265, "grad_norm": NaN, "learning_rate": 1.6390639989150834e-05, "loss": 0.0, "step": 4394 }, { "epoch": 0.7392145319989908, "grad_norm": NaN, "learning_rate": 1.6370747699903556e-05, "loss": 0.0, "step": 4395 }, { "epoch": 0.7393827264317551, "grad_norm": NaN, "learning_rate": 1.6350865125770576e-05, "loss": 0.0, "step": 4396 }, { "epoch": 0.7395509208645193, "grad_norm": NaN, "learning_rate": 1.6330992272495786e-05, "loss": 0.0, "step": 4397 }, { "epoch": 0.7397191152972836, "grad_norm": NaN, "learning_rate": 1.6311129145820218e-05, "loss": 0.0, "step": 4398 }, { "epoch": 0.7398873097300479, "grad_norm": NaN, "learning_rate": 1.62912757514821e-05, "loss": 0.0, "step": 4399 }, { "epoch": 0.7400555041628122, "grad_norm": NaN, "learning_rate": 1.627143209521686e-05, "loss": 0.0, "step": 4400 }, { "epoch": 0.7402236985955765, "grad_norm": NaN, "learning_rate": 1.62515981827571e-05, "loss": 0.0, "step": 4401 }, { "epoch": 0.7403918930283407, "grad_norm": NaN, "learning_rate": 1.623177401983263e-05, "loss": 0.0, "step": 4402 }, { "epoch": 0.740560087461105, "grad_norm": NaN, "learning_rate": 1.6211959612170414e-05, "loss": 0.0, "step": 4403 }, { "epoch": 0.7407282818938693, "grad_norm": NaN, "learning_rate": 1.6192154965494616e-05, "loss": 0.0, "step": 4404 }, { "epoch": 0.7408964763266336, "grad_norm": NaN, "learning_rate": 1.6172360085526565e-05, "loss": 0.0, "step": 4405 }, { "epoch": 0.7410646707593979, "grad_norm": NaN, "learning_rate": 1.6152574977984814e-05, "loss": 0.0, "step": 4406 }, { "epoch": 0.7412328651921621, "grad_norm": NaN, "learning_rate": 1.613279964858504e-05, "loss": 0.0, "step": 4407 }, { "epoch": 0.7414010596249264, "grad_norm": NaN, "learning_rate": 1.6113034103040107e-05, "loss": 0.0, "step": 4408 }, { "epoch": 0.7415692540576907, "grad_norm": NaN, "learning_rate": 1.6093278347060055e-05, "loss": 0.0, "step": 4409 }, { "epoch": 0.741737448490455, "grad_norm": NaN, "learning_rate": 1.607353238635212e-05, "loss": 0.0, "step": 4410 }, { "epoch": 0.7419056429232193, "grad_norm": NaN, "learning_rate": 1.6053796226620688e-05, "loss": 0.0, "step": 4411 }, { "epoch": 0.7420738373559835, "grad_norm": NaN, "learning_rate": 1.6034069873567303e-05, "loss": 0.0, "step": 4412 }, { "epoch": 0.7422420317887478, "grad_norm": NaN, "learning_rate": 1.6014353332890676e-05, "loss": 0.0, "step": 4413 }, { "epoch": 0.7424102262215121, "grad_norm": NaN, "learning_rate": 1.599464661028673e-05, "loss": 0.0, "step": 4414 }, { "epoch": 0.7425784206542764, "grad_norm": NaN, "learning_rate": 1.5974949711448488e-05, "loss": 0.0, "step": 4415 }, { "epoch": 0.7427466150870407, "grad_norm": NaN, "learning_rate": 1.5955262642066175e-05, "loss": 0.0, "step": 4416 }, { "epoch": 0.7429148095198049, "grad_norm": NaN, "learning_rate": 1.593558540782717e-05, "loss": 0.0, "step": 4417 }, { "epoch": 0.7430830039525692, "grad_norm": NaN, "learning_rate": 1.5915918014415986e-05, "loss": 0.0, "step": 4418 }, { "epoch": 0.7432511983853335, "grad_norm": NaN, "learning_rate": 1.5896260467514336e-05, "loss": 0.0, "step": 4419 }, { "epoch": 0.7434193928180978, "grad_norm": NaN, "learning_rate": 1.5876612772801042e-05, "loss": 0.0, "step": 4420 }, { "epoch": 0.743587587250862, "grad_norm": NaN, "learning_rate": 1.58569749359521e-05, "loss": 0.0, "step": 4421 }, { "epoch": 0.7437557816836263, "grad_norm": NaN, "learning_rate": 1.5837346962640697e-05, "loss": 0.0, "step": 4422 }, { "epoch": 0.7439239761163905, "grad_norm": NaN, "learning_rate": 1.5817728858537107e-05, "loss": 0.0, "step": 4423 }, { "epoch": 0.7440921705491548, "grad_norm": NaN, "learning_rate": 1.5798120629308788e-05, "loss": 0.0, "step": 4424 }, { "epoch": 0.744260364981919, "grad_norm": NaN, "learning_rate": 1.5778522280620323e-05, "loss": 0.0, "step": 4425 }, { "epoch": 0.7444285594146833, "grad_norm": NaN, "learning_rate": 1.575893381813348e-05, "loss": 0.0, "step": 4426 }, { "epoch": 0.7445967538474476, "grad_norm": NaN, "learning_rate": 1.573935524750714e-05, "loss": 0.0, "step": 4427 }, { "epoch": 0.7447649482802119, "grad_norm": NaN, "learning_rate": 1.5719786574397328e-05, "loss": 0.0, "step": 4428 }, { "epoch": 0.7449331427129762, "grad_norm": NaN, "learning_rate": 1.570022780445719e-05, "loss": 0.0, "step": 4429 }, { "epoch": 0.7451013371457404, "grad_norm": NaN, "learning_rate": 1.5680678943337084e-05, "loss": 0.0, "step": 4430 }, { "epoch": 0.7452695315785047, "grad_norm": NaN, "learning_rate": 1.5661139996684433e-05, "loss": 0.0, "step": 4431 }, { "epoch": 0.745437726011269, "grad_norm": NaN, "learning_rate": 1.5641610970143816e-05, "loss": 0.0, "step": 4432 }, { "epoch": 0.7456059204440333, "grad_norm": NaN, "learning_rate": 1.5622091869356937e-05, "loss": 0.0, "step": 4433 }, { "epoch": 0.7457741148767976, "grad_norm": NaN, "learning_rate": 1.560258269996268e-05, "loss": 0.0, "step": 4434 }, { "epoch": 0.7459423093095618, "grad_norm": NaN, "learning_rate": 1.558308346759701e-05, "loss": 0.0, "step": 4435 }, { "epoch": 0.7461105037423261, "grad_norm": NaN, "learning_rate": 1.556359417789303e-05, "loss": 0.0, "step": 4436 }, { "epoch": 0.7462786981750904, "grad_norm": NaN, "learning_rate": 1.5544114836480984e-05, "loss": 0.0, "step": 4437 }, { "epoch": 0.7464468926078547, "grad_norm": NaN, "learning_rate": 1.5524645448988235e-05, "loss": 0.0, "step": 4438 }, { "epoch": 0.746615087040619, "grad_norm": NaN, "learning_rate": 1.550518602103927e-05, "loss": 0.0, "step": 4439 }, { "epoch": 0.7467832814733832, "grad_norm": NaN, "learning_rate": 1.5485736558255697e-05, "loss": 0.0, "step": 4440 }, { "epoch": 0.7469514759061475, "grad_norm": NaN, "learning_rate": 1.5466297066256235e-05, "loss": 0.0, "step": 4441 }, { "epoch": 0.7471196703389118, "grad_norm": NaN, "learning_rate": 1.544686755065677e-05, "loss": 0.0, "step": 4442 }, { "epoch": 0.7472878647716761, "grad_norm": NaN, "learning_rate": 1.542744801707025e-05, "loss": 0.0, "step": 4443 }, { "epoch": 0.7474560592044404, "grad_norm": NaN, "learning_rate": 1.5408038471106762e-05, "loss": 0.0, "step": 4444 }, { "epoch": 0.7476242536372046, "grad_norm": NaN, "learning_rate": 1.5388638918373495e-05, "loss": 0.0, "step": 4445 }, { "epoch": 0.7477924480699689, "grad_norm": NaN, "learning_rate": 1.5369249364474787e-05, "loss": 0.0, "step": 4446 }, { "epoch": 0.7479606425027332, "grad_norm": NaN, "learning_rate": 1.534986981501205e-05, "loss": 0.0, "step": 4447 }, { "epoch": 0.7481288369354975, "grad_norm": NaN, "learning_rate": 1.5330500275583825e-05, "loss": 0.0, "step": 4448 }, { "epoch": 0.7482970313682618, "grad_norm": NaN, "learning_rate": 1.5311140751785745e-05, "loss": 0.0, "step": 4449 }, { "epoch": 0.748465225801026, "grad_norm": NaN, "learning_rate": 1.529179124921055e-05, "loss": 0.0, "step": 4450 }, { "epoch": 0.7486334202337903, "grad_norm": NaN, "learning_rate": 1.527245177344813e-05, "loss": 0.0, "step": 4451 }, { "epoch": 0.7488016146665545, "grad_norm": NaN, "learning_rate": 1.5253122330085417e-05, "loss": 0.0, "step": 4452 }, { "epoch": 0.7489698090993188, "grad_norm": NaN, "learning_rate": 1.5233802924706475e-05, "loss": 0.0, "step": 4453 }, { "epoch": 0.749138003532083, "grad_norm": NaN, "learning_rate": 1.521449356289245e-05, "loss": 0.0, "step": 4454 }, { "epoch": 0.7493061979648473, "grad_norm": NaN, "learning_rate": 1.5195194250221629e-05, "loss": 0.0, "step": 4455 }, { "epoch": 0.7494743923976116, "grad_norm": NaN, "learning_rate": 1.5175904992269352e-05, "loss": 0.0, "step": 4456 }, { "epoch": 0.7496425868303759, "grad_norm": NaN, "learning_rate": 1.5156625794608076e-05, "loss": 0.0, "step": 4457 }, { "epoch": 0.7498107812631402, "grad_norm": NaN, "learning_rate": 1.5137356662807333e-05, "loss": 0.0, "step": 4458 }, { "epoch": 0.7499789756959044, "grad_norm": NaN, "learning_rate": 1.5118097602433773e-05, "loss": 0.0, "step": 4459 }, { "epoch": 0.7501471701286687, "grad_norm": NaN, "learning_rate": 1.5098848619051109e-05, "loss": 0.0, "step": 4460 }, { "epoch": 0.750315364561433, "grad_norm": NaN, "learning_rate": 1.5079609718220166e-05, "loss": 0.0, "step": 4461 }, { "epoch": 0.7504835589941973, "grad_norm": NaN, "learning_rate": 1.5060380905498833e-05, "loss": 0.0, "step": 4462 }, { "epoch": 0.7506517534269616, "grad_norm": NaN, "learning_rate": 1.504116218644212e-05, "loss": 0.0, "step": 4463 }, { "epoch": 0.7508199478597258, "grad_norm": NaN, "learning_rate": 1.5021953566602093e-05, "loss": 0.0, "step": 4464 }, { "epoch": 0.7509881422924901, "grad_norm": NaN, "learning_rate": 1.5002755051527906e-05, "loss": 0.0, "step": 4465 }, { "epoch": 0.7511563367252544, "grad_norm": NaN, "learning_rate": 1.4983566646765773e-05, "loss": 0.0, "step": 4466 }, { "epoch": 0.7513245311580187, "grad_norm": NaN, "learning_rate": 1.4964388357859044e-05, "loss": 0.0, "step": 4467 }, { "epoch": 0.751492725590783, "grad_norm": NaN, "learning_rate": 1.4945220190348102e-05, "loss": 0.0, "step": 4468 }, { "epoch": 0.7516609200235472, "grad_norm": NaN, "learning_rate": 1.492606214977041e-05, "loss": 0.0, "step": 4469 }, { "epoch": 0.7518291144563115, "grad_norm": NaN, "learning_rate": 1.4906914241660492e-05, "loss": 0.0, "step": 4470 }, { "epoch": 0.7519973088890758, "grad_norm": NaN, "learning_rate": 1.4887776471550002e-05, "loss": 0.0, "step": 4471 }, { "epoch": 0.7521655033218401, "grad_norm": NaN, "learning_rate": 1.4868648844967615e-05, "loss": 0.0, "step": 4472 }, { "epoch": 0.7523336977546043, "grad_norm": NaN, "learning_rate": 1.484953136743908e-05, "loss": 0.0, "step": 4473 }, { "epoch": 0.7525018921873686, "grad_norm": NaN, "learning_rate": 1.4830424044487223e-05, "loss": 0.0, "step": 4474 }, { "epoch": 0.7526700866201329, "grad_norm": NaN, "learning_rate": 1.4811326881631937e-05, "loss": 0.0, "step": 4475 }, { "epoch": 0.7528382810528972, "grad_norm": NaN, "learning_rate": 1.4792239884390186e-05, "loss": 0.0, "step": 4476 }, { "epoch": 0.7530064754856615, "grad_norm": NaN, "learning_rate": 1.477316305827598e-05, "loss": 0.0, "step": 4477 }, { "epoch": 0.7531746699184257, "grad_norm": NaN, "learning_rate": 1.4754096408800383e-05, "loss": 0.0, "step": 4478 }, { "epoch": 0.75334286435119, "grad_norm": NaN, "learning_rate": 1.4735039941471573e-05, "loss": 0.0, "step": 4479 }, { "epoch": 0.7535110587839543, "grad_norm": NaN, "learning_rate": 1.4715993661794736e-05, "loss": 0.0, "step": 4480 }, { "epoch": 0.7536792532167186, "grad_norm": NaN, "learning_rate": 1.469695757527213e-05, "loss": 0.0, "step": 4481 }, { "epoch": 0.7538474476494827, "grad_norm": NaN, "learning_rate": 1.4677931687403046e-05, "loss": 0.0, "step": 4482 }, { "epoch": 0.754015642082247, "grad_norm": NaN, "learning_rate": 1.4658916003683882e-05, "loss": 0.0, "step": 4483 }, { "epoch": 0.7541838365150113, "grad_norm": NaN, "learning_rate": 1.4639910529608042e-05, "loss": 0.0, "step": 4484 }, { "epoch": 0.7543520309477756, "grad_norm": NaN, "learning_rate": 1.4620915270665992e-05, "loss": 0.0, "step": 4485 }, { "epoch": 0.7545202253805399, "grad_norm": NaN, "learning_rate": 1.460193023234524e-05, "loss": 0.0, "step": 4486 }, { "epoch": 0.7546884198133041, "grad_norm": NaN, "learning_rate": 1.4582955420130372e-05, "loss": 0.0, "step": 4487 }, { "epoch": 0.7548566142460684, "grad_norm": NaN, "learning_rate": 1.4563990839502995e-05, "loss": 0.0, "step": 4488 }, { "epoch": 0.7550248086788327, "grad_norm": NaN, "learning_rate": 1.454503649594176e-05, "loss": 0.0, "step": 4489 }, { "epoch": 0.755193003111597, "grad_norm": NaN, "learning_rate": 1.4526092394922347e-05, "loss": 0.0, "step": 4490 }, { "epoch": 0.7553611975443613, "grad_norm": NaN, "learning_rate": 1.450715854191752e-05, "loss": 0.0, "step": 4491 }, { "epoch": 0.7555293919771255, "grad_norm": NaN, "learning_rate": 1.4488234942397056e-05, "loss": 0.0, "step": 4492 }, { "epoch": 0.7556975864098898, "grad_norm": NaN, "learning_rate": 1.4469321601827757e-05, "loss": 0.0, "step": 4493 }, { "epoch": 0.7558657808426541, "grad_norm": NaN, "learning_rate": 1.4450418525673481e-05, "loss": 0.0, "step": 4494 }, { "epoch": 0.7560339752754184, "grad_norm": NaN, "learning_rate": 1.4431525719395117e-05, "loss": 0.0, "step": 4495 }, { "epoch": 0.7562021697081827, "grad_norm": NaN, "learning_rate": 1.4412643188450581e-05, "loss": 0.0, "step": 4496 }, { "epoch": 0.7563703641409469, "grad_norm": NaN, "learning_rate": 1.4393770938294825e-05, "loss": 0.0, "step": 4497 }, { "epoch": 0.7565385585737112, "grad_norm": NaN, "learning_rate": 1.4374908974379837e-05, "loss": 0.0, "step": 4498 }, { "epoch": 0.7567067530064755, "grad_norm": NaN, "learning_rate": 1.4356057302154608e-05, "loss": 0.0, "step": 4499 }, { "epoch": 0.7568749474392398, "grad_norm": NaN, "learning_rate": 1.43372159270652e-05, "loss": 0.0, "step": 4500 }, { "epoch": 0.757043141872004, "grad_norm": NaN, "learning_rate": 1.4318384854554667e-05, "loss": 0.0, "step": 4501 }, { "epoch": 0.7572113363047683, "grad_norm": NaN, "learning_rate": 1.4299564090063088e-05, "loss": 0.0, "step": 4502 }, { "epoch": 0.7573795307375326, "grad_norm": NaN, "learning_rate": 1.4280753639027566e-05, "loss": 0.0, "step": 4503 }, { "epoch": 0.7575477251702969, "grad_norm": NaN, "learning_rate": 1.4261953506882252e-05, "loss": 0.0, "step": 4504 }, { "epoch": 0.7577159196030612, "grad_norm": NaN, "learning_rate": 1.4243163699058282e-05, "loss": 0.0, "step": 4505 }, { "epoch": 0.7578841140358255, "grad_norm": NaN, "learning_rate": 1.4224384220983817e-05, "loss": 0.0, "step": 4506 }, { "epoch": 0.7580523084685897, "grad_norm": NaN, "learning_rate": 1.4205615078084028e-05, "loss": 0.0, "step": 4507 }, { "epoch": 0.758220502901354, "grad_norm": NaN, "learning_rate": 1.418685627578114e-05, "loss": 0.0, "step": 4508 }, { "epoch": 0.7583886973341183, "grad_norm": NaN, "learning_rate": 1.4168107819494342e-05, "loss": 0.0, "step": 4509 }, { "epoch": 0.7585568917668826, "grad_norm": NaN, "learning_rate": 1.4149369714639853e-05, "loss": 0.0, "step": 4510 }, { "epoch": 0.7587250861996467, "grad_norm": NaN, "learning_rate": 1.4130641966630903e-05, "loss": 0.0, "step": 4511 }, { "epoch": 0.758893280632411, "grad_norm": NaN, "learning_rate": 1.4111924580877733e-05, "loss": 0.0, "step": 4512 }, { "epoch": 0.7590614750651753, "grad_norm": NaN, "learning_rate": 1.4093217562787575e-05, "loss": 0.0, "step": 4513 }, { "epoch": 0.7592296694979396, "grad_norm": NaN, "learning_rate": 1.4074520917764688e-05, "loss": 0.0, "step": 4514 }, { "epoch": 0.7593978639307039, "grad_norm": NaN, "learning_rate": 1.4055834651210309e-05, "loss": 0.0, "step": 4515 }, { "epoch": 0.7595660583634681, "grad_norm": NaN, "learning_rate": 1.4037158768522712e-05, "loss": 0.0, "step": 4516 }, { "epoch": 0.7597342527962324, "grad_norm": NaN, "learning_rate": 1.4018493275097139e-05, "loss": 0.0, "step": 4517 }, { "epoch": 0.7599024472289967, "grad_norm": NaN, "learning_rate": 1.3999838176325847e-05, "loss": 0.0, "step": 4518 }, { "epoch": 0.760070641661761, "grad_norm": NaN, "learning_rate": 1.3981193477598059e-05, "loss": 0.0, "step": 4519 }, { "epoch": 0.7602388360945252, "grad_norm": NaN, "learning_rate": 1.3962559184300062e-05, "loss": 0.0, "step": 4520 }, { "epoch": 0.7604070305272895, "grad_norm": NaN, "learning_rate": 1.3943935301815076e-05, "loss": 0.0, "step": 4521 }, { "epoch": 0.7605752249600538, "grad_norm": NaN, "learning_rate": 1.3925321835523326e-05, "loss": 0.0, "step": 4522 }, { "epoch": 0.7607434193928181, "grad_norm": NaN, "learning_rate": 1.3906718790802026e-05, "loss": 0.0, "step": 4523 }, { "epoch": 0.7609116138255824, "grad_norm": NaN, "learning_rate": 1.3888126173025412e-05, "loss": 0.0, "step": 4524 }, { "epoch": 0.7610798082583466, "grad_norm": NaN, "learning_rate": 1.386954398756467e-05, "loss": 0.0, "step": 4525 }, { "epoch": 0.7612480026911109, "grad_norm": NaN, "learning_rate": 1.3850972239787985e-05, "loss": 0.0, "step": 4526 }, { "epoch": 0.7614161971238752, "grad_norm": NaN, "learning_rate": 1.3832410935060513e-05, "loss": 0.0, "step": 4527 }, { "epoch": 0.7615843915566395, "grad_norm": NaN, "learning_rate": 1.3813860078744439e-05, "loss": 0.0, "step": 4528 }, { "epoch": 0.7617525859894038, "grad_norm": NaN, "learning_rate": 1.379531967619888e-05, "loss": 0.0, "step": 4529 }, { "epoch": 0.761920780422168, "grad_norm": NaN, "learning_rate": 1.3776789732779954e-05, "loss": 0.0, "step": 4530 }, { "epoch": 0.7620889748549323, "grad_norm": NaN, "learning_rate": 1.3758270253840744e-05, "loss": 0.0, "step": 4531 }, { "epoch": 0.7622571692876966, "grad_norm": NaN, "learning_rate": 1.373976124473133e-05, "loss": 0.0, "step": 4532 }, { "epoch": 0.7624253637204609, "grad_norm": NaN, "learning_rate": 1.3721262710798754e-05, "loss": 0.0, "step": 4533 }, { "epoch": 0.7625935581532252, "grad_norm": NaN, "learning_rate": 1.370277465738704e-05, "loss": 0.0, "step": 4534 }, { "epoch": 0.7627617525859894, "grad_norm": NaN, "learning_rate": 1.3684297089837155e-05, "loss": 0.0, "step": 4535 }, { "epoch": 0.7629299470187537, "grad_norm": NaN, "learning_rate": 1.3665830013487102e-05, "loss": 0.0, "step": 4536 }, { "epoch": 0.763098141451518, "grad_norm": NaN, "learning_rate": 1.3647373433671784e-05, "loss": 0.0, "step": 4537 }, { "epoch": 0.7632663358842823, "grad_norm": NaN, "learning_rate": 1.3628927355723114e-05, "loss": 0.0, "step": 4538 }, { "epoch": 0.7634345303170466, "grad_norm": NaN, "learning_rate": 1.3610491784969937e-05, "loss": 0.0, "step": 4539 }, { "epoch": 0.7636027247498108, "grad_norm": NaN, "learning_rate": 1.3592066726738113e-05, "loss": 0.0, "step": 4540 }, { "epoch": 0.763770919182575, "grad_norm": NaN, "learning_rate": 1.357365218635041e-05, "loss": 0.0, "step": 4541 }, { "epoch": 0.7639391136153393, "grad_norm": NaN, "learning_rate": 1.35552481691266e-05, "loss": 0.0, "step": 4542 }, { "epoch": 0.7641073080481036, "grad_norm": NaN, "learning_rate": 1.3536854680383375e-05, "loss": 0.0, "step": 4543 }, { "epoch": 0.7642755024808678, "grad_norm": NaN, "learning_rate": 1.3518471725434434e-05, "loss": 0.0, "step": 4544 }, { "epoch": 0.7644436969136321, "grad_norm": NaN, "learning_rate": 1.3500099309590397e-05, "loss": 0.0, "step": 4545 }, { "epoch": 0.7646118913463964, "grad_norm": NaN, "learning_rate": 1.3481737438158847e-05, "loss": 0.0, "step": 4546 }, { "epoch": 0.7647800857791607, "grad_norm": NaN, "learning_rate": 1.3463386116444327e-05, "loss": 0.0, "step": 4547 }, { "epoch": 0.764948280211925, "grad_norm": NaN, "learning_rate": 1.3445045349748304e-05, "loss": 0.0, "step": 4548 }, { "epoch": 0.7651164746446892, "grad_norm": NaN, "learning_rate": 1.3426715143369256e-05, "loss": 0.0, "step": 4549 }, { "epoch": 0.7652846690774535, "grad_norm": NaN, "learning_rate": 1.340839550260256e-05, "loss": 0.0, "step": 4550 }, { "epoch": 0.7654528635102178, "grad_norm": NaN, "learning_rate": 1.3390086432740551e-05, "loss": 0.0, "step": 4551 }, { "epoch": 0.7656210579429821, "grad_norm": NaN, "learning_rate": 1.3371787939072522e-05, "loss": 0.0, "step": 4552 }, { "epoch": 0.7657892523757464, "grad_norm": NaN, "learning_rate": 1.335350002688469e-05, "loss": 0.0, "step": 4553 }, { "epoch": 0.7659574468085106, "grad_norm": NaN, "learning_rate": 1.3335222701460243e-05, "loss": 0.0, "step": 4554 }, { "epoch": 0.7661256412412749, "grad_norm": NaN, "learning_rate": 1.3316955968079287e-05, "loss": 0.0, "step": 4555 }, { "epoch": 0.7662938356740392, "grad_norm": NaN, "learning_rate": 1.3298699832018862e-05, "loss": 0.0, "step": 4556 }, { "epoch": 0.7664620301068035, "grad_norm": NaN, "learning_rate": 1.3280454298552997e-05, "loss": 0.0, "step": 4557 }, { "epoch": 0.7666302245395678, "grad_norm": NaN, "learning_rate": 1.32622193729526e-05, "loss": 0.0, "step": 4558 }, { "epoch": 0.766798418972332, "grad_norm": NaN, "learning_rate": 1.3243995060485537e-05, "loss": 0.0, "step": 4559 }, { "epoch": 0.7669666134050963, "grad_norm": NaN, "learning_rate": 1.3225781366416595e-05, "loss": 0.0, "step": 4560 }, { "epoch": 0.7671348078378606, "grad_norm": NaN, "learning_rate": 1.3207578296007539e-05, "loss": 0.0, "step": 4561 }, { "epoch": 0.7673030022706249, "grad_norm": NaN, "learning_rate": 1.3189385854517012e-05, "loss": 0.0, "step": 4562 }, { "epoch": 0.7674711967033891, "grad_norm": NaN, "learning_rate": 1.3171204047200614e-05, "loss": 0.0, "step": 4563 }, { "epoch": 0.7676393911361534, "grad_norm": NaN, "learning_rate": 1.3153032879310839e-05, "loss": 0.0, "step": 4564 }, { "epoch": 0.7678075855689177, "grad_norm": NaN, "learning_rate": 1.3134872356097173e-05, "loss": 0.0, "step": 4565 }, { "epoch": 0.767975780001682, "grad_norm": NaN, "learning_rate": 1.311672248280597e-05, "loss": 0.0, "step": 4566 }, { "epoch": 0.7681439744344463, "grad_norm": NaN, "learning_rate": 1.309858326468053e-05, "loss": 0.0, "step": 4567 }, { "epoch": 0.7683121688672105, "grad_norm": NaN, "learning_rate": 1.3080454706961059e-05, "loss": 0.0, "step": 4568 }, { "epoch": 0.7684803632999748, "grad_norm": NaN, "learning_rate": 1.3062336814884702e-05, "loss": 0.0, "step": 4569 }, { "epoch": 0.7686485577327391, "grad_norm": NaN, "learning_rate": 1.3044229593685514e-05, "loss": 0.0, "step": 4570 }, { "epoch": 0.7688167521655033, "grad_norm": NaN, "learning_rate": 1.3026133048594469e-05, "loss": 0.0, "step": 4571 }, { "epoch": 0.7689849465982675, "grad_norm": NaN, "learning_rate": 1.3008047184839445e-05, "loss": 0.0, "step": 4572 }, { "epoch": 0.7691531410310318, "grad_norm": NaN, "learning_rate": 1.2989972007645263e-05, "loss": 0.0, "step": 4573 }, { "epoch": 0.7693213354637961, "grad_norm": NaN, "learning_rate": 1.2971907522233633e-05, "loss": 0.0, "step": 4574 }, { "epoch": 0.7694895298965604, "grad_norm": NaN, "learning_rate": 1.2953853733823185e-05, "loss": 0.0, "step": 4575 }, { "epoch": 0.7696577243293247, "grad_norm": NaN, "learning_rate": 1.2935810647629438e-05, "loss": 0.0, "step": 4576 }, { "epoch": 0.769825918762089, "grad_norm": NaN, "learning_rate": 1.2917778268864861e-05, "loss": 0.0, "step": 4577 }, { "epoch": 0.7699941131948532, "grad_norm": NaN, "learning_rate": 1.2899756602738805e-05, "loss": 0.0, "step": 4578 }, { "epoch": 0.7701623076276175, "grad_norm": NaN, "learning_rate": 1.2881745654457512e-05, "loss": 0.0, "step": 4579 }, { "epoch": 0.7703305020603818, "grad_norm": NaN, "learning_rate": 1.2863745429224144e-05, "loss": 0.0, "step": 4580 }, { "epoch": 0.7704986964931461, "grad_norm": NaN, "learning_rate": 1.2845755932238778e-05, "loss": 0.0, "step": 4581 }, { "epoch": 0.7706668909259103, "grad_norm": NaN, "learning_rate": 1.282777716869838e-05, "loss": 0.0, "step": 4582 }, { "epoch": 0.7708350853586746, "grad_norm": NaN, "learning_rate": 1.2809809143796803e-05, "loss": 0.0, "step": 4583 }, { "epoch": 0.7710032797914389, "grad_norm": NaN, "learning_rate": 1.2791851862724791e-05, "loss": 0.0, "step": 4584 }, { "epoch": 0.7711714742242032, "grad_norm": NaN, "learning_rate": 1.2773905330670038e-05, "loss": 0.0, "step": 4585 }, { "epoch": 0.7713396686569675, "grad_norm": NaN, "learning_rate": 1.2755969552817083e-05, "loss": 0.0, "step": 4586 }, { "epoch": 0.7715078630897317, "grad_norm": NaN, "learning_rate": 1.2738044534347365e-05, "loss": 0.0, "step": 4587 }, { "epoch": 0.771676057522496, "grad_norm": NaN, "learning_rate": 1.2720130280439218e-05, "loss": 0.0, "step": 4588 }, { "epoch": 0.7718442519552603, "grad_norm": NaN, "learning_rate": 1.270222679626788e-05, "loss": 0.0, "step": 4589 }, { "epoch": 0.7720124463880246, "grad_norm": NaN, "learning_rate": 1.2684334087005462e-05, "loss": 0.0, "step": 4590 }, { "epoch": 0.7721806408207889, "grad_norm": NaN, "learning_rate": 1.2666452157820963e-05, "loss": 0.0, "step": 4591 }, { "epoch": 0.7723488352535531, "grad_norm": NaN, "learning_rate": 1.2648581013880267e-05, "loss": 0.0, "step": 4592 }, { "epoch": 0.7725170296863174, "grad_norm": NaN, "learning_rate": 1.2630720660346163e-05, "loss": 0.0, "step": 4593 }, { "epoch": 0.7726852241190817, "grad_norm": NaN, "learning_rate": 1.2612871102378304e-05, "loss": 0.0, "step": 4594 }, { "epoch": 0.772853418551846, "grad_norm": NaN, "learning_rate": 1.2595032345133229e-05, "loss": 0.0, "step": 4595 }, { "epoch": 0.7730216129846103, "grad_norm": NaN, "learning_rate": 1.2577204393764331e-05, "loss": 0.0, "step": 4596 }, { "epoch": 0.7731898074173745, "grad_norm": NaN, "learning_rate": 1.2559387253421934e-05, "loss": 0.0, "step": 4597 }, { "epoch": 0.7733580018501388, "grad_norm": NaN, "learning_rate": 1.2541580929253204e-05, "loss": 0.0, "step": 4598 }, { "epoch": 0.7735261962829031, "grad_norm": NaN, "learning_rate": 1.252378542640219e-05, "loss": 0.0, "step": 4599 }, { "epoch": 0.7736943907156673, "grad_norm": NaN, "learning_rate": 1.2506000750009806e-05, "loss": 0.0, "step": 4600 }, { "epoch": 0.7738625851484315, "grad_norm": NaN, "learning_rate": 1.2488226905213829e-05, "loss": 0.0, "step": 4601 }, { "epoch": 0.7740307795811958, "grad_norm": NaN, "learning_rate": 1.247046389714896e-05, "loss": 0.0, "step": 4602 }, { "epoch": 0.7741989740139601, "grad_norm": NaN, "learning_rate": 1.245271173094672e-05, "loss": 0.0, "step": 4603 }, { "epoch": 0.7743671684467244, "grad_norm": NaN, "learning_rate": 1.2434970411735502e-05, "loss": 0.0, "step": 4604 }, { "epoch": 0.7745353628794887, "grad_norm": NaN, "learning_rate": 1.2417239944640586e-05, "loss": 0.0, "step": 4605 }, { "epoch": 0.7747035573122529, "grad_norm": NaN, "learning_rate": 1.2399520334784092e-05, "loss": 0.0, "step": 4606 }, { "epoch": 0.7748717517450172, "grad_norm": NaN, "learning_rate": 1.2381811587285031e-05, "loss": 0.0, "step": 4607 }, { "epoch": 0.7750399461777815, "grad_norm": NaN, "learning_rate": 1.2364113707259251e-05, "loss": 0.0, "step": 4608 }, { "epoch": 0.7752081406105458, "grad_norm": NaN, "learning_rate": 1.2346426699819458e-05, "loss": 0.0, "step": 4609 }, { "epoch": 0.77537633504331, "grad_norm": NaN, "learning_rate": 1.2328750570075265e-05, "loss": 0.0, "step": 4610 }, { "epoch": 0.7755445294760743, "grad_norm": NaN, "learning_rate": 1.2311085323133087e-05, "loss": 0.0, "step": 4611 }, { "epoch": 0.7757127239088386, "grad_norm": NaN, "learning_rate": 1.229343096409622e-05, "loss": 0.0, "step": 4612 }, { "epoch": 0.7758809183416029, "grad_norm": NaN, "learning_rate": 1.2275787498064795e-05, "loss": 0.0, "step": 4613 }, { "epoch": 0.7760491127743672, "grad_norm": NaN, "learning_rate": 1.2258154930135846e-05, "loss": 0.0, "step": 4614 }, { "epoch": 0.7762173072071314, "grad_norm": NaN, "learning_rate": 1.2240533265403198e-05, "loss": 0.0, "step": 4615 }, { "epoch": 0.7763855016398957, "grad_norm": NaN, "learning_rate": 1.2222922508957563e-05, "loss": 0.0, "step": 4616 }, { "epoch": 0.77655369607266, "grad_norm": NaN, "learning_rate": 1.2205322665886471e-05, "loss": 0.0, "step": 4617 }, { "epoch": 0.7767218905054243, "grad_norm": NaN, "learning_rate": 1.2187733741274354e-05, "loss": 0.0, "step": 4618 }, { "epoch": 0.7768900849381886, "grad_norm": NaN, "learning_rate": 1.2170155740202432e-05, "loss": 0.0, "step": 4619 }, { "epoch": 0.7770582793709528, "grad_norm": NaN, "learning_rate": 1.2152588667748805e-05, "loss": 0.0, "step": 4620 }, { "epoch": 0.7772264738037171, "grad_norm": NaN, "learning_rate": 1.2135032528988377e-05, "loss": 0.0, "step": 4621 }, { "epoch": 0.7773946682364814, "grad_norm": NaN, "learning_rate": 1.2117487328992955e-05, "loss": 0.0, "step": 4622 }, { "epoch": 0.7775628626692457, "grad_norm": NaN, "learning_rate": 1.2099953072831138e-05, "loss": 0.0, "step": 4623 }, { "epoch": 0.77773105710201, "grad_norm": NaN, "learning_rate": 1.208242976556837e-05, "loss": 0.0, "step": 4624 }, { "epoch": 0.7778992515347742, "grad_norm": NaN, "learning_rate": 1.2064917412266946e-05, "loss": 0.0, "step": 4625 }, { "epoch": 0.7780674459675385, "grad_norm": NaN, "learning_rate": 1.204741601798598e-05, "loss": 0.0, "step": 4626 }, { "epoch": 0.7782356404003028, "grad_norm": NaN, "learning_rate": 1.2029925587781432e-05, "loss": 0.0, "step": 4627 }, { "epoch": 0.7784038348330671, "grad_norm": NaN, "learning_rate": 1.2012446126706094e-05, "loss": 0.0, "step": 4628 }, { "epoch": 0.7785720292658314, "grad_norm": NaN, "learning_rate": 1.1994977639809574e-05, "loss": 0.0, "step": 4629 }, { "epoch": 0.7787402236985955, "grad_norm": NaN, "learning_rate": 1.1977520132138342e-05, "loss": 0.0, "step": 4630 }, { "epoch": 0.7789084181313598, "grad_norm": NaN, "learning_rate": 1.1960073608735673e-05, "loss": 0.0, "step": 4631 }, { "epoch": 0.7790766125641241, "grad_norm": NaN, "learning_rate": 1.1942638074641665e-05, "loss": 0.0, "step": 4632 }, { "epoch": 0.7792448069968884, "grad_norm": NaN, "learning_rate": 1.1925213534893232e-05, "loss": 0.0, "step": 4633 }, { "epoch": 0.7794130014296526, "grad_norm": NaN, "learning_rate": 1.1907799994524166e-05, "loss": 0.0, "step": 4634 }, { "epoch": 0.7795811958624169, "grad_norm": NaN, "learning_rate": 1.1890397458565022e-05, "loss": 0.0, "step": 4635 }, { "epoch": 0.7797493902951812, "grad_norm": NaN, "learning_rate": 1.1873005932043202e-05, "loss": 0.0, "step": 4636 }, { "epoch": 0.7799175847279455, "grad_norm": NaN, "learning_rate": 1.1855625419982907e-05, "loss": 0.0, "step": 4637 }, { "epoch": 0.7800857791607098, "grad_norm": NaN, "learning_rate": 1.18382559274052e-05, "loss": 0.0, "step": 4638 }, { "epoch": 0.780253973593474, "grad_norm": NaN, "learning_rate": 1.1820897459327918e-05, "loss": 0.0, "step": 4639 }, { "epoch": 0.7804221680262383, "grad_norm": NaN, "learning_rate": 1.1803550020765736e-05, "loss": 0.0, "step": 4640 }, { "epoch": 0.7805903624590026, "grad_norm": NaN, "learning_rate": 1.1786213616730108e-05, "loss": 0.0, "step": 4641 }, { "epoch": 0.7807585568917669, "grad_norm": NaN, "learning_rate": 1.176888825222936e-05, "loss": 0.0, "step": 4642 }, { "epoch": 0.7809267513245312, "grad_norm": NaN, "learning_rate": 1.175157393226859e-05, "loss": 0.0, "step": 4643 }, { "epoch": 0.7810949457572954, "grad_norm": NaN, "learning_rate": 1.1734270661849695e-05, "loss": 0.0, "step": 4644 }, { "epoch": 0.7812631401900597, "grad_norm": NaN, "learning_rate": 1.171697844597141e-05, "loss": 0.0, "step": 4645 }, { "epoch": 0.781431334622824, "grad_norm": NaN, "learning_rate": 1.1699697289629258e-05, "loss": 0.0, "step": 4646 }, { "epoch": 0.7815995290555883, "grad_norm": NaN, "learning_rate": 1.168242719781557e-05, "loss": 0.0, "step": 4647 }, { "epoch": 0.7817677234883526, "grad_norm": NaN, "learning_rate": 1.1665168175519486e-05, "loss": 0.0, "step": 4648 }, { "epoch": 0.7819359179211168, "grad_norm": NaN, "learning_rate": 1.1647920227726938e-05, "loss": 0.0, "step": 4649 }, { "epoch": 0.7821041123538811, "grad_norm": NaN, "learning_rate": 1.1630683359420652e-05, "loss": 0.0, "step": 4650 }, { "epoch": 0.7822723067866454, "grad_norm": NaN, "learning_rate": 1.16134575755802e-05, "loss": 0.0, "step": 4651 }, { "epoch": 0.7824405012194097, "grad_norm": NaN, "learning_rate": 1.1596242881181901e-05, "loss": 0.0, "step": 4652 }, { "epoch": 0.782608695652174, "grad_norm": NaN, "learning_rate": 1.157903928119889e-05, "loss": 0.0, "step": 4653 }, { "epoch": 0.7827768900849382, "grad_norm": NaN, "learning_rate": 1.156184678060107e-05, "loss": 0.0, "step": 4654 }, { "epoch": 0.7829450845177025, "grad_norm": NaN, "learning_rate": 1.1544665384355202e-05, "loss": 0.0, "step": 4655 }, { "epoch": 0.7831132789504668, "grad_norm": NaN, "learning_rate": 1.1527495097424778e-05, "loss": 0.0, "step": 4656 }, { "epoch": 0.7832814733832311, "grad_norm": NaN, "learning_rate": 1.1510335924770105e-05, "loss": 0.0, "step": 4657 }, { "epoch": 0.7834496678159953, "grad_norm": NaN, "learning_rate": 1.149318787134826e-05, "loss": 0.0, "step": 4658 }, { "epoch": 0.7836178622487595, "grad_norm": NaN, "learning_rate": 1.1476050942113148e-05, "loss": 0.0, "step": 4659 }, { "epoch": 0.7837860566815238, "grad_norm": NaN, "learning_rate": 1.1458925142015431e-05, "loss": 0.0, "step": 4660 }, { "epoch": 0.7839542511142881, "grad_norm": NaN, "learning_rate": 1.1441810476002552e-05, "loss": 0.0, "step": 4661 }, { "epoch": 0.7841224455470523, "grad_norm": NaN, "learning_rate": 1.1424706949018754e-05, "loss": 0.0, "step": 4662 }, { "epoch": 0.7842906399798166, "grad_norm": NaN, "learning_rate": 1.1407614566005048e-05, "loss": 0.0, "step": 4663 }, { "epoch": 0.7844588344125809, "grad_norm": NaN, "learning_rate": 1.1390533331899234e-05, "loss": 0.0, "step": 4664 }, { "epoch": 0.7846270288453452, "grad_norm": NaN, "learning_rate": 1.1373463251635892e-05, "loss": 0.0, "step": 4665 }, { "epoch": 0.7847952232781095, "grad_norm": NaN, "learning_rate": 1.135640433014636e-05, "loss": 0.0, "step": 4666 }, { "epoch": 0.7849634177108737, "grad_norm": NaN, "learning_rate": 1.1339356572358795e-05, "loss": 0.0, "step": 4667 }, { "epoch": 0.785131612143638, "grad_norm": NaN, "learning_rate": 1.1322319983198093e-05, "loss": 0.0, "step": 4668 }, { "epoch": 0.7852998065764023, "grad_norm": NaN, "learning_rate": 1.1305294567585933e-05, "loss": 0.0, "step": 4669 }, { "epoch": 0.7854680010091666, "grad_norm": NaN, "learning_rate": 1.1288280330440753e-05, "loss": 0.0, "step": 4670 }, { "epoch": 0.7856361954419309, "grad_norm": NaN, "learning_rate": 1.1271277276677805e-05, "loss": 0.0, "step": 4671 }, { "epoch": 0.7858043898746951, "grad_norm": NaN, "learning_rate": 1.1254285411209065e-05, "loss": 0.0, "step": 4672 }, { "epoch": 0.7859725843074594, "grad_norm": NaN, "learning_rate": 1.1237304738943294e-05, "loss": 0.0, "step": 4673 }, { "epoch": 0.7861407787402237, "grad_norm": NaN, "learning_rate": 1.1220335264785997e-05, "loss": 0.0, "step": 4674 }, { "epoch": 0.786308973172988, "grad_norm": NaN, "learning_rate": 1.1203376993639508e-05, "loss": 0.0, "step": 4675 }, { "epoch": 0.7864771676057523, "grad_norm": NaN, "learning_rate": 1.1186429930402852e-05, "loss": 0.0, "step": 4676 }, { "epoch": 0.7866453620385165, "grad_norm": NaN, "learning_rate": 1.1169494079971854e-05, "loss": 0.0, "step": 4677 }, { "epoch": 0.7868135564712808, "grad_norm": NaN, "learning_rate": 1.1152569447239075e-05, "loss": 0.0, "step": 4678 }, { "epoch": 0.7869817509040451, "grad_norm": NaN, "learning_rate": 1.1135656037093877e-05, "loss": 0.0, "step": 4679 }, { "epoch": 0.7871499453368094, "grad_norm": NaN, "learning_rate": 1.1118753854422353e-05, "loss": 0.0, "step": 4680 }, { "epoch": 0.7873181397695737, "grad_norm": NaN, "learning_rate": 1.110186290410734e-05, "loss": 0.0, "step": 4681 }, { "epoch": 0.7874863342023379, "grad_norm": NaN, "learning_rate": 1.1084983191028452e-05, "loss": 0.0, "step": 4682 }, { "epoch": 0.7876545286351022, "grad_norm": NaN, "learning_rate": 1.1068114720062044e-05, "loss": 0.0, "step": 4683 }, { "epoch": 0.7878227230678665, "grad_norm": NaN, "learning_rate": 1.1051257496081235e-05, "loss": 0.0, "step": 4684 }, { "epoch": 0.7879909175006308, "grad_norm": NaN, "learning_rate": 1.103441152395588e-05, "loss": 0.0, "step": 4685 }, { "epoch": 0.788159111933395, "grad_norm": NaN, "learning_rate": 1.1017576808552581e-05, "loss": 0.0, "step": 4686 }, { "epoch": 0.7883273063661593, "grad_norm": NaN, "learning_rate": 1.1000753354734733e-05, "loss": 0.0, "step": 4687 }, { "epoch": 0.7884955007989236, "grad_norm": NaN, "learning_rate": 1.0983941167362422e-05, "loss": 0.0, "step": 4688 }, { "epoch": 0.7886636952316878, "grad_norm": NaN, "learning_rate": 1.0967140251292507e-05, "loss": 0.0, "step": 4689 }, { "epoch": 0.7888318896644521, "grad_norm": NaN, "learning_rate": 1.0950350611378562e-05, "loss": 0.0, "step": 4690 }, { "epoch": 0.7890000840972163, "grad_norm": NaN, "learning_rate": 1.0933572252470958e-05, "loss": 0.0, "step": 4691 }, { "epoch": 0.7891682785299806, "grad_norm": NaN, "learning_rate": 1.0916805179416761e-05, "loss": 0.0, "step": 4692 }, { "epoch": 0.7893364729627449, "grad_norm": NaN, "learning_rate": 1.0900049397059792e-05, "loss": 0.0, "step": 4693 }, { "epoch": 0.7895046673955092, "grad_norm": NaN, "learning_rate": 1.0883304910240594e-05, "loss": 0.0, "step": 4694 }, { "epoch": 0.7896728618282735, "grad_norm": NaN, "learning_rate": 1.0866571723796487e-05, "loss": 0.0, "step": 4695 }, { "epoch": 0.7898410562610377, "grad_norm": NaN, "learning_rate": 1.0849849842561494e-05, "loss": 0.0, "step": 4696 }, { "epoch": 0.790009250693802, "grad_norm": NaN, "learning_rate": 1.0833139271366371e-05, "loss": 0.0, "step": 4697 }, { "epoch": 0.7901774451265663, "grad_norm": NaN, "learning_rate": 1.081644001503862e-05, "loss": 0.0, "step": 4698 }, { "epoch": 0.7903456395593306, "grad_norm": NaN, "learning_rate": 1.0799752078402469e-05, "loss": 0.0, "step": 4699 }, { "epoch": 0.7905138339920948, "grad_norm": NaN, "learning_rate": 1.078307546627887e-05, "loss": 0.0, "step": 4700 }, { "epoch": 0.7906820284248591, "grad_norm": NaN, "learning_rate": 1.076641018348552e-05, "loss": 0.0, "step": 4701 }, { "epoch": 0.7908502228576234, "grad_norm": NaN, "learning_rate": 1.0749756234836821e-05, "loss": 0.0, "step": 4702 }, { "epoch": 0.7910184172903877, "grad_norm": NaN, "learning_rate": 1.073311362514391e-05, "loss": 0.0, "step": 4703 }, { "epoch": 0.791186611723152, "grad_norm": NaN, "learning_rate": 1.0716482359214664e-05, "loss": 0.0, "step": 4704 }, { "epoch": 0.7913548061559162, "grad_norm": NaN, "learning_rate": 1.069986244185367e-05, "loss": 0.0, "step": 4705 }, { "epoch": 0.7915230005886805, "grad_norm": NaN, "learning_rate": 1.0683253877862225e-05, "loss": 0.0, "step": 4706 }, { "epoch": 0.7916911950214448, "grad_norm": NaN, "learning_rate": 1.0666656672038355e-05, "loss": 0.0, "step": 4707 }, { "epoch": 0.7918593894542091, "grad_norm": NaN, "learning_rate": 1.0650070829176823e-05, "loss": 0.0, "step": 4708 }, { "epoch": 0.7920275838869734, "grad_norm": NaN, "learning_rate": 1.0633496354069084e-05, "loss": 0.0, "step": 4709 }, { "epoch": 0.7921957783197376, "grad_norm": NaN, "learning_rate": 1.061693325150332e-05, "loss": 0.0, "step": 4710 }, { "epoch": 0.7923639727525019, "grad_norm": NaN, "learning_rate": 1.0600381526264408e-05, "loss": 0.0, "step": 4711 }, { "epoch": 0.7925321671852662, "grad_norm": NaN, "learning_rate": 1.0583841183133986e-05, "loss": 0.0, "step": 4712 }, { "epoch": 0.7927003616180305, "grad_norm": NaN, "learning_rate": 1.0567312226890364e-05, "loss": 0.0, "step": 4713 }, { "epoch": 0.7928685560507948, "grad_norm": NaN, "learning_rate": 1.0550794662308566e-05, "loss": 0.0, "step": 4714 }, { "epoch": 0.793036750483559, "grad_norm": NaN, "learning_rate": 1.0534288494160316e-05, "loss": 0.0, "step": 4715 }, { "epoch": 0.7932049449163233, "grad_norm": NaN, "learning_rate": 1.0517793727214098e-05, "loss": 0.0, "step": 4716 }, { "epoch": 0.7933731393490876, "grad_norm": NaN, "learning_rate": 1.0501310366235045e-05, "loss": 0.0, "step": 4717 }, { "epoch": 0.7935413337818518, "grad_norm": NaN, "learning_rate": 1.0484838415985015e-05, "loss": 0.0, "step": 4718 }, { "epoch": 0.793709528214616, "grad_norm": NaN, "learning_rate": 1.0468377881222568e-05, "loss": 0.0, "step": 4719 }, { "epoch": 0.7938777226473803, "grad_norm": NaN, "learning_rate": 1.0451928766702979e-05, "loss": 0.0, "step": 4720 }, { "epoch": 0.7940459170801446, "grad_norm": NaN, "learning_rate": 1.04354910771782e-05, "loss": 0.0, "step": 4721 }, { "epoch": 0.7942141115129089, "grad_norm": NaN, "learning_rate": 1.04190648173969e-05, "loss": 0.0, "step": 4722 }, { "epoch": 0.7943823059456732, "grad_norm": NaN, "learning_rate": 1.0402649992104434e-05, "loss": 0.0, "step": 4723 }, { "epoch": 0.7945505003784374, "grad_norm": NaN, "learning_rate": 1.0386246606042882e-05, "loss": 0.0, "step": 4724 }, { "epoch": 0.7947186948112017, "grad_norm": NaN, "learning_rate": 1.0369854663950983e-05, "loss": 0.0, "step": 4725 }, { "epoch": 0.794886889243966, "grad_norm": NaN, "learning_rate": 1.0353474170564188e-05, "loss": 0.0, "step": 4726 }, { "epoch": 0.7950550836767303, "grad_norm": NaN, "learning_rate": 1.0337105130614627e-05, "loss": 0.0, "step": 4727 }, { "epoch": 0.7952232781094946, "grad_norm": NaN, "learning_rate": 1.032074754883116e-05, "loss": 0.0, "step": 4728 }, { "epoch": 0.7953914725422588, "grad_norm": NaN, "learning_rate": 1.0304401429939293e-05, "loss": 0.0, "step": 4729 }, { "epoch": 0.7955596669750231, "grad_norm": NaN, "learning_rate": 1.0288066778661231e-05, "loss": 0.0, "step": 4730 }, { "epoch": 0.7957278614077874, "grad_norm": NaN, "learning_rate": 1.0271743599715865e-05, "loss": 0.0, "step": 4731 }, { "epoch": 0.7958960558405517, "grad_norm": NaN, "learning_rate": 1.0255431897818807e-05, "loss": 0.0, "step": 4732 }, { "epoch": 0.796064250273316, "grad_norm": NaN, "learning_rate": 1.0239131677682313e-05, "loss": 0.0, "step": 4733 }, { "epoch": 0.7962324447060802, "grad_norm": NaN, "learning_rate": 1.0222842944015326e-05, "loss": 0.0, "step": 4734 }, { "epoch": 0.7964006391388445, "grad_norm": NaN, "learning_rate": 1.0206565701523479e-05, "loss": 0.0, "step": 4735 }, { "epoch": 0.7965688335716088, "grad_norm": NaN, "learning_rate": 1.0190299954909093e-05, "loss": 0.0, "step": 4736 }, { "epoch": 0.7967370280043731, "grad_norm": NaN, "learning_rate": 1.0174045708871154e-05, "loss": 0.0, "step": 4737 }, { "epoch": 0.7969052224371374, "grad_norm": NaN, "learning_rate": 1.015780296810534e-05, "loss": 0.0, "step": 4738 }, { "epoch": 0.7970734168699016, "grad_norm": NaN, "learning_rate": 1.0141571737303968e-05, "loss": 0.0, "step": 4739 }, { "epoch": 0.7972416113026659, "grad_norm": NaN, "learning_rate": 1.0125352021156098e-05, "loss": 0.0, "step": 4740 }, { "epoch": 0.7974098057354302, "grad_norm": NaN, "learning_rate": 1.0109143824347411e-05, "loss": 0.0, "step": 4741 }, { "epoch": 0.7975780001681945, "grad_norm": NaN, "learning_rate": 1.0092947151560267e-05, "loss": 0.0, "step": 4742 }, { "epoch": 0.7977461946009587, "grad_norm": NaN, "learning_rate": 1.007676200747369e-05, "loss": 0.0, "step": 4743 }, { "epoch": 0.797914389033723, "grad_norm": NaN, "learning_rate": 1.0060588396763421e-05, "loss": 0.0, "step": 4744 }, { "epoch": 0.7980825834664873, "grad_norm": NaN, "learning_rate": 1.0044426324101813e-05, "loss": 0.0, "step": 4745 }, { "epoch": 0.7982507778992516, "grad_norm": NaN, "learning_rate": 1.0028275794157915e-05, "loss": 0.0, "step": 4746 }, { "epoch": 0.7984189723320159, "grad_norm": NaN, "learning_rate": 1.0012136811597428e-05, "loss": 0.0, "step": 4747 }, { "epoch": 0.79858716676478, "grad_norm": NaN, "learning_rate": 9.996009381082717e-06, "loss": 0.0, "step": 4748 }, { "epoch": 0.7987553611975443, "grad_norm": NaN, "learning_rate": 9.979893507272836e-06, "loss": 0.0, "step": 4749 }, { "epoch": 0.7989235556303086, "grad_norm": NaN, "learning_rate": 9.963789194823469e-06, "loss": 0.0, "step": 4750 }, { "epoch": 0.7990917500630729, "grad_norm": NaN, "learning_rate": 9.94769644838698e-06, "loss": 0.0, "step": 4751 }, { "epoch": 0.7992599444958371, "grad_norm": NaN, "learning_rate": 9.931615272612354e-06, "loss": 0.0, "step": 4752 }, { "epoch": 0.7994281389286014, "grad_norm": NaN, "learning_rate": 9.915545672145299e-06, "loss": 0.0, "step": 4753 }, { "epoch": 0.7995963333613657, "grad_norm": NaN, "learning_rate": 9.899487651628132e-06, "loss": 0.0, "step": 4754 }, { "epoch": 0.79976452779413, "grad_norm": NaN, "learning_rate": 9.883441215699823e-06, "loss": 0.0, "step": 4755 }, { "epoch": 0.7999327222268943, "grad_norm": NaN, "learning_rate": 9.867406368996023e-06, "loss": 0.0, "step": 4756 }, { "epoch": 0.8001009166596585, "grad_norm": NaN, "learning_rate": 9.85138311614901e-06, "loss": 0.0, "step": 4757 }, { "epoch": 0.8002691110924228, "grad_norm": NaN, "learning_rate": 9.835371461787724e-06, "loss": 0.0, "step": 4758 }, { "epoch": 0.8004373055251871, "grad_norm": NaN, "learning_rate": 9.819371410537747e-06, "loss": 0.0, "step": 4759 }, { "epoch": 0.8006054999579514, "grad_norm": NaN, "learning_rate": 9.803382967021318e-06, "loss": 0.0, "step": 4760 }, { "epoch": 0.8007736943907157, "grad_norm": NaN, "learning_rate": 9.787406135857329e-06, "loss": 0.0, "step": 4761 }, { "epoch": 0.8009418888234799, "grad_norm": NaN, "learning_rate": 9.7714409216613e-06, "loss": 0.0, "step": 4762 }, { "epoch": 0.8011100832562442, "grad_norm": NaN, "learning_rate": 9.755487329045404e-06, "loss": 0.0, "step": 4763 }, { "epoch": 0.8012782776890085, "grad_norm": NaN, "learning_rate": 9.739545362618446e-06, "loss": 0.0, "step": 4764 }, { "epoch": 0.8014464721217728, "grad_norm": NaN, "learning_rate": 9.7236150269859e-06, "loss": 0.0, "step": 4765 }, { "epoch": 0.8016146665545371, "grad_norm": NaN, "learning_rate": 9.707696326749848e-06, "loss": 0.0, "step": 4766 }, { "epoch": 0.8017828609873013, "grad_norm": NaN, "learning_rate": 9.691789266509028e-06, "loss": 0.0, "step": 4767 }, { "epoch": 0.8019510554200656, "grad_norm": NaN, "learning_rate": 9.675893850858803e-06, "loss": 0.0, "step": 4768 }, { "epoch": 0.8021192498528299, "grad_norm": NaN, "learning_rate": 9.660010084391197e-06, "loss": 0.0, "step": 4769 }, { "epoch": 0.8022874442855942, "grad_norm": NaN, "learning_rate": 9.644137971694844e-06, "loss": 0.0, "step": 4770 }, { "epoch": 0.8024556387183585, "grad_norm": NaN, "learning_rate": 9.628277517355021e-06, "loss": 0.0, "step": 4771 }, { "epoch": 0.8026238331511227, "grad_norm": NaN, "learning_rate": 9.61242872595362e-06, "loss": 0.0, "step": 4772 }, { "epoch": 0.802792027583887, "grad_norm": NaN, "learning_rate": 9.59659160206921e-06, "loss": 0.0, "step": 4773 }, { "epoch": 0.8029602220166513, "grad_norm": NaN, "learning_rate": 9.580766150276937e-06, "loss": 0.0, "step": 4774 }, { "epoch": 0.8031284164494156, "grad_norm": NaN, "learning_rate": 9.564952375148606e-06, "loss": 0.0, "step": 4775 }, { "epoch": 0.8032966108821799, "grad_norm": NaN, "learning_rate": 9.549150281252633e-06, "loss": 0.0, "step": 4776 }, { "epoch": 0.8034648053149441, "grad_norm": NaN, "learning_rate": 9.533359873154068e-06, "loss": 0.0, "step": 4777 }, { "epoch": 0.8036329997477083, "grad_norm": NaN, "learning_rate": 9.517581155414584e-06, "loss": 0.0, "step": 4778 }, { "epoch": 0.8038011941804726, "grad_norm": NaN, "learning_rate": 9.501814132592474e-06, "loss": 0.0, "step": 4779 }, { "epoch": 0.8039693886132369, "grad_norm": NaN, "learning_rate": 9.486058809242642e-06, "loss": 0.0, "step": 4780 }, { "epoch": 0.8041375830460011, "grad_norm": NaN, "learning_rate": 9.470315189916656e-06, "loss": 0.0, "step": 4781 }, { "epoch": 0.8043057774787654, "grad_norm": NaN, "learning_rate": 9.454583279162648e-06, "loss": 0.0, "step": 4782 }, { "epoch": 0.8044739719115297, "grad_norm": NaN, "learning_rate": 9.438863081525396e-06, "loss": 0.0, "step": 4783 }, { "epoch": 0.804642166344294, "grad_norm": NaN, "learning_rate": 9.423154601546281e-06, "loss": 0.0, "step": 4784 }, { "epoch": 0.8048103607770583, "grad_norm": NaN, "learning_rate": 9.407457843763324e-06, "loss": 0.0, "step": 4785 }, { "epoch": 0.8049785552098225, "grad_norm": NaN, "learning_rate": 9.39177281271113e-06, "loss": 0.0, "step": 4786 }, { "epoch": 0.8051467496425868, "grad_norm": NaN, "learning_rate": 9.376099512920939e-06, "loss": 0.0, "step": 4787 }, { "epoch": 0.8053149440753511, "grad_norm": NaN, "learning_rate": 9.360437948920564e-06, "loss": 0.0, "step": 4788 }, { "epoch": 0.8054831385081154, "grad_norm": NaN, "learning_rate": 9.3447881252345e-06, "loss": 0.0, "step": 4789 }, { "epoch": 0.8056513329408796, "grad_norm": NaN, "learning_rate": 9.329150046383772e-06, "loss": 0.0, "step": 4790 }, { "epoch": 0.8058195273736439, "grad_norm": NaN, "learning_rate": 9.313523716886063e-06, "loss": 0.0, "step": 4791 }, { "epoch": 0.8059877218064082, "grad_norm": NaN, "learning_rate": 9.297909141255633e-06, "loss": 0.0, "step": 4792 }, { "epoch": 0.8061559162391725, "grad_norm": NaN, "learning_rate": 9.282306324003364e-06, "loss": 0.0, "step": 4793 }, { "epoch": 0.8063241106719368, "grad_norm": NaN, "learning_rate": 9.266715269636733e-06, "loss": 0.0, "step": 4794 }, { "epoch": 0.806492305104701, "grad_norm": NaN, "learning_rate": 9.251135982659825e-06, "loss": 0.0, "step": 4795 }, { "epoch": 0.8066604995374653, "grad_norm": NaN, "learning_rate": 9.235568467573313e-06, "loss": 0.0, "step": 4796 }, { "epoch": 0.8068286939702296, "grad_norm": NaN, "learning_rate": 9.220012728874473e-06, "loss": 0.0, "step": 4797 }, { "epoch": 0.8069968884029939, "grad_norm": NaN, "learning_rate": 9.204468771057206e-06, "loss": 0.0, "step": 4798 }, { "epoch": 0.8071650828357582, "grad_norm": NaN, "learning_rate": 9.188936598611975e-06, "loss": 0.0, "step": 4799 }, { "epoch": 0.8073332772685224, "grad_norm": NaN, "learning_rate": 9.173416216025854e-06, "loss": 0.0, "step": 4800 }, { "epoch": 0.8075014717012867, "grad_norm": NaN, "learning_rate": 9.157907627782485e-06, "loss": 0.0, "step": 4801 }, { "epoch": 0.807669666134051, "grad_norm": NaN, "learning_rate": 9.142410838362164e-06, "loss": 0.0, "step": 4802 }, { "epoch": 0.8078378605668153, "grad_norm": NaN, "learning_rate": 9.12692585224172e-06, "loss": 0.0, "step": 4803 }, { "epoch": 0.8080060549995796, "grad_norm": NaN, "learning_rate": 9.111452673894588e-06, "loss": 0.0, "step": 4804 }, { "epoch": 0.8081742494323438, "grad_norm": NaN, "learning_rate": 9.095991307790786e-06, "loss": 0.0, "step": 4805 }, { "epoch": 0.8083424438651081, "grad_norm": NaN, "learning_rate": 9.08054175839696e-06, "loss": 0.0, "step": 4806 }, { "epoch": 0.8085106382978723, "grad_norm": NaN, "learning_rate": 9.065104030176297e-06, "loss": 0.0, "step": 4807 }, { "epoch": 0.8086788327306366, "grad_norm": NaN, "learning_rate": 9.049678127588585e-06, "loss": 0.0, "step": 4808 }, { "epoch": 0.8088470271634008, "grad_norm": NaN, "learning_rate": 9.034264055090175e-06, "loss": 0.0, "step": 4809 }, { "epoch": 0.8090152215961651, "grad_norm": NaN, "learning_rate": 9.01886181713405e-06, "loss": 0.0, "step": 4810 }, { "epoch": 0.8091834160289294, "grad_norm": NaN, "learning_rate": 9.003471418169734e-06, "loss": 0.0, "step": 4811 }, { "epoch": 0.8093516104616937, "grad_norm": NaN, "learning_rate": 8.988092862643332e-06, "loss": 0.0, "step": 4812 }, { "epoch": 0.809519804894458, "grad_norm": NaN, "learning_rate": 8.972726154997547e-06, "loss": 0.0, "step": 4813 }, { "epoch": 0.8096879993272222, "grad_norm": NaN, "learning_rate": 8.957371299671641e-06, "loss": 0.0, "step": 4814 }, { "epoch": 0.8098561937599865, "grad_norm": NaN, "learning_rate": 8.942028301101468e-06, "loss": 0.0, "step": 4815 }, { "epoch": 0.8100243881927508, "grad_norm": NaN, "learning_rate": 8.926697163719438e-06, "loss": 0.0, "step": 4816 }, { "epoch": 0.8101925826255151, "grad_norm": NaN, "learning_rate": 8.911377891954543e-06, "loss": 0.0, "step": 4817 }, { "epoch": 0.8103607770582794, "grad_norm": NaN, "learning_rate": 8.89607049023236e-06, "loss": 0.0, "step": 4818 }, { "epoch": 0.8105289714910436, "grad_norm": NaN, "learning_rate": 8.88077496297503e-06, "loss": 0.0, "step": 4819 }, { "epoch": 0.8106971659238079, "grad_norm": NaN, "learning_rate": 8.865491314601249e-06, "loss": 0.0, "step": 4820 }, { "epoch": 0.8108653603565722, "grad_norm": NaN, "learning_rate": 8.850219549526278e-06, "loss": 0.0, "step": 4821 }, { "epoch": 0.8110335547893365, "grad_norm": NaN, "learning_rate": 8.834959672161991e-06, "loss": 0.0, "step": 4822 }, { "epoch": 0.8112017492221008, "grad_norm": NaN, "learning_rate": 8.819711686916783e-06, "loss": 0.0, "step": 4823 }, { "epoch": 0.811369943654865, "grad_norm": NaN, "learning_rate": 8.804475598195616e-06, "loss": 0.0, "step": 4824 }, { "epoch": 0.8115381380876293, "grad_norm": NaN, "learning_rate": 8.789251410400023e-06, "loss": 0.0, "step": 4825 }, { "epoch": 0.8117063325203936, "grad_norm": NaN, "learning_rate": 8.774039127928124e-06, "loss": 0.0, "step": 4826 }, { "epoch": 0.8118745269531579, "grad_norm": NaN, "learning_rate": 8.758838755174564e-06, "loss": 0.0, "step": 4827 }, { "epoch": 0.8120427213859222, "grad_norm": NaN, "learning_rate": 8.743650296530559e-06, "loss": 0.0, "step": 4828 }, { "epoch": 0.8122109158186864, "grad_norm": NaN, "learning_rate": 8.728473756383887e-06, "loss": 0.0, "step": 4829 }, { "epoch": 0.8123791102514507, "grad_norm": NaN, "learning_rate": 8.71330913911888e-06, "loss": 0.0, "step": 4830 }, { "epoch": 0.812547304684215, "grad_norm": NaN, "learning_rate": 8.698156449116424e-06, "loss": 0.0, "step": 4831 }, { "epoch": 0.8127154991169793, "grad_norm": NaN, "learning_rate": 8.68301569075396e-06, "loss": 0.0, "step": 4832 }, { "epoch": 0.8128836935497435, "grad_norm": NaN, "learning_rate": 8.667886868405478e-06, "loss": 0.0, "step": 4833 }, { "epoch": 0.8130518879825078, "grad_norm": NaN, "learning_rate": 8.652769986441544e-06, "loss": 0.0, "step": 4834 }, { "epoch": 0.8132200824152721, "grad_norm": NaN, "learning_rate": 8.637665049229243e-06, "loss": 0.0, "step": 4835 }, { "epoch": 0.8133882768480364, "grad_norm": NaN, "learning_rate": 8.622572061132227e-06, "loss": 0.0, "step": 4836 }, { "epoch": 0.8135564712808006, "grad_norm": NaN, "learning_rate": 8.607491026510678e-06, "loss": 0.0, "step": 4837 }, { "epoch": 0.8137246657135648, "grad_norm": NaN, "learning_rate": 8.592421949721364e-06, "loss": 0.0, "step": 4838 }, { "epoch": 0.8138928601463291, "grad_norm": NaN, "learning_rate": 8.577364835117552e-06, "loss": 0.0, "step": 4839 }, { "epoch": 0.8140610545790934, "grad_norm": NaN, "learning_rate": 8.56231968704908e-06, "loss": 0.0, "step": 4840 }, { "epoch": 0.8142292490118577, "grad_norm": NaN, "learning_rate": 8.547286509862306e-06, "loss": 0.0, "step": 4841 }, { "epoch": 0.814397443444622, "grad_norm": NaN, "learning_rate": 8.532265307900178e-06, "loss": 0.0, "step": 4842 }, { "epoch": 0.8145656378773862, "grad_norm": NaN, "learning_rate": 8.51725608550214e-06, "loss": 0.0, "step": 4843 }, { "epoch": 0.8147338323101505, "grad_norm": NaN, "learning_rate": 8.50225884700418e-06, "loss": 0.0, "step": 4844 }, { "epoch": 0.8149020267429148, "grad_norm": NaN, "learning_rate": 8.487273596738832e-06, "loss": 0.0, "step": 4845 }, { "epoch": 0.8150702211756791, "grad_norm": NaN, "learning_rate": 8.472300339035178e-06, "loss": 0.0, "step": 4846 }, { "epoch": 0.8152384156084433, "grad_norm": NaN, "learning_rate": 8.457339078218823e-06, "loss": 0.0, "step": 4847 }, { "epoch": 0.8154066100412076, "grad_norm": NaN, "learning_rate": 8.442389818611906e-06, "loss": 0.0, "step": 4848 }, { "epoch": 0.8155748044739719, "grad_norm": NaN, "learning_rate": 8.427452564533094e-06, "loss": 0.0, "step": 4849 }, { "epoch": 0.8157429989067362, "grad_norm": NaN, "learning_rate": 8.412527320297598e-06, "loss": 0.0, "step": 4850 }, { "epoch": 0.8159111933395005, "grad_norm": NaN, "learning_rate": 8.397614090217155e-06, "loss": 0.0, "step": 4851 }, { "epoch": 0.8160793877722647, "grad_norm": NaN, "learning_rate": 8.382712878600023e-06, "loss": 0.0, "step": 4852 }, { "epoch": 0.816247582205029, "grad_norm": NaN, "learning_rate": 8.367823689751009e-06, "loss": 0.0, "step": 4853 }, { "epoch": 0.8164157766377933, "grad_norm": NaN, "learning_rate": 8.352946527971406e-06, "loss": 0.0, "step": 4854 }, { "epoch": 0.8165839710705576, "grad_norm": NaN, "learning_rate": 8.338081397559088e-06, "loss": 0.0, "step": 4855 }, { "epoch": 0.8167521655033219, "grad_norm": NaN, "learning_rate": 8.323228302808417e-06, "loss": 0.0, "step": 4856 }, { "epoch": 0.8169203599360861, "grad_norm": NaN, "learning_rate": 8.308387248010279e-06, "loss": 0.0, "step": 4857 }, { "epoch": 0.8170885543688504, "grad_norm": NaN, "learning_rate": 8.293558237452082e-06, "loss": 0.0, "step": 4858 }, { "epoch": 0.8172567488016147, "grad_norm": NaN, "learning_rate": 8.278741275417784e-06, "loss": 0.0, "step": 4859 }, { "epoch": 0.817424943234379, "grad_norm": NaN, "learning_rate": 8.263936366187824e-06, "loss": 0.0, "step": 4860 }, { "epoch": 0.8175931376671433, "grad_norm": NaN, "learning_rate": 8.24914351403918e-06, "loss": 0.0, "step": 4861 }, { "epoch": 0.8177613320999075, "grad_norm": NaN, "learning_rate": 8.234362723245326e-06, "loss": 0.0, "step": 4862 }, { "epoch": 0.8179295265326718, "grad_norm": NaN, "learning_rate": 8.219593998076292e-06, "loss": 0.0, "step": 4863 }, { "epoch": 0.8180977209654361, "grad_norm": NaN, "learning_rate": 8.204837342798589e-06, "loss": 0.0, "step": 4864 }, { "epoch": 0.8182659153982004, "grad_norm": NaN, "learning_rate": 8.19009276167524e-06, "loss": 0.0, "step": 4865 }, { "epoch": 0.8184341098309645, "grad_norm": NaN, "learning_rate": 8.17536025896578e-06, "loss": 0.0, "step": 4866 }, { "epoch": 0.8186023042637288, "grad_norm": NaN, "learning_rate": 8.160639838926293e-06, "loss": 0.0, "step": 4867 }, { "epoch": 0.8187704986964931, "grad_norm": NaN, "learning_rate": 8.145931505809329e-06, "loss": 0.0, "step": 4868 }, { "epoch": 0.8189386931292574, "grad_norm": NaN, "learning_rate": 8.131235263863957e-06, "loss": 0.0, "step": 4869 }, { "epoch": 0.8191068875620217, "grad_norm": NaN, "learning_rate": 8.116551117335763e-06, "loss": 0.0, "step": 4870 }, { "epoch": 0.8192750819947859, "grad_norm": NaN, "learning_rate": 8.101879070466833e-06, "loss": 0.0, "step": 4871 }, { "epoch": 0.8194432764275502, "grad_norm": NaN, "learning_rate": 8.087219127495748e-06, "loss": 0.0, "step": 4872 }, { "epoch": 0.8196114708603145, "grad_norm": NaN, "learning_rate": 8.07257129265761e-06, "loss": 0.0, "step": 4873 }, { "epoch": 0.8197796652930788, "grad_norm": NaN, "learning_rate": 8.057935570184e-06, "loss": 0.0, "step": 4874 }, { "epoch": 0.819947859725843, "grad_norm": NaN, "learning_rate": 8.043311964303036e-06, "loss": 0.0, "step": 4875 }, { "epoch": 0.8201160541586073, "grad_norm": NaN, "learning_rate": 8.028700479239303e-06, "loss": 0.0, "step": 4876 }, { "epoch": 0.8202842485913716, "grad_norm": NaN, "learning_rate": 8.014101119213902e-06, "loss": 0.0, "step": 4877 }, { "epoch": 0.8204524430241359, "grad_norm": NaN, "learning_rate": 7.999513888444399e-06, "loss": 0.0, "step": 4878 }, { "epoch": 0.8206206374569002, "grad_norm": NaN, "learning_rate": 7.98493879114492e-06, "loss": 0.0, "step": 4879 }, { "epoch": 0.8207888318896644, "grad_norm": NaN, "learning_rate": 7.970375831526023e-06, "loss": 0.0, "step": 4880 }, { "epoch": 0.8209570263224287, "grad_norm": NaN, "learning_rate": 7.955825013794793e-06, "loss": 0.0, "step": 4881 }, { "epoch": 0.821125220755193, "grad_norm": NaN, "learning_rate": 7.94128634215478e-06, "loss": 0.0, "step": 4882 }, { "epoch": 0.8212934151879573, "grad_norm": NaN, "learning_rate": 7.926759820806067e-06, "loss": 0.0, "step": 4883 }, { "epoch": 0.8214616096207216, "grad_norm": NaN, "learning_rate": 7.912245453945199e-06, "loss": 0.0, "step": 4884 }, { "epoch": 0.8216298040534858, "grad_norm": NaN, "learning_rate": 7.89774324576521e-06, "loss": 0.0, "step": 4885 }, { "epoch": 0.8217979984862501, "grad_norm": NaN, "learning_rate": 7.883253200455627e-06, "loss": 0.0, "step": 4886 }, { "epoch": 0.8219661929190144, "grad_norm": NaN, "learning_rate": 7.868775322202454e-06, "loss": 0.0, "step": 4887 }, { "epoch": 0.8221343873517787, "grad_norm": NaN, "learning_rate": 7.854309615188198e-06, "loss": 0.0, "step": 4888 }, { "epoch": 0.822302581784543, "grad_norm": NaN, "learning_rate": 7.839856083591834e-06, "loss": 0.0, "step": 4889 }, { "epoch": 0.8224707762173072, "grad_norm": NaN, "learning_rate": 7.825414731588816e-06, "loss": 0.0, "step": 4890 }, { "epoch": 0.8226389706500715, "grad_norm": NaN, "learning_rate": 7.81098556335111e-06, "loss": 0.0, "step": 4891 }, { "epoch": 0.8228071650828358, "grad_norm": NaN, "learning_rate": 7.796568583047132e-06, "loss": 0.0, "step": 4892 }, { "epoch": 0.8229753595156001, "grad_norm": NaN, "learning_rate": 7.782163794841784e-06, "loss": 0.0, "step": 4893 }, { "epoch": 0.8231435539483644, "grad_norm": NaN, "learning_rate": 7.76777120289644e-06, "loss": 0.0, "step": 4894 }, { "epoch": 0.8233117483811286, "grad_norm": NaN, "learning_rate": 7.753390811368971e-06, "loss": 0.0, "step": 4895 }, { "epoch": 0.8234799428138928, "grad_norm": NaN, "learning_rate": 7.739022624413717e-06, "loss": 0.0, "step": 4896 }, { "epoch": 0.8236481372466571, "grad_norm": NaN, "learning_rate": 7.724666646181472e-06, "loss": 0.0, "step": 4897 }, { "epoch": 0.8238163316794214, "grad_norm": NaN, "learning_rate": 7.710322880819521e-06, "loss": 0.0, "step": 4898 }, { "epoch": 0.8239845261121856, "grad_norm": NaN, "learning_rate": 7.695991332471608e-06, "loss": 0.0, "step": 4899 }, { "epoch": 0.8241527205449499, "grad_norm": NaN, "learning_rate": 7.681672005277979e-06, "loss": 0.0, "step": 4900 }, { "epoch": 0.8243209149777142, "grad_norm": NaN, "learning_rate": 7.667364903375313e-06, "loss": 0.0, "step": 4901 }, { "epoch": 0.8244891094104785, "grad_norm": NaN, "learning_rate": 7.653070030896774e-06, "loss": 0.0, "step": 4902 }, { "epoch": 0.8246573038432428, "grad_norm": NaN, "learning_rate": 7.638787391971986e-06, "loss": 0.0, "step": 4903 }, { "epoch": 0.824825498276007, "grad_norm": NaN, "learning_rate": 7.624516990727049e-06, "loss": 0.0, "step": 4904 }, { "epoch": 0.8249936927087713, "grad_norm": NaN, "learning_rate": 7.610258831284528e-06, "loss": 0.0, "step": 4905 }, { "epoch": 0.8251618871415356, "grad_norm": NaN, "learning_rate": 7.596012917763434e-06, "loss": 0.0, "step": 4906 }, { "epoch": 0.8253300815742999, "grad_norm": NaN, "learning_rate": 7.581779254279259e-06, "loss": 0.0, "step": 4907 }, { "epoch": 0.8254982760070642, "grad_norm": NaN, "learning_rate": 7.5675578449439455e-06, "loss": 0.0, "step": 4908 }, { "epoch": 0.8256664704398284, "grad_norm": NaN, "learning_rate": 7.553348693865897e-06, "loss": 0.0, "step": 4909 }, { "epoch": 0.8258346648725927, "grad_norm": NaN, "learning_rate": 7.539151805149985e-06, "loss": 0.0, "step": 4910 }, { "epoch": 0.826002859305357, "grad_norm": NaN, "learning_rate": 7.524967182897513e-06, "loss": 0.0, "step": 4911 }, { "epoch": 0.8261710537381213, "grad_norm": NaN, "learning_rate": 7.510794831206291e-06, "loss": 0.0, "step": 4912 }, { "epoch": 0.8263392481708856, "grad_norm": NaN, "learning_rate": 7.496634754170534e-06, "loss": 0.0, "step": 4913 }, { "epoch": 0.8265074426036498, "grad_norm": NaN, "learning_rate": 7.48248695588093e-06, "loss": 0.0, "step": 4914 }, { "epoch": 0.8266756370364141, "grad_norm": NaN, "learning_rate": 7.468351440424609e-06, "loss": 0.0, "step": 4915 }, { "epoch": 0.8268438314691784, "grad_norm": NaN, "learning_rate": 7.454228211885184e-06, "loss": 0.0, "step": 4916 }, { "epoch": 0.8270120259019427, "grad_norm": NaN, "learning_rate": 7.440117274342695e-06, "loss": 0.0, "step": 4917 }, { "epoch": 0.827180220334707, "grad_norm": NaN, "learning_rate": 7.426018631873621e-06, "loss": 0.0, "step": 4918 }, { "epoch": 0.8273484147674712, "grad_norm": NaN, "learning_rate": 7.411932288550899e-06, "loss": 0.0, "step": 4919 }, { "epoch": 0.8275166092002355, "grad_norm": NaN, "learning_rate": 7.397858248443939e-06, "loss": 0.0, "step": 4920 }, { "epoch": 0.8276848036329998, "grad_norm": NaN, "learning_rate": 7.383796515618557e-06, "loss": 0.0, "step": 4921 }, { "epoch": 0.8278529980657641, "grad_norm": NaN, "learning_rate": 7.369747094137036e-06, "loss": 0.0, "step": 4922 }, { "epoch": 0.8280211924985283, "grad_norm": NaN, "learning_rate": 7.35570998805809e-06, "loss": 0.0, "step": 4923 }, { "epoch": 0.8281893869312926, "grad_norm": NaN, "learning_rate": 7.341685201436887e-06, "loss": 0.0, "step": 4924 }, { "epoch": 0.8283575813640569, "grad_norm": NaN, "learning_rate": 7.327672738325025e-06, "loss": 0.0, "step": 4925 }, { "epoch": 0.8285257757968211, "grad_norm": NaN, "learning_rate": 7.313672602770561e-06, "loss": 0.0, "step": 4926 }, { "epoch": 0.8286939702295854, "grad_norm": NaN, "learning_rate": 7.2996847988179466e-06, "loss": 0.0, "step": 4927 }, { "epoch": 0.8288621646623496, "grad_norm": NaN, "learning_rate": 7.2857093305081394e-06, "loss": 0.0, "step": 4928 }, { "epoch": 0.8290303590951139, "grad_norm": NaN, "learning_rate": 7.271746201878477e-06, "loss": 0.0, "step": 4929 }, { "epoch": 0.8291985535278782, "grad_norm": NaN, "learning_rate": 7.257795416962753e-06, "loss": 0.0, "step": 4930 }, { "epoch": 0.8293667479606425, "grad_norm": NaN, "learning_rate": 7.2438569797911806e-06, "loss": 0.0, "step": 4931 }, { "epoch": 0.8295349423934067, "grad_norm": NaN, "learning_rate": 7.229930894390446e-06, "loss": 0.0, "step": 4932 }, { "epoch": 0.829703136826171, "grad_norm": NaN, "learning_rate": 7.216017164783617e-06, "loss": 0.0, "step": 4933 }, { "epoch": 0.8298713312589353, "grad_norm": NaN, "learning_rate": 7.202115794990228e-06, "loss": 0.0, "step": 4934 }, { "epoch": 0.8300395256916996, "grad_norm": NaN, "learning_rate": 7.188226789026209e-06, "loss": 0.0, "step": 4935 }, { "epoch": 0.8302077201244639, "grad_norm": NaN, "learning_rate": 7.174350150903958e-06, "loss": 0.0, "step": 4936 }, { "epoch": 0.8303759145572281, "grad_norm": NaN, "learning_rate": 7.160485884632278e-06, "loss": 0.0, "step": 4937 }, { "epoch": 0.8305441089899924, "grad_norm": NaN, "learning_rate": 7.1466339942163955e-06, "loss": 0.0, "step": 4938 }, { "epoch": 0.8307123034227567, "grad_norm": NaN, "learning_rate": 7.132794483657957e-06, "loss": 0.0, "step": 4939 }, { "epoch": 0.830880497855521, "grad_norm": NaN, "learning_rate": 7.118967356955059e-06, "loss": 0.0, "step": 4940 }, { "epoch": 0.8310486922882853, "grad_norm": NaN, "learning_rate": 7.105152618102195e-06, "loss": 0.0, "step": 4941 }, { "epoch": 0.8312168867210495, "grad_norm": NaN, "learning_rate": 7.091350271090297e-06, "loss": 0.0, "step": 4942 }, { "epoch": 0.8313850811538138, "grad_norm": NaN, "learning_rate": 7.077560319906695e-06, "loss": 0.0, "step": 4943 }, { "epoch": 0.8315532755865781, "grad_norm": NaN, "learning_rate": 7.06378276853516e-06, "loss": 0.0, "step": 4944 }, { "epoch": 0.8317214700193424, "grad_norm": NaN, "learning_rate": 7.0500176209558735e-06, "loss": 0.0, "step": 4945 }, { "epoch": 0.8318896644521067, "grad_norm": NaN, "learning_rate": 7.036264881145427e-06, "loss": 0.0, "step": 4946 }, { "epoch": 0.8320578588848709, "grad_norm": NaN, "learning_rate": 7.0225245530768315e-06, "loss": 0.0, "step": 4947 }, { "epoch": 0.8322260533176352, "grad_norm": NaN, "learning_rate": 7.0087966407195064e-06, "loss": 0.0, "step": 4948 }, { "epoch": 0.8323942477503995, "grad_norm": NaN, "learning_rate": 6.9950811480393175e-06, "loss": 0.0, "step": 4949 }, { "epoch": 0.8325624421831638, "grad_norm": NaN, "learning_rate": 6.981378078998502e-06, "loss": 0.0, "step": 4950 }, { "epoch": 0.832730636615928, "grad_norm": NaN, "learning_rate": 6.96768743755572e-06, "loss": 0.0, "step": 4951 }, { "epoch": 0.8328988310486923, "grad_norm": NaN, "learning_rate": 6.954009227666042e-06, "loss": 0.0, "step": 4952 }, { "epoch": 0.8330670254814566, "grad_norm": NaN, "learning_rate": 6.940343453280962e-06, "loss": 0.0, "step": 4953 }, { "epoch": 0.8332352199142209, "grad_norm": NaN, "learning_rate": 6.926690118348361e-06, "loss": 0.0, "step": 4954 }, { "epoch": 0.8334034143469851, "grad_norm": NaN, "learning_rate": 6.91304922681254e-06, "loss": 0.0, "step": 4955 }, { "epoch": 0.8335716087797493, "grad_norm": NaN, "learning_rate": 6.899420782614185e-06, "loss": 0.0, "step": 4956 }, { "epoch": 0.8337398032125136, "grad_norm": NaN, "learning_rate": 6.88580478969042e-06, "loss": 0.0, "step": 4957 }, { "epoch": 0.8339079976452779, "grad_norm": NaN, "learning_rate": 6.872201251974747e-06, "loss": 0.0, "step": 4958 }, { "epoch": 0.8340761920780422, "grad_norm": NaN, "learning_rate": 6.858610173397067e-06, "loss": 0.0, "step": 4959 }, { "epoch": 0.8342443865108065, "grad_norm": NaN, "learning_rate": 6.8450315578836965e-06, "loss": 0.0, "step": 4960 }, { "epoch": 0.8344125809435707, "grad_norm": NaN, "learning_rate": 6.831465409357346e-06, "loss": 0.0, "step": 4961 }, { "epoch": 0.834580775376335, "grad_norm": NaN, "learning_rate": 6.817911731737131e-06, "loss": 0.0, "step": 4962 }, { "epoch": 0.8347489698090993, "grad_norm": NaN, "learning_rate": 6.8043705289385475e-06, "loss": 0.0, "step": 4963 }, { "epoch": 0.8349171642418636, "grad_norm": NaN, "learning_rate": 6.790841804873499e-06, "loss": 0.0, "step": 4964 }, { "epoch": 0.8350853586746279, "grad_norm": NaN, "learning_rate": 6.777325563450282e-06, "loss": 0.0, "step": 4965 }, { "epoch": 0.8352535531073921, "grad_norm": NaN, "learning_rate": 6.763821808573589e-06, "loss": 0.0, "step": 4966 }, { "epoch": 0.8354217475401564, "grad_norm": NaN, "learning_rate": 6.750330544144501e-06, "loss": 0.0, "step": 4967 }, { "epoch": 0.8355899419729207, "grad_norm": NaN, "learning_rate": 6.736851774060482e-06, "loss": 0.0, "step": 4968 }, { "epoch": 0.835758136405685, "grad_norm": NaN, "learning_rate": 6.723385502215429e-06, "loss": 0.0, "step": 4969 }, { "epoch": 0.8359263308384492, "grad_norm": NaN, "learning_rate": 6.709931732499569e-06, "loss": 0.0, "step": 4970 }, { "epoch": 0.8360945252712135, "grad_norm": NaN, "learning_rate": 6.69649046879956e-06, "loss": 0.0, "step": 4971 }, { "epoch": 0.8362627197039778, "grad_norm": NaN, "learning_rate": 6.6830617149984175e-06, "loss": 0.0, "step": 4972 }, { "epoch": 0.8364309141367421, "grad_norm": NaN, "learning_rate": 6.669645474975578e-06, "loss": 0.0, "step": 4973 }, { "epoch": 0.8365991085695064, "grad_norm": NaN, "learning_rate": 6.6562417526068345e-06, "loss": 0.0, "step": 4974 }, { "epoch": 0.8367673030022706, "grad_norm": NaN, "learning_rate": 6.642850551764368e-06, "loss": 0.0, "step": 4975 }, { "epoch": 0.8369354974350349, "grad_norm": NaN, "learning_rate": 6.629471876316739e-06, "loss": 0.0, "step": 4976 }, { "epoch": 0.8371036918677992, "grad_norm": NaN, "learning_rate": 6.616105730128919e-06, "loss": 0.0, "step": 4977 }, { "epoch": 0.8372718863005635, "grad_norm": NaN, "learning_rate": 6.602752117062228e-06, "loss": 0.0, "step": 4978 }, { "epoch": 0.8374400807333278, "grad_norm": NaN, "learning_rate": 6.589411040974369e-06, "loss": 0.0, "step": 4979 }, { "epoch": 0.837608275166092, "grad_norm": NaN, "learning_rate": 6.576082505719433e-06, "loss": 0.0, "step": 4980 }, { "epoch": 0.8377764695988563, "grad_norm": NaN, "learning_rate": 6.562766515147889e-06, "loss": 0.0, "step": 4981 }, { "epoch": 0.8379446640316206, "grad_norm": NaN, "learning_rate": 6.549463073106571e-06, "loss": 0.0, "step": 4982 }, { "epoch": 0.8381128584643849, "grad_norm": NaN, "learning_rate": 6.536172183438699e-06, "loss": 0.0, "step": 4983 }, { "epoch": 0.8382810528971492, "grad_norm": NaN, "learning_rate": 6.522893849983852e-06, "loss": 0.0, "step": 4984 }, { "epoch": 0.8384492473299133, "grad_norm": NaN, "learning_rate": 6.509628076578006e-06, "loss": 0.0, "step": 4985 }, { "epoch": 0.8386174417626776, "grad_norm": NaN, "learning_rate": 6.496374867053495e-06, "loss": 0.0, "step": 4986 }, { "epoch": 0.8387856361954419, "grad_norm": NaN, "learning_rate": 6.483134225239013e-06, "loss": 0.0, "step": 4987 }, { "epoch": 0.8389538306282062, "grad_norm": NaN, "learning_rate": 6.46990615495962e-06, "loss": 0.0, "step": 4988 }, { "epoch": 0.8391220250609704, "grad_norm": NaN, "learning_rate": 6.456690660036796e-06, "loss": 0.0, "step": 4989 }, { "epoch": 0.8392902194937347, "grad_norm": NaN, "learning_rate": 6.443487744288318e-06, "loss": 0.0, "step": 4990 }, { "epoch": 0.839458413926499, "grad_norm": NaN, "learning_rate": 6.430297411528374e-06, "loss": 0.0, "step": 4991 }, { "epoch": 0.8396266083592633, "grad_norm": NaN, "learning_rate": 6.41711966556749e-06, "loss": 0.0, "step": 4992 }, { "epoch": 0.8397948027920276, "grad_norm": NaN, "learning_rate": 6.4039545102125845e-06, "loss": 0.0, "step": 4993 }, { "epoch": 0.8399629972247918, "grad_norm": NaN, "learning_rate": 6.390801949266917e-06, "loss": 0.0, "step": 4994 }, { "epoch": 0.8401311916575561, "grad_norm": NaN, "learning_rate": 6.3776619865301206e-06, "loss": 0.0, "step": 4995 }, { "epoch": 0.8402993860903204, "grad_norm": NaN, "learning_rate": 6.364534625798174e-06, "loss": 0.0, "step": 4996 }, { "epoch": 0.8404675805230847, "grad_norm": NaN, "learning_rate": 6.351419870863417e-06, "loss": 0.0, "step": 4997 }, { "epoch": 0.840635774955849, "grad_norm": NaN, "learning_rate": 6.338317725514581e-06, "loss": 0.0, "step": 4998 }, { "epoch": 0.8408039693886132, "grad_norm": NaN, "learning_rate": 6.325228193536714e-06, "loss": 0.0, "step": 4999 }, { "epoch": 0.8409721638213775, "grad_norm": NaN, "learning_rate": 6.312151278711237e-06, "loss": 0.0, "step": 5000 }, { "epoch": 0.8411403582541418, "grad_norm": NaN, "learning_rate": 6.299086984815927e-06, "loss": 0.0, "step": 5001 }, { "epoch": 0.8413085526869061, "grad_norm": NaN, "learning_rate": 6.286035315624905e-06, "loss": 0.0, "step": 5002 }, { "epoch": 0.8414767471196704, "grad_norm": NaN, "learning_rate": 6.272996274908654e-06, "loss": 0.0, "step": 5003 }, { "epoch": 0.8416449415524346, "grad_norm": NaN, "learning_rate": 6.259969866434012e-06, "loss": 0.0, "step": 5004 }, { "epoch": 0.8418131359851989, "grad_norm": NaN, "learning_rate": 6.246956093964146e-06, "loss": 0.0, "step": 5005 }, { "epoch": 0.8419813304179632, "grad_norm": NaN, "learning_rate": 6.233954961258615e-06, "loss": 0.0, "step": 5006 }, { "epoch": 0.8421495248507275, "grad_norm": NaN, "learning_rate": 6.220966472073287e-06, "loss": 0.0, "step": 5007 }, { "epoch": 0.8423177192834918, "grad_norm": NaN, "learning_rate": 6.207990630160393e-06, "loss": 0.0, "step": 5008 }, { "epoch": 0.842485913716256, "grad_norm": NaN, "learning_rate": 6.195027439268497e-06, "loss": 0.0, "step": 5009 }, { "epoch": 0.8426541081490203, "grad_norm": NaN, "learning_rate": 6.182076903142536e-06, "loss": 0.0, "step": 5010 }, { "epoch": 0.8428223025817846, "grad_norm": NaN, "learning_rate": 6.169139025523773e-06, "loss": 0.0, "step": 5011 }, { "epoch": 0.8429904970145489, "grad_norm": NaN, "learning_rate": 6.156213810149813e-06, "loss": 0.0, "step": 5012 }, { "epoch": 0.8431586914473131, "grad_norm": NaN, "learning_rate": 6.143301260754591e-06, "loss": 0.0, "step": 5013 }, { "epoch": 0.8433268858800773, "grad_norm": NaN, "learning_rate": 6.130401381068424e-06, "loss": 0.0, "step": 5014 }, { "epoch": 0.8434950803128416, "grad_norm": NaN, "learning_rate": 6.117514174817929e-06, "loss": 0.0, "step": 5015 }, { "epoch": 0.8436632747456059, "grad_norm": NaN, "learning_rate": 6.104639645726079e-06, "loss": 0.0, "step": 5016 }, { "epoch": 0.8438314691783702, "grad_norm": NaN, "learning_rate": 6.091777797512177e-06, "loss": 0.0, "step": 5017 }, { "epoch": 0.8439996636111344, "grad_norm": NaN, "learning_rate": 6.078928633891867e-06, "loss": 0.0, "step": 5018 }, { "epoch": 0.8441678580438987, "grad_norm": NaN, "learning_rate": 6.066092158577136e-06, "loss": 0.0, "step": 5019 }, { "epoch": 0.844336052476663, "grad_norm": NaN, "learning_rate": 6.053268375276289e-06, "loss": 0.0, "step": 5020 }, { "epoch": 0.8445042469094273, "grad_norm": NaN, "learning_rate": 6.040457287693963e-06, "loss": 0.0, "step": 5021 }, { "epoch": 0.8446724413421915, "grad_norm": NaN, "learning_rate": 6.027658899531169e-06, "loss": 0.0, "step": 5022 }, { "epoch": 0.8448406357749558, "grad_norm": NaN, "learning_rate": 6.014873214485195e-06, "loss": 0.0, "step": 5023 }, { "epoch": 0.8450088302077201, "grad_norm": NaN, "learning_rate": 6.002100236249691e-06, "loss": 0.0, "step": 5024 }, { "epoch": 0.8451770246404844, "grad_norm": NaN, "learning_rate": 5.989339968514612e-06, "loss": 0.0, "step": 5025 }, { "epoch": 0.8453452190732487, "grad_norm": NaN, "learning_rate": 5.976592414966281e-06, "loss": 0.0, "step": 5026 }, { "epoch": 0.845513413506013, "grad_norm": NaN, "learning_rate": 5.963857579287308e-06, "loss": 0.0, "step": 5027 }, { "epoch": 0.8456816079387772, "grad_norm": NaN, "learning_rate": 5.9511354651566485e-06, "loss": 0.0, "step": 5028 }, { "epoch": 0.8458498023715415, "grad_norm": NaN, "learning_rate": 5.938426076249565e-06, "loss": 0.0, "step": 5029 }, { "epoch": 0.8460179968043058, "grad_norm": NaN, "learning_rate": 5.925729416237685e-06, "loss": 0.0, "step": 5030 }, { "epoch": 0.8461861912370701, "grad_norm": NaN, "learning_rate": 5.913045488788915e-06, "loss": 0.0, "step": 5031 }, { "epoch": 0.8463543856698343, "grad_norm": NaN, "learning_rate": 5.900374297567502e-06, "loss": 0.0, "step": 5032 }, { "epoch": 0.8465225801025986, "grad_norm": NaN, "learning_rate": 5.887715846233993e-06, "loss": 0.0, "step": 5033 }, { "epoch": 0.8466907745353629, "grad_norm": NaN, "learning_rate": 5.8750701384453095e-06, "loss": 0.0, "step": 5034 }, { "epoch": 0.8468589689681272, "grad_norm": NaN, "learning_rate": 5.86243717785463e-06, "loss": 0.0, "step": 5035 }, { "epoch": 0.8470271634008915, "grad_norm": NaN, "learning_rate": 5.849816968111482e-06, "loss": 0.0, "step": 5036 }, { "epoch": 0.8471953578336557, "grad_norm": NaN, "learning_rate": 5.837209512861702e-06, "loss": 0.0, "step": 5037 }, { "epoch": 0.84736355226642, "grad_norm": NaN, "learning_rate": 5.824614815747442e-06, "loss": 0.0, "step": 5038 }, { "epoch": 0.8475317466991843, "grad_norm": NaN, "learning_rate": 5.81203288040717e-06, "loss": 0.0, "step": 5039 }, { "epoch": 0.8476999411319486, "grad_norm": NaN, "learning_rate": 5.799463710475667e-06, "loss": 0.0, "step": 5040 }, { "epoch": 0.8478681355647129, "grad_norm": NaN, "learning_rate": 5.786907309584011e-06, "loss": 0.0, "step": 5041 }, { "epoch": 0.8480363299974771, "grad_norm": NaN, "learning_rate": 5.774363681359624e-06, "loss": 0.0, "step": 5042 }, { "epoch": 0.8482045244302414, "grad_norm": NaN, "learning_rate": 5.761832829426223e-06, "loss": 0.0, "step": 5043 }, { "epoch": 0.8483727188630056, "grad_norm": NaN, "learning_rate": 5.749314757403812e-06, "loss": 0.0, "step": 5044 }, { "epoch": 0.8485409132957699, "grad_norm": NaN, "learning_rate": 5.736809468908727e-06, "loss": 0.0, "step": 5045 }, { "epoch": 0.8487091077285341, "grad_norm": NaN, "learning_rate": 5.724316967553622e-06, "loss": 0.0, "step": 5046 }, { "epoch": 0.8488773021612984, "grad_norm": NaN, "learning_rate": 5.711837256947427e-06, "loss": 0.0, "step": 5047 }, { "epoch": 0.8490454965940627, "grad_norm": NaN, "learning_rate": 5.6993703406953924e-06, "loss": 0.0, "step": 5048 }, { "epoch": 0.849213691026827, "grad_norm": NaN, "learning_rate": 5.686916222399069e-06, "loss": 0.0, "step": 5049 }, { "epoch": 0.8493818854595913, "grad_norm": NaN, "learning_rate": 5.674474905656307e-06, "loss": 0.0, "step": 5050 }, { "epoch": 0.8495500798923555, "grad_norm": NaN, "learning_rate": 5.662046394061277e-06, "loss": 0.0, "step": 5051 }, { "epoch": 0.8497182743251198, "grad_norm": NaN, "learning_rate": 5.649630691204433e-06, "loss": 0.0, "step": 5052 }, { "epoch": 0.8498864687578841, "grad_norm": NaN, "learning_rate": 5.637227800672529e-06, "loss": 0.0, "step": 5053 }, { "epoch": 0.8500546631906484, "grad_norm": NaN, "learning_rate": 5.62483772604862e-06, "loss": 0.0, "step": 5054 }, { "epoch": 0.8502228576234127, "grad_norm": NaN, "learning_rate": 5.612460470912067e-06, "loss": 0.0, "step": 5055 }, { "epoch": 0.8503910520561769, "grad_norm": NaN, "learning_rate": 5.6000960388385095e-06, "loss": 0.0, "step": 5056 }, { "epoch": 0.8505592464889412, "grad_norm": NaN, "learning_rate": 5.587744433399905e-06, "loss": 0.0, "step": 5057 }, { "epoch": 0.8507274409217055, "grad_norm": NaN, "learning_rate": 5.5754056581644735e-06, "loss": 0.0, "step": 5058 }, { "epoch": 0.8508956353544698, "grad_norm": NaN, "learning_rate": 5.563079716696773e-06, "loss": 0.0, "step": 5059 }, { "epoch": 0.851063829787234, "grad_norm": NaN, "learning_rate": 5.550766612557623e-06, "loss": 0.0, "step": 5060 }, { "epoch": 0.8512320242199983, "grad_norm": NaN, "learning_rate": 5.538466349304133e-06, "loss": 0.0, "step": 5061 }, { "epoch": 0.8514002186527626, "grad_norm": NaN, "learning_rate": 5.526178930489711e-06, "loss": 0.0, "step": 5062 }, { "epoch": 0.8515684130855269, "grad_norm": NaN, "learning_rate": 5.513904359664074e-06, "loss": 0.0, "step": 5063 }, { "epoch": 0.8517366075182912, "grad_norm": NaN, "learning_rate": 5.501642640373189e-06, "loss": 0.0, "step": 5064 }, { "epoch": 0.8519048019510554, "grad_norm": NaN, "learning_rate": 5.489393776159335e-06, "loss": 0.0, "step": 5065 }, { "epoch": 0.8520729963838197, "grad_norm": NaN, "learning_rate": 5.477157770561064e-06, "loss": 0.0, "step": 5066 }, { "epoch": 0.852241190816584, "grad_norm": NaN, "learning_rate": 5.4649346271132385e-06, "loss": 0.0, "step": 5067 }, { "epoch": 0.8524093852493483, "grad_norm": NaN, "learning_rate": 5.452724349346977e-06, "loss": 0.0, "step": 5068 }, { "epoch": 0.8525775796821126, "grad_norm": NaN, "learning_rate": 5.440526940789698e-06, "loss": 0.0, "step": 5069 }, { "epoch": 0.8527457741148768, "grad_norm": NaN, "learning_rate": 5.428342404965076e-06, "loss": 0.0, "step": 5070 }, { "epoch": 0.8529139685476411, "grad_norm": NaN, "learning_rate": 5.416170745393112e-06, "loss": 0.0, "step": 5071 }, { "epoch": 0.8530821629804054, "grad_norm": NaN, "learning_rate": 5.404011965590055e-06, "loss": 0.0, "step": 5072 }, { "epoch": 0.8532503574131696, "grad_norm": NaN, "learning_rate": 5.391866069068436e-06, "loss": 0.0, "step": 5073 }, { "epoch": 0.8534185518459338, "grad_norm": NaN, "learning_rate": 5.379733059337067e-06, "loss": 0.0, "step": 5074 }, { "epoch": 0.8535867462786981, "grad_norm": NaN, "learning_rate": 5.367612939901045e-06, "loss": 0.0, "step": 5075 }, { "epoch": 0.8537549407114624, "grad_norm": NaN, "learning_rate": 5.355505714261732e-06, "loss": 0.0, "step": 5076 }, { "epoch": 0.8539231351442267, "grad_norm": NaN, "learning_rate": 5.343411385916769e-06, "loss": 0.0, "step": 5077 }, { "epoch": 0.854091329576991, "grad_norm": NaN, "learning_rate": 5.331329958360071e-06, "loss": 0.0, "step": 5078 }, { "epoch": 0.8542595240097552, "grad_norm": NaN, "learning_rate": 5.319261435081834e-06, "loss": 0.0, "step": 5079 }, { "epoch": 0.8544277184425195, "grad_norm": NaN, "learning_rate": 5.307205819568517e-06, "loss": 0.0, "step": 5080 }, { "epoch": 0.8545959128752838, "grad_norm": NaN, "learning_rate": 5.295163115302854e-06, "loss": 0.0, "step": 5081 }, { "epoch": 0.8547641073080481, "grad_norm": NaN, "learning_rate": 5.283133325763828e-06, "loss": 0.0, "step": 5082 }, { "epoch": 0.8549323017408124, "grad_norm": NaN, "learning_rate": 5.271116454426739e-06, "loss": 0.0, "step": 5083 }, { "epoch": 0.8551004961735766, "grad_norm": NaN, "learning_rate": 5.259112504763114e-06, "loss": 0.0, "step": 5084 }, { "epoch": 0.8552686906063409, "grad_norm": NaN, "learning_rate": 5.247121480240763e-06, "loss": 0.0, "step": 5085 }, { "epoch": 0.8554368850391052, "grad_norm": NaN, "learning_rate": 5.235143384323743e-06, "loss": 0.0, "step": 5086 }, { "epoch": 0.8556050794718695, "grad_norm": NaN, "learning_rate": 5.2231782204724136e-06, "loss": 0.0, "step": 5087 }, { "epoch": 0.8557732739046338, "grad_norm": NaN, "learning_rate": 5.211225992143365e-06, "loss": 0.0, "step": 5088 }, { "epoch": 0.855941468337398, "grad_norm": NaN, "learning_rate": 5.199286702789468e-06, "loss": 0.0, "step": 5089 }, { "epoch": 0.8561096627701623, "grad_norm": NaN, "learning_rate": 5.187360355859838e-06, "loss": 0.0, "step": 5090 }, { "epoch": 0.8562778572029266, "grad_norm": NaN, "learning_rate": 5.175446954799873e-06, "loss": 0.0, "step": 5091 }, { "epoch": 0.8564460516356909, "grad_norm": NaN, "learning_rate": 5.163546503051226e-06, "loss": 0.0, "step": 5092 }, { "epoch": 0.8566142460684552, "grad_norm": NaN, "learning_rate": 5.151659004051795e-06, "loss": 0.0, "step": 5093 }, { "epoch": 0.8567824405012194, "grad_norm": NaN, "learning_rate": 5.139784461235747e-06, "loss": 0.0, "step": 5094 }, { "epoch": 0.8569506349339837, "grad_norm": NaN, "learning_rate": 5.127922878033503e-06, "loss": 0.0, "step": 5095 }, { "epoch": 0.857118829366748, "grad_norm": NaN, "learning_rate": 5.116074257871744e-06, "loss": 0.0, "step": 5096 }, { "epoch": 0.8572870237995123, "grad_norm": NaN, "learning_rate": 5.1042386041734055e-06, "loss": 0.0, "step": 5097 }, { "epoch": 0.8574552182322766, "grad_norm": NaN, "learning_rate": 5.092415920357674e-06, "loss": 0.0, "step": 5098 }, { "epoch": 0.8576234126650408, "grad_norm": NaN, "learning_rate": 5.080606209839977e-06, "loss": 0.0, "step": 5099 }, { "epoch": 0.8577916070978051, "grad_norm": NaN, "learning_rate": 5.068809476032033e-06, "loss": 0.0, "step": 5100 }, { "epoch": 0.8579598015305694, "grad_norm": NaN, "learning_rate": 5.0570257223417684e-06, "loss": 0.0, "step": 5101 }, { "epoch": 0.8581279959633337, "grad_norm": NaN, "learning_rate": 5.045254952173384e-06, "loss": 0.0, "step": 5102 }, { "epoch": 0.8582961903960978, "grad_norm": NaN, "learning_rate": 5.033497168927315e-06, "loss": 0.0, "step": 5103 }, { "epoch": 0.8584643848288621, "grad_norm": NaN, "learning_rate": 5.021752376000266e-06, "loss": 0.0, "step": 5104 }, { "epoch": 0.8586325792616264, "grad_norm": NaN, "learning_rate": 5.010020576785174e-06, "loss": 0.0, "step": 5105 }, { "epoch": 0.8588007736943907, "grad_norm": NaN, "learning_rate": 4.998301774671216e-06, "loss": 0.0, "step": 5106 }, { "epoch": 0.858968968127155, "grad_norm": NaN, "learning_rate": 4.9865959730438225e-06, "loss": 0.0, "step": 5107 }, { "epoch": 0.8591371625599192, "grad_norm": NaN, "learning_rate": 4.974903175284678e-06, "loss": 0.0, "step": 5108 }, { "epoch": 0.8593053569926835, "grad_norm": NaN, "learning_rate": 4.963223384771703e-06, "loss": 0.0, "step": 5109 }, { "epoch": 0.8594735514254478, "grad_norm": NaN, "learning_rate": 4.951556604879048e-06, "loss": 0.0, "step": 5110 }, { "epoch": 0.8596417458582121, "grad_norm": NaN, "learning_rate": 4.939902838977117e-06, "loss": 0.0, "step": 5111 }, { "epoch": 0.8598099402909763, "grad_norm": NaN, "learning_rate": 4.928262090432556e-06, "loss": 0.0, "step": 5112 }, { "epoch": 0.8599781347237406, "grad_norm": NaN, "learning_rate": 4.916634362608241e-06, "loss": 0.0, "step": 5113 }, { "epoch": 0.8601463291565049, "grad_norm": NaN, "learning_rate": 4.905019658863297e-06, "loss": 0.0, "step": 5114 }, { "epoch": 0.8603145235892692, "grad_norm": NaN, "learning_rate": 4.8934179825530725e-06, "loss": 0.0, "step": 5115 }, { "epoch": 0.8604827180220335, "grad_norm": NaN, "learning_rate": 4.8818293370291816e-06, "loss": 0.0, "step": 5116 }, { "epoch": 0.8606509124547977, "grad_norm": NaN, "learning_rate": 4.87025372563944e-06, "loss": 0.0, "step": 5117 }, { "epoch": 0.860819106887562, "grad_norm": NaN, "learning_rate": 4.858691151727918e-06, "loss": 0.0, "step": 5118 }, { "epoch": 0.8609873013203263, "grad_norm": NaN, "learning_rate": 4.847141618634898e-06, "loss": 0.0, "step": 5119 }, { "epoch": 0.8611554957530906, "grad_norm": NaN, "learning_rate": 4.8356051296969355e-06, "loss": 0.0, "step": 5120 }, { "epoch": 0.8613236901858549, "grad_norm": NaN, "learning_rate": 4.8240816882467786e-06, "loss": 0.0, "step": 5121 }, { "epoch": 0.8614918846186191, "grad_norm": NaN, "learning_rate": 4.812571297613422e-06, "loss": 0.0, "step": 5122 }, { "epoch": 0.8616600790513834, "grad_norm": NaN, "learning_rate": 4.801073961122082e-06, "loss": 0.0, "step": 5123 }, { "epoch": 0.8618282734841477, "grad_norm": NaN, "learning_rate": 4.789589682094225e-06, "loss": 0.0, "step": 5124 }, { "epoch": 0.861996467916912, "grad_norm": NaN, "learning_rate": 4.778118463847525e-06, "loss": 0.0, "step": 5125 }, { "epoch": 0.8621646623496763, "grad_norm": NaN, "learning_rate": 4.766660309695886e-06, "loss": 0.0, "step": 5126 }, { "epoch": 0.8623328567824405, "grad_norm": NaN, "learning_rate": 4.755215222949433e-06, "loss": 0.0, "step": 5127 }, { "epoch": 0.8625010512152048, "grad_norm": NaN, "learning_rate": 4.743783206914537e-06, "loss": 0.0, "step": 5128 }, { "epoch": 0.8626692456479691, "grad_norm": NaN, "learning_rate": 4.73236426489378e-06, "loss": 0.0, "step": 5129 }, { "epoch": 0.8628374400807334, "grad_norm": NaN, "learning_rate": 4.720958400185954e-06, "loss": 0.0, "step": 5130 }, { "epoch": 0.8630056345134977, "grad_norm": NaN, "learning_rate": 4.709565616086098e-06, "loss": 0.0, "step": 5131 }, { "epoch": 0.8631738289462619, "grad_norm": NaN, "learning_rate": 4.69818591588545e-06, "loss": 0.0, "step": 5132 }, { "epoch": 0.8633420233790261, "grad_norm": NaN, "learning_rate": 4.686819302871481e-06, "loss": 0.0, "step": 5133 }, { "epoch": 0.8635102178117904, "grad_norm": NaN, "learning_rate": 4.675465780327876e-06, "loss": 0.0, "step": 5134 }, { "epoch": 0.8636784122445547, "grad_norm": NaN, "learning_rate": 4.664125351534537e-06, "loss": 0.0, "step": 5135 }, { "epoch": 0.8638466066773189, "grad_norm": NaN, "learning_rate": 4.652798019767607e-06, "loss": 0.0, "step": 5136 }, { "epoch": 0.8640148011100832, "grad_norm": NaN, "learning_rate": 4.641483788299405e-06, "loss": 0.0, "step": 5137 }, { "epoch": 0.8641829955428475, "grad_norm": NaN, "learning_rate": 4.630182660398496e-06, "loss": 0.0, "step": 5138 }, { "epoch": 0.8643511899756118, "grad_norm": NaN, "learning_rate": 4.618894639329629e-06, "loss": 0.0, "step": 5139 }, { "epoch": 0.8645193844083761, "grad_norm": NaN, "learning_rate": 4.607619728353818e-06, "loss": 0.0, "step": 5140 }, { "epoch": 0.8646875788411403, "grad_norm": NaN, "learning_rate": 4.596357930728245e-06, "loss": 0.0, "step": 5141 }, { "epoch": 0.8648557732739046, "grad_norm": NaN, "learning_rate": 4.585109249706315e-06, "loss": 0.0, "step": 5142 }, { "epoch": 0.8650239677066689, "grad_norm": NaN, "learning_rate": 4.573873688537639e-06, "loss": 0.0, "step": 5143 }, { "epoch": 0.8651921621394332, "grad_norm": NaN, "learning_rate": 4.562651250468059e-06, "loss": 0.0, "step": 5144 }, { "epoch": 0.8653603565721975, "grad_norm": NaN, "learning_rate": 4.551441938739603e-06, "loss": 0.0, "step": 5145 }, { "epoch": 0.8655285510049617, "grad_norm": NaN, "learning_rate": 4.540245756590522e-06, "loss": 0.0, "step": 5146 }, { "epoch": 0.865696745437726, "grad_norm": NaN, "learning_rate": 4.529062707255261e-06, "loss": 0.0, "step": 5147 }, { "epoch": 0.8658649398704903, "grad_norm": NaN, "learning_rate": 4.517892793964484e-06, "loss": 0.0, "step": 5148 }, { "epoch": 0.8660331343032546, "grad_norm": NaN, "learning_rate": 4.5067360199450456e-06, "loss": 0.0, "step": 5149 }, { "epoch": 0.8662013287360188, "grad_norm": NaN, "learning_rate": 4.495592388420017e-06, "loss": 0.0, "step": 5150 }, { "epoch": 0.8663695231687831, "grad_norm": NaN, "learning_rate": 4.484461902608672e-06, "loss": 0.0, "step": 5151 }, { "epoch": 0.8665377176015474, "grad_norm": NaN, "learning_rate": 4.473344565726467e-06, "loss": 0.0, "step": 5152 }, { "epoch": 0.8667059120343117, "grad_norm": NaN, "learning_rate": 4.462240380985106e-06, "loss": 0.0, "step": 5153 }, { "epoch": 0.866874106467076, "grad_norm": NaN, "learning_rate": 4.451149351592437e-06, "loss": 0.0, "step": 5154 }, { "epoch": 0.8670423008998402, "grad_norm": NaN, "learning_rate": 4.44007148075255e-06, "loss": 0.0, "step": 5155 }, { "epoch": 0.8672104953326045, "grad_norm": NaN, "learning_rate": 4.429006771665706e-06, "loss": 0.0, "step": 5156 }, { "epoch": 0.8673786897653688, "grad_norm": NaN, "learning_rate": 4.417955227528381e-06, "loss": 0.0, "step": 5157 }, { "epoch": 0.8675468841981331, "grad_norm": NaN, "learning_rate": 4.406916851533249e-06, "loss": 0.0, "step": 5158 }, { "epoch": 0.8677150786308974, "grad_norm": NaN, "learning_rate": 4.395891646869166e-06, "loss": 0.0, "step": 5159 }, { "epoch": 0.8678832730636616, "grad_norm": NaN, "learning_rate": 4.384879616721182e-06, "loss": 0.0, "step": 5160 }, { "epoch": 0.8680514674964259, "grad_norm": NaN, "learning_rate": 4.373880764270566e-06, "loss": 0.0, "step": 5161 }, { "epoch": 0.8682196619291901, "grad_norm": NaN, "learning_rate": 4.362895092694757e-06, "loss": 0.0, "step": 5162 }, { "epoch": 0.8683878563619544, "grad_norm": NaN, "learning_rate": 4.351922605167386e-06, "loss": 0.0, "step": 5163 }, { "epoch": 0.8685560507947186, "grad_norm": NaN, "learning_rate": 4.340963304858281e-06, "loss": 0.0, "step": 5164 }, { "epoch": 0.8687242452274829, "grad_norm": NaN, "learning_rate": 4.330017194933478e-06, "loss": 0.0, "step": 5165 }, { "epoch": 0.8688924396602472, "grad_norm": NaN, "learning_rate": 4.319084278555169e-06, "loss": 0.0, "step": 5166 }, { "epoch": 0.8690606340930115, "grad_norm": NaN, "learning_rate": 4.308164558881761e-06, "loss": 0.0, "step": 5167 }, { "epoch": 0.8692288285257758, "grad_norm": NaN, "learning_rate": 4.297258039067831e-06, "loss": 0.0, "step": 5168 }, { "epoch": 0.86939702295854, "grad_norm": NaN, "learning_rate": 4.286364722264158e-06, "loss": 0.0, "step": 5169 }, { "epoch": 0.8695652173913043, "grad_norm": NaN, "learning_rate": 4.275484611617691e-06, "loss": 0.0, "step": 5170 }, { "epoch": 0.8697334118240686, "grad_norm": NaN, "learning_rate": 4.264617710271579e-06, "loss": 0.0, "step": 5171 }, { "epoch": 0.8699016062568329, "grad_norm": NaN, "learning_rate": 4.253764021365131e-06, "loss": 0.0, "step": 5172 }, { "epoch": 0.8700698006895972, "grad_norm": NaN, "learning_rate": 4.242923548033878e-06, "loss": 0.0, "step": 5173 }, { "epoch": 0.8702379951223614, "grad_norm": NaN, "learning_rate": 4.23209629340951e-06, "loss": 0.0, "step": 5174 }, { "epoch": 0.8704061895551257, "grad_norm": NaN, "learning_rate": 4.221282260619891e-06, "loss": 0.0, "step": 5175 }, { "epoch": 0.87057438398789, "grad_norm": NaN, "learning_rate": 4.210481452789061e-06, "loss": 0.0, "step": 5176 }, { "epoch": 0.8707425784206543, "grad_norm": NaN, "learning_rate": 4.19969387303728e-06, "loss": 0.0, "step": 5177 }, { "epoch": 0.8709107728534186, "grad_norm": NaN, "learning_rate": 4.1889195244809495e-06, "loss": 0.0, "step": 5178 }, { "epoch": 0.8710789672861828, "grad_norm": NaN, "learning_rate": 4.178158410232652e-06, "loss": 0.0, "step": 5179 }, { "epoch": 0.8712471617189471, "grad_norm": NaN, "learning_rate": 4.167410533401151e-06, "loss": 0.0, "step": 5180 }, { "epoch": 0.8714153561517114, "grad_norm": NaN, "learning_rate": 4.1566758970913945e-06, "loss": 0.0, "step": 5181 }, { "epoch": 0.8715835505844757, "grad_norm": NaN, "learning_rate": 4.145954504404498e-06, "loss": 0.0, "step": 5182 }, { "epoch": 0.87175174501724, "grad_norm": NaN, "learning_rate": 4.1352463584377556e-06, "loss": 0.0, "step": 5183 }, { "epoch": 0.8719199394500042, "grad_norm": NaN, "learning_rate": 4.12455146228461e-06, "loss": 0.0, "step": 5184 }, { "epoch": 0.8720881338827685, "grad_norm": NaN, "learning_rate": 4.113869819034727e-06, "loss": 0.0, "step": 5185 }, { "epoch": 0.8722563283155328, "grad_norm": NaN, "learning_rate": 4.103201431773896e-06, "loss": 0.0, "step": 5186 }, { "epoch": 0.8724245227482971, "grad_norm": NaN, "learning_rate": 4.0925463035841e-06, "loss": 0.0, "step": 5187 }, { "epoch": 0.8725927171810614, "grad_norm": NaN, "learning_rate": 4.081904437543482e-06, "loss": 0.0, "step": 5188 }, { "epoch": 0.8727609116138256, "grad_norm": NaN, "learning_rate": 4.071275836726357e-06, "loss": 0.0, "step": 5189 }, { "epoch": 0.8729291060465899, "grad_norm": NaN, "learning_rate": 4.060660504203217e-06, "loss": 0.0, "step": 5190 }, { "epoch": 0.8730973004793542, "grad_norm": NaN, "learning_rate": 4.0500584430407056e-06, "loss": 0.0, "step": 5191 }, { "epoch": 0.8732654949121184, "grad_norm": NaN, "learning_rate": 4.039469656301631e-06, "loss": 0.0, "step": 5192 }, { "epoch": 0.8734336893448826, "grad_norm": NaN, "learning_rate": 4.028894147044992e-06, "loss": 0.0, "step": 5193 }, { "epoch": 0.8736018837776469, "grad_norm": NaN, "learning_rate": 4.018331918325935e-06, "loss": 0.0, "step": 5194 }, { "epoch": 0.8737700782104112, "grad_norm": NaN, "learning_rate": 4.007782973195756e-06, "loss": 0.0, "step": 5195 }, { "epoch": 0.8739382726431755, "grad_norm": NaN, "learning_rate": 3.997247314701935e-06, "loss": 0.0, "step": 5196 }, { "epoch": 0.8741064670759398, "grad_norm": NaN, "learning_rate": 3.9867249458880984e-06, "loss": 0.0, "step": 5197 }, { "epoch": 0.874274661508704, "grad_norm": NaN, "learning_rate": 3.976215869794054e-06, "loss": 0.0, "step": 5198 }, { "epoch": 0.8744428559414683, "grad_norm": NaN, "learning_rate": 3.965720089455749e-06, "loss": 0.0, "step": 5199 }, { "epoch": 0.8746110503742326, "grad_norm": NaN, "learning_rate": 3.955237607905299e-06, "loss": 0.0, "step": 5200 }, { "epoch": 0.8747792448069969, "grad_norm": NaN, "learning_rate": 3.9447684281709585e-06, "loss": 0.0, "step": 5201 }, { "epoch": 0.8749474392397611, "grad_norm": NaN, "learning_rate": 3.934312553277186e-06, "loss": 0.0, "step": 5202 }, { "epoch": 0.8751156336725254, "grad_norm": NaN, "learning_rate": 3.923869986244549e-06, "loss": 0.0, "step": 5203 }, { "epoch": 0.8752838281052897, "grad_norm": NaN, "learning_rate": 3.91344073008979e-06, "loss": 0.0, "step": 5204 }, { "epoch": 0.875452022538054, "grad_norm": NaN, "learning_rate": 3.9030247878258046e-06, "loss": 0.0, "step": 5205 }, { "epoch": 0.8756202169708183, "grad_norm": NaN, "learning_rate": 3.89262216246164e-06, "loss": 0.0, "step": 5206 }, { "epoch": 0.8757884114035825, "grad_norm": NaN, "learning_rate": 3.882232857002505e-06, "loss": 0.0, "step": 5207 }, { "epoch": 0.8759566058363468, "grad_norm": NaN, "learning_rate": 3.871856874449748e-06, "loss": 0.0, "step": 5208 }, { "epoch": 0.8761248002691111, "grad_norm": NaN, "learning_rate": 3.861494217800865e-06, "loss": 0.0, "step": 5209 }, { "epoch": 0.8762929947018754, "grad_norm": NaN, "learning_rate": 3.851144890049535e-06, "loss": 0.0, "step": 5210 }, { "epoch": 0.8764611891346397, "grad_norm": NaN, "learning_rate": 3.840808894185543e-06, "loss": 0.0, "step": 5211 }, { "epoch": 0.8766293835674039, "grad_norm": NaN, "learning_rate": 3.830486233194852e-06, "loss": 0.0, "step": 5212 }, { "epoch": 0.8767975780001682, "grad_norm": NaN, "learning_rate": 3.820176910059553e-06, "loss": 0.0, "step": 5213 }, { "epoch": 0.8769657724329325, "grad_norm": NaN, "learning_rate": 3.8098809277579027e-06, "loss": 0.0, "step": 5214 }, { "epoch": 0.8771339668656968, "grad_norm": NaN, "learning_rate": 3.7995982892643e-06, "loss": 0.0, "step": 5215 }, { "epoch": 0.8773021612984611, "grad_norm": NaN, "learning_rate": 3.789328997549274e-06, "loss": 0.0, "step": 5216 }, { "epoch": 0.8774703557312253, "grad_norm": NaN, "learning_rate": 3.7790730555795075e-06, "loss": 0.0, "step": 5217 }, { "epoch": 0.8776385501639896, "grad_norm": NaN, "learning_rate": 3.768830466317835e-06, "loss": 0.0, "step": 5218 }, { "epoch": 0.8778067445967539, "grad_norm": NaN, "learning_rate": 3.7586012327232244e-06, "loss": 0.0, "step": 5219 }, { "epoch": 0.8779749390295182, "grad_norm": NaN, "learning_rate": 3.7483853577507834e-06, "loss": 0.0, "step": 5220 }, { "epoch": 0.8781431334622823, "grad_norm": NaN, "learning_rate": 3.7381828443517563e-06, "loss": 0.0, "step": 5221 }, { "epoch": 0.8783113278950466, "grad_norm": NaN, "learning_rate": 3.7279936954735526e-06, "loss": 0.0, "step": 5222 }, { "epoch": 0.8784795223278109, "grad_norm": NaN, "learning_rate": 3.7178179140596957e-06, "loss": 0.0, "step": 5223 }, { "epoch": 0.8786477167605752, "grad_norm": NaN, "learning_rate": 3.7076555030498506e-06, "loss": 0.0, "step": 5224 }, { "epoch": 0.8788159111933395, "grad_norm": NaN, "learning_rate": 3.6975064653798297e-06, "loss": 0.0, "step": 5225 }, { "epoch": 0.8789841056261037, "grad_norm": NaN, "learning_rate": 3.687370803981571e-06, "loss": 0.0, "step": 5226 }, { "epoch": 0.879152300058868, "grad_norm": NaN, "learning_rate": 3.6772485217831555e-06, "loss": 0.0, "step": 5227 }, { "epoch": 0.8793204944916323, "grad_norm": NaN, "learning_rate": 3.6671396217087985e-06, "loss": 0.0, "step": 5228 }, { "epoch": 0.8794886889243966, "grad_norm": NaN, "learning_rate": 3.6570441066788363e-06, "loss": 0.0, "step": 5229 }, { "epoch": 0.8796568833571609, "grad_norm": NaN, "learning_rate": 3.64696197960977e-06, "loss": 0.0, "step": 5230 }, { "epoch": 0.8798250777899251, "grad_norm": NaN, "learning_rate": 3.6368932434142076e-06, "loss": 0.0, "step": 5231 }, { "epoch": 0.8799932722226894, "grad_norm": NaN, "learning_rate": 3.6268379010008846e-06, "loss": 0.0, "step": 5232 }, { "epoch": 0.8801614666554537, "grad_norm": NaN, "learning_rate": 3.6167959552746768e-06, "loss": 0.0, "step": 5233 }, { "epoch": 0.880329661088218, "grad_norm": NaN, "learning_rate": 3.6067674091366075e-06, "loss": 0.0, "step": 5234 }, { "epoch": 0.8804978555209823, "grad_norm": NaN, "learning_rate": 3.5967522654837994e-06, "loss": 0.0, "step": 5235 }, { "epoch": 0.8806660499537465, "grad_norm": NaN, "learning_rate": 3.5867505272095093e-06, "loss": 0.0, "step": 5236 }, { "epoch": 0.8808342443865108, "grad_norm": NaN, "learning_rate": 3.576762197203132e-06, "loss": 0.0, "step": 5237 }, { "epoch": 0.8810024388192751, "grad_norm": NaN, "learning_rate": 3.566787278350192e-06, "loss": 0.0, "step": 5238 }, { "epoch": 0.8811706332520394, "grad_norm": NaN, "learning_rate": 3.556825773532324e-06, "loss": 0.0, "step": 5239 }, { "epoch": 0.8813388276848036, "grad_norm": NaN, "learning_rate": 3.546877685627298e-06, "loss": 0.0, "step": 5240 }, { "epoch": 0.8815070221175679, "grad_norm": NaN, "learning_rate": 3.5369430175090033e-06, "loss": 0.0, "step": 5241 }, { "epoch": 0.8816752165503322, "grad_norm": NaN, "learning_rate": 3.5270217720474564e-06, "loss": 0.0, "step": 5242 }, { "epoch": 0.8818434109830965, "grad_norm": NaN, "learning_rate": 3.5171139521087916e-06, "loss": 0.0, "step": 5243 }, { "epoch": 0.8820116054158608, "grad_norm": NaN, "learning_rate": 3.5072195605552694e-06, "loss": 0.0, "step": 5244 }, { "epoch": 0.882179799848625, "grad_norm": NaN, "learning_rate": 3.4973386002452535e-06, "loss": 0.0, "step": 5245 }, { "epoch": 0.8823479942813893, "grad_norm": NaN, "learning_rate": 3.487471074033272e-06, "loss": 0.0, "step": 5246 }, { "epoch": 0.8825161887141536, "grad_norm": NaN, "learning_rate": 3.4776169847699213e-06, "loss": 0.0, "step": 5247 }, { "epoch": 0.8826843831469179, "grad_norm": NaN, "learning_rate": 3.467776335301942e-06, "loss": 0.0, "step": 5248 }, { "epoch": 0.8828525775796822, "grad_norm": NaN, "learning_rate": 3.457949128472193e-06, "loss": 0.0, "step": 5249 }, { "epoch": 0.8830207720124464, "grad_norm": NaN, "learning_rate": 3.4481353671196313e-06, "loss": 0.0, "step": 5250 }, { "epoch": 0.8831889664452106, "grad_norm": NaN, "learning_rate": 3.438335054079361e-06, "loss": 0.0, "step": 5251 }, { "epoch": 0.8833571608779749, "grad_norm": NaN, "learning_rate": 3.428548192182568e-06, "loss": 0.0, "step": 5252 }, { "epoch": 0.8835253553107392, "grad_norm": NaN, "learning_rate": 3.4187747842565733e-06, "loss": 0.0, "step": 5253 }, { "epoch": 0.8836935497435034, "grad_norm": NaN, "learning_rate": 3.4090148331248016e-06, "loss": 0.0, "step": 5254 }, { "epoch": 0.8838617441762677, "grad_norm": NaN, "learning_rate": 3.3992683416067984e-06, "loss": 0.0, "step": 5255 }, { "epoch": 0.884029938609032, "grad_norm": NaN, "learning_rate": 3.3895353125182106e-06, "loss": 0.0, "step": 5256 }, { "epoch": 0.8841981330417963, "grad_norm": NaN, "learning_rate": 3.3798157486708113e-06, "loss": 0.0, "step": 5257 }, { "epoch": 0.8843663274745606, "grad_norm": NaN, "learning_rate": 3.3701096528724494e-06, "loss": 0.0, "step": 5258 }, { "epoch": 0.8845345219073248, "grad_norm": NaN, "learning_rate": 3.3604170279271374e-06, "loss": 0.0, "step": 5259 }, { "epoch": 0.8847027163400891, "grad_norm": NaN, "learning_rate": 3.350737876634957e-06, "loss": 0.0, "step": 5260 }, { "epoch": 0.8848709107728534, "grad_norm": NaN, "learning_rate": 3.3410722017920947e-06, "loss": 0.0, "step": 5261 }, { "epoch": 0.8850391052056177, "grad_norm": NaN, "learning_rate": 3.331420006190866e-06, "loss": 0.0, "step": 5262 }, { "epoch": 0.885207299638382, "grad_norm": NaN, "learning_rate": 3.32178129261968e-06, "loss": 0.0, "step": 5263 }, { "epoch": 0.8853754940711462, "grad_norm": NaN, "learning_rate": 3.3121560638630535e-06, "loss": 0.0, "step": 5264 }, { "epoch": 0.8855436885039105, "grad_norm": NaN, "learning_rate": 3.302544322701606e-06, "loss": 0.0, "step": 5265 }, { "epoch": 0.8857118829366748, "grad_norm": NaN, "learning_rate": 3.292946071912051e-06, "loss": 0.0, "step": 5266 }, { "epoch": 0.8858800773694391, "grad_norm": NaN, "learning_rate": 3.2833613142672358e-06, "loss": 0.0, "step": 5267 }, { "epoch": 0.8860482718022034, "grad_norm": NaN, "learning_rate": 3.2737900525360853e-06, "loss": 0.0, "step": 5268 }, { "epoch": 0.8862164662349676, "grad_norm": NaN, "learning_rate": 3.264232289483621e-06, "loss": 0.0, "step": 5269 }, { "epoch": 0.8863846606677319, "grad_norm": NaN, "learning_rate": 3.2546880278709725e-06, "loss": 0.0, "step": 5270 }, { "epoch": 0.8865528551004962, "grad_norm": NaN, "learning_rate": 3.245157270455379e-06, "loss": 0.0, "step": 5271 }, { "epoch": 0.8867210495332605, "grad_norm": NaN, "learning_rate": 3.235640019990166e-06, "loss": 0.0, "step": 5272 }, { "epoch": 0.8868892439660248, "grad_norm": NaN, "learning_rate": 3.226136279224762e-06, "loss": 0.0, "step": 5273 }, { "epoch": 0.887057438398789, "grad_norm": NaN, "learning_rate": 3.2166460509046814e-06, "loss": 0.0, "step": 5274 }, { "epoch": 0.8872256328315533, "grad_norm": NaN, "learning_rate": 3.207169337771565e-06, "loss": 0.0, "step": 5275 }, { "epoch": 0.8873938272643176, "grad_norm": NaN, "learning_rate": 3.1977061425631117e-06, "loss": 0.0, "step": 5276 }, { "epoch": 0.8875620216970819, "grad_norm": NaN, "learning_rate": 3.18825646801314e-06, "loss": 0.0, "step": 5277 }, { "epoch": 0.8877302161298462, "grad_norm": NaN, "learning_rate": 3.1788203168515497e-06, "loss": 0.0, "step": 5278 }, { "epoch": 0.8878984105626104, "grad_norm": NaN, "learning_rate": 3.169397691804343e-06, "loss": 0.0, "step": 5279 }, { "epoch": 0.8880666049953746, "grad_norm": NaN, "learning_rate": 3.159988595593616e-06, "loss": 0.0, "step": 5280 }, { "epoch": 0.8882347994281389, "grad_norm": NaN, "learning_rate": 3.150593030937543e-06, "loss": 0.0, "step": 5281 }, { "epoch": 0.8884029938609032, "grad_norm": NaN, "learning_rate": 3.141211000550398e-06, "loss": 0.0, "step": 5282 }, { "epoch": 0.8885711882936674, "grad_norm": NaN, "learning_rate": 3.1318425071425463e-06, "loss": 0.0, "step": 5283 }, { "epoch": 0.8887393827264317, "grad_norm": NaN, "learning_rate": 3.1224875534204445e-06, "loss": 0.0, "step": 5284 }, { "epoch": 0.888907577159196, "grad_norm": NaN, "learning_rate": 3.1131461420866313e-06, "loss": 0.0, "step": 5285 }, { "epoch": 0.8890757715919603, "grad_norm": NaN, "learning_rate": 3.1038182758397207e-06, "loss": 0.0, "step": 5286 }, { "epoch": 0.8892439660247246, "grad_norm": NaN, "learning_rate": 3.094503957374456e-06, "loss": 0.0, "step": 5287 }, { "epoch": 0.8894121604574888, "grad_norm": NaN, "learning_rate": 3.0852031893816245e-06, "loss": 0.0, "step": 5288 }, { "epoch": 0.8895803548902531, "grad_norm": NaN, "learning_rate": 3.075915974548116e-06, "loss": 0.0, "step": 5289 }, { "epoch": 0.8897485493230174, "grad_norm": NaN, "learning_rate": 3.066642315556895e-06, "loss": 0.0, "step": 5290 }, { "epoch": 0.8899167437557817, "grad_norm": NaN, "learning_rate": 3.0573822150870344e-06, "loss": 0.0, "step": 5291 }, { "epoch": 0.890084938188546, "grad_norm": NaN, "learning_rate": 3.0481356758136624e-06, "loss": 0.0, "step": 5292 }, { "epoch": 0.8902531326213102, "grad_norm": NaN, "learning_rate": 3.038902700408003e-06, "loss": 0.0, "step": 5293 }, { "epoch": 0.8904213270540745, "grad_norm": NaN, "learning_rate": 3.0296832915373497e-06, "loss": 0.0, "step": 5294 }, { "epoch": 0.8905895214868388, "grad_norm": NaN, "learning_rate": 3.020477451865106e-06, "loss": 0.0, "step": 5295 }, { "epoch": 0.8907577159196031, "grad_norm": NaN, "learning_rate": 3.0112851840507215e-06, "loss": 0.0, "step": 5296 }, { "epoch": 0.8909259103523673, "grad_norm": NaN, "learning_rate": 3.002106490749751e-06, "loss": 0.0, "step": 5297 }, { "epoch": 0.8910941047851316, "grad_norm": NaN, "learning_rate": 2.992941374613806e-06, "loss": 0.0, "step": 5298 }, { "epoch": 0.8912622992178959, "grad_norm": NaN, "learning_rate": 2.9837898382905847e-06, "loss": 0.0, "step": 5299 }, { "epoch": 0.8914304936506602, "grad_norm": NaN, "learning_rate": 2.974651884423868e-06, "loss": 0.0, "step": 5300 }, { "epoch": 0.8915986880834245, "grad_norm": NaN, "learning_rate": 2.9655275156535103e-06, "loss": 0.0, "step": 5301 }, { "epoch": 0.8917668825161887, "grad_norm": NaN, "learning_rate": 2.956416734615436e-06, "loss": 0.0, "step": 5302 }, { "epoch": 0.891935076948953, "grad_norm": NaN, "learning_rate": 2.9473195439416457e-06, "loss": 0.0, "step": 5303 }, { "epoch": 0.8921032713817173, "grad_norm": NaN, "learning_rate": 2.9382359462602206e-06, "loss": 0.0, "step": 5304 }, { "epoch": 0.8922714658144816, "grad_norm": NaN, "learning_rate": 2.9291659441953114e-06, "loss": 0.0, "step": 5305 }, { "epoch": 0.8924396602472459, "grad_norm": NaN, "learning_rate": 2.920109540367133e-06, "loss": 0.0, "step": 5306 }, { "epoch": 0.8926078546800101, "grad_norm": NaN, "learning_rate": 2.911066737391982e-06, "loss": 0.0, "step": 5307 }, { "epoch": 0.8927760491127744, "grad_norm": NaN, "learning_rate": 2.9020375378822297e-06, "loss": 0.0, "step": 5308 }, { "epoch": 0.8929442435455387, "grad_norm": NaN, "learning_rate": 2.8930219444463005e-06, "loss": 0.0, "step": 5309 }, { "epoch": 0.8931124379783029, "grad_norm": NaN, "learning_rate": 2.8840199596887105e-06, "loss": 0.0, "step": 5310 }, { "epoch": 0.8932806324110671, "grad_norm": NaN, "learning_rate": 2.8750315862100142e-06, "loss": 0.0, "step": 5311 }, { "epoch": 0.8934488268438314, "grad_norm": NaN, "learning_rate": 2.8660568266068723e-06, "loss": 0.0, "step": 5312 }, { "epoch": 0.8936170212765957, "grad_norm": NaN, "learning_rate": 2.8570956834719776e-06, "loss": 0.0, "step": 5313 }, { "epoch": 0.89378521570936, "grad_norm": NaN, "learning_rate": 2.848148159394115e-06, "loss": 0.0, "step": 5314 }, { "epoch": 0.8939534101421243, "grad_norm": NaN, "learning_rate": 2.839214256958106e-06, "loss": 0.0, "step": 5315 }, { "epoch": 0.8941216045748885, "grad_norm": NaN, "learning_rate": 2.8302939787448746e-06, "loss": 0.0, "step": 5316 }, { "epoch": 0.8942897990076528, "grad_norm": NaN, "learning_rate": 2.8213873273313873e-06, "loss": 0.0, "step": 5317 }, { "epoch": 0.8944579934404171, "grad_norm": NaN, "learning_rate": 2.8124943052906638e-06, "loss": 0.0, "step": 5318 }, { "epoch": 0.8946261878731814, "grad_norm": NaN, "learning_rate": 2.80361491519181e-06, "loss": 0.0, "step": 5319 }, { "epoch": 0.8947943823059457, "grad_norm": NaN, "learning_rate": 2.794749159599974e-06, "loss": 0.0, "step": 5320 }, { "epoch": 0.8949625767387099, "grad_norm": NaN, "learning_rate": 2.7858970410763795e-06, "loss": 0.0, "step": 5321 }, { "epoch": 0.8951307711714742, "grad_norm": NaN, "learning_rate": 2.7770585621782973e-06, "loss": 0.0, "step": 5322 }, { "epoch": 0.8952989656042385, "grad_norm": NaN, "learning_rate": 2.7682337254590684e-06, "loss": 0.0, "step": 5323 }, { "epoch": 0.8954671600370028, "grad_norm": NaN, "learning_rate": 2.759422533468092e-06, "loss": 0.0, "step": 5324 }, { "epoch": 0.895635354469767, "grad_norm": NaN, "learning_rate": 2.750624988750822e-06, "loss": 0.0, "step": 5325 }, { "epoch": 0.8958035489025313, "grad_norm": NaN, "learning_rate": 2.7418410938487736e-06, "loss": 0.0, "step": 5326 }, { "epoch": 0.8959717433352956, "grad_norm": NaN, "learning_rate": 2.7330708512994964e-06, "loss": 0.0, "step": 5327 }, { "epoch": 0.8961399377680599, "grad_norm": NaN, "learning_rate": 2.7243142636366457e-06, "loss": 0.0, "step": 5328 }, { "epoch": 0.8963081322008242, "grad_norm": NaN, "learning_rate": 2.7155713333898825e-06, "loss": 0.0, "step": 5329 }, { "epoch": 0.8964763266335884, "grad_norm": NaN, "learning_rate": 2.706842063084941e-06, "loss": 0.0, "step": 5330 }, { "epoch": 0.8966445210663527, "grad_norm": NaN, "learning_rate": 2.6981264552436105e-06, "loss": 0.0, "step": 5331 }, { "epoch": 0.896812715499117, "grad_norm": NaN, "learning_rate": 2.689424512383748e-06, "loss": 0.0, "step": 5332 }, { "epoch": 0.8969809099318813, "grad_norm": NaN, "learning_rate": 2.680736237019227e-06, "loss": 0.0, "step": 5333 }, { "epoch": 0.8971491043646456, "grad_norm": NaN, "learning_rate": 2.6720616316600056e-06, "loss": 0.0, "step": 5334 }, { "epoch": 0.8973172987974098, "grad_norm": NaN, "learning_rate": 2.663400698812074e-06, "loss": 0.0, "step": 5335 }, { "epoch": 0.8974854932301741, "grad_norm": NaN, "learning_rate": 2.654753440977481e-06, "loss": 0.0, "step": 5336 }, { "epoch": 0.8976536876629384, "grad_norm": NaN, "learning_rate": 2.646119860654317e-06, "loss": 0.0, "step": 5337 }, { "epoch": 0.8978218820957027, "grad_norm": NaN, "learning_rate": 2.6374999603367367e-06, "loss": 0.0, "step": 5338 }, { "epoch": 0.897990076528467, "grad_norm": NaN, "learning_rate": 2.6288937425149205e-06, "loss": 0.0, "step": 5339 }, { "epoch": 0.8981582709612311, "grad_norm": NaN, "learning_rate": 2.620301209675119e-06, "loss": 0.0, "step": 5340 }, { "epoch": 0.8983264653939954, "grad_norm": NaN, "learning_rate": 2.611722364299618e-06, "loss": 0.0, "step": 5341 }, { "epoch": 0.8984946598267597, "grad_norm": NaN, "learning_rate": 2.6031572088667465e-06, "loss": 0.0, "step": 5342 }, { "epoch": 0.898662854259524, "grad_norm": NaN, "learning_rate": 2.5946057458508756e-06, "loss": 0.0, "step": 5343 }, { "epoch": 0.8988310486922882, "grad_norm": NaN, "learning_rate": 2.5860679777224394e-06, "loss": 0.0, "step": 5344 }, { "epoch": 0.8989992431250525, "grad_norm": NaN, "learning_rate": 2.577543906947899e-06, "loss": 0.0, "step": 5345 }, { "epoch": 0.8991674375578168, "grad_norm": NaN, "learning_rate": 2.5690335359897564e-06, "loss": 0.0, "step": 5346 }, { "epoch": 0.8993356319905811, "grad_norm": NaN, "learning_rate": 2.5605368673065733e-06, "loss": 0.0, "step": 5347 }, { "epoch": 0.8995038264233454, "grad_norm": NaN, "learning_rate": 2.55205390335293e-06, "loss": 0.0, "step": 5348 }, { "epoch": 0.8996720208561096, "grad_norm": NaN, "learning_rate": 2.5435846465794723e-06, "loss": 0.0, "step": 5349 }, { "epoch": 0.8998402152888739, "grad_norm": NaN, "learning_rate": 2.53512909943287e-06, "loss": 0.0, "step": 5350 }, { "epoch": 0.9000084097216382, "grad_norm": NaN, "learning_rate": 2.5266872643558316e-06, "loss": 0.0, "step": 5351 }, { "epoch": 0.9001766041544025, "grad_norm": NaN, "learning_rate": 2.518259143787105e-06, "loss": 0.0, "step": 5352 }, { "epoch": 0.9003447985871668, "grad_norm": NaN, "learning_rate": 2.5098447401614934e-06, "loss": 0.0, "step": 5353 }, { "epoch": 0.900512993019931, "grad_norm": NaN, "learning_rate": 2.501444055909813e-06, "loss": 0.0, "step": 5354 }, { "epoch": 0.9006811874526953, "grad_norm": NaN, "learning_rate": 2.493057093458934e-06, "loss": 0.0, "step": 5355 }, { "epoch": 0.9008493818854596, "grad_norm": NaN, "learning_rate": 2.484683855231751e-06, "loss": 0.0, "step": 5356 }, { "epoch": 0.9010175763182239, "grad_norm": NaN, "learning_rate": 2.4763243436472016e-06, "loss": 0.0, "step": 5357 }, { "epoch": 0.9011857707509882, "grad_norm": NaN, "learning_rate": 2.467978561120249e-06, "loss": 0.0, "step": 5358 }, { "epoch": 0.9013539651837524, "grad_norm": NaN, "learning_rate": 2.459646510061908e-06, "loss": 0.0, "step": 5359 }, { "epoch": 0.9015221596165167, "grad_norm": NaN, "learning_rate": 2.4513281928791985e-06, "loss": 0.0, "step": 5360 }, { "epoch": 0.901690354049281, "grad_norm": NaN, "learning_rate": 2.443023611975204e-06, "loss": 0.0, "step": 5361 }, { "epoch": 0.9018585484820453, "grad_norm": NaN, "learning_rate": 2.434732769749015e-06, "loss": 0.0, "step": 5362 }, { "epoch": 0.9020267429148096, "grad_norm": NaN, "learning_rate": 2.426455668595773e-06, "loss": 0.0, "step": 5363 }, { "epoch": 0.9021949373475738, "grad_norm": NaN, "learning_rate": 2.418192310906625e-06, "loss": 0.0, "step": 5364 }, { "epoch": 0.9023631317803381, "grad_norm": NaN, "learning_rate": 2.409942699068779e-06, "loss": 0.0, "step": 5365 }, { "epoch": 0.9025313262131024, "grad_norm": NaN, "learning_rate": 2.4017068354654503e-06, "loss": 0.0, "step": 5366 }, { "epoch": 0.9026995206458667, "grad_norm": NaN, "learning_rate": 2.3934847224758804e-06, "loss": 0.0, "step": 5367 }, { "epoch": 0.902867715078631, "grad_norm": NaN, "learning_rate": 2.385276362475347e-06, "loss": 0.0, "step": 5368 }, { "epoch": 0.9030359095113951, "grad_norm": NaN, "learning_rate": 2.3770817578351646e-06, "loss": 0.0, "step": 5369 }, { "epoch": 0.9032041039441594, "grad_norm": NaN, "learning_rate": 2.3689009109226556e-06, "loss": 0.0, "step": 5370 }, { "epoch": 0.9033722983769237, "grad_norm": NaN, "learning_rate": 2.3607338241011747e-06, "loss": 0.0, "step": 5371 }, { "epoch": 0.903540492809688, "grad_norm": NaN, "learning_rate": 2.3525804997300893e-06, "loss": 0.0, "step": 5372 }, { "epoch": 0.9037086872424522, "grad_norm": NaN, "learning_rate": 2.344440940164827e-06, "loss": 0.0, "step": 5373 }, { "epoch": 0.9038768816752165, "grad_norm": NaN, "learning_rate": 2.336315147756807e-06, "loss": 0.0, "step": 5374 }, { "epoch": 0.9040450761079808, "grad_norm": NaN, "learning_rate": 2.328203124853473e-06, "loss": 0.0, "step": 5375 }, { "epoch": 0.9042132705407451, "grad_norm": NaN, "learning_rate": 2.3201048737983013e-06, "loss": 0.0, "step": 5376 }, { "epoch": 0.9043814649735094, "grad_norm": NaN, "learning_rate": 2.3120203969307862e-06, "loss": 0.0, "step": 5377 }, { "epoch": 0.9045496594062736, "grad_norm": NaN, "learning_rate": 2.3039496965864436e-06, "loss": 0.0, "step": 5378 }, { "epoch": 0.9047178538390379, "grad_norm": NaN, "learning_rate": 2.2958927750968083e-06, "loss": 0.0, "step": 5379 }, { "epoch": 0.9048860482718022, "grad_norm": NaN, "learning_rate": 2.287849634789424e-06, "loss": 0.0, "step": 5380 }, { "epoch": 0.9050542427045665, "grad_norm": NaN, "learning_rate": 2.2798202779878818e-06, "loss": 0.0, "step": 5381 }, { "epoch": 0.9052224371373307, "grad_norm": NaN, "learning_rate": 2.2718047070117655e-06, "loss": 0.0, "step": 5382 }, { "epoch": 0.905390631570095, "grad_norm": NaN, "learning_rate": 2.2638029241766833e-06, "loss": 0.0, "step": 5383 }, { "epoch": 0.9055588260028593, "grad_norm": NaN, "learning_rate": 2.2558149317942536e-06, "loss": 0.0, "step": 5384 }, { "epoch": 0.9057270204356236, "grad_norm": NaN, "learning_rate": 2.2478407321721296e-06, "loss": 0.0, "step": 5385 }, { "epoch": 0.9058952148683879, "grad_norm": NaN, "learning_rate": 2.2398803276139636e-06, "loss": 0.0, "step": 5386 }, { "epoch": 0.9060634093011521, "grad_norm": NaN, "learning_rate": 2.2319337204194267e-06, "loss": 0.0, "step": 5387 }, { "epoch": 0.9062316037339164, "grad_norm": NaN, "learning_rate": 2.2240009128842e-06, "loss": 0.0, "step": 5388 }, { "epoch": 0.9063997981666807, "grad_norm": NaN, "learning_rate": 2.2160819072999885e-06, "loss": 0.0, "step": 5389 }, { "epoch": 0.906567992599445, "grad_norm": NaN, "learning_rate": 2.208176705954512e-06, "loss": 0.0, "step": 5390 }, { "epoch": 0.9067361870322093, "grad_norm": NaN, "learning_rate": 2.2002853111314783e-06, "loss": 0.0, "step": 5391 }, { "epoch": 0.9069043814649735, "grad_norm": NaN, "learning_rate": 2.1924077251106347e-06, "loss": 0.0, "step": 5392 }, { "epoch": 0.9070725758977378, "grad_norm": NaN, "learning_rate": 2.1845439501677222e-06, "loss": 0.0, "step": 5393 }, { "epoch": 0.9072407703305021, "grad_norm": NaN, "learning_rate": 2.176693988574502e-06, "loss": 0.0, "step": 5394 }, { "epoch": 0.9074089647632664, "grad_norm": NaN, "learning_rate": 2.168857842598737e-06, "loss": 0.0, "step": 5395 }, { "epoch": 0.9075771591960307, "grad_norm": NaN, "learning_rate": 2.161035514504195e-06, "loss": 0.0, "step": 5396 }, { "epoch": 0.9077453536287949, "grad_norm": NaN, "learning_rate": 2.1532270065506675e-06, "loss": 0.0, "step": 5397 }, { "epoch": 0.9079135480615592, "grad_norm": NaN, "learning_rate": 2.1454323209939455e-06, "loss": 0.0, "step": 5398 }, { "epoch": 0.9080817424943234, "grad_norm": NaN, "learning_rate": 2.137651460085821e-06, "loss": 0.0, "step": 5399 }, { "epoch": 0.9082499369270877, "grad_norm": NaN, "learning_rate": 2.129884426074108e-06, "loss": 0.0, "step": 5400 }, { "epoch": 0.9084181313598519, "grad_norm": NaN, "learning_rate": 2.1221312212025947e-06, "loss": 0.0, "step": 5401 }, { "epoch": 0.9085863257926162, "grad_norm": NaN, "learning_rate": 2.114391847711117e-06, "loss": 0.0, "step": 5402 }, { "epoch": 0.9087545202253805, "grad_norm": NaN, "learning_rate": 2.1066663078354866e-06, "loss": 0.0, "step": 5403 }, { "epoch": 0.9089227146581448, "grad_norm": NaN, "learning_rate": 2.0989546038075234e-06, "loss": 0.0, "step": 5404 }, { "epoch": 0.9090909090909091, "grad_norm": NaN, "learning_rate": 2.091256737855046e-06, "loss": 0.0, "step": 5405 }, { "epoch": 0.9092591035236733, "grad_norm": NaN, "learning_rate": 2.083572712201898e-06, "loss": 0.0, "step": 5406 }, { "epoch": 0.9094272979564376, "grad_norm": NaN, "learning_rate": 2.075902529067897e-06, "loss": 0.0, "step": 5407 }, { "epoch": 0.9095954923892019, "grad_norm": NaN, "learning_rate": 2.068246190668871e-06, "loss": 0.0, "step": 5408 }, { "epoch": 0.9097636868219662, "grad_norm": NaN, "learning_rate": 2.060603699216651e-06, "loss": 0.0, "step": 5409 }, { "epoch": 0.9099318812547305, "grad_norm": NaN, "learning_rate": 2.0529750569190763e-06, "loss": 0.0, "step": 5410 }, { "epoch": 0.9101000756874947, "grad_norm": NaN, "learning_rate": 2.0453602659799677e-06, "loss": 0.0, "step": 5411 }, { "epoch": 0.910268270120259, "grad_norm": NaN, "learning_rate": 2.0377593285991594e-06, "loss": 0.0, "step": 5412 }, { "epoch": 0.9104364645530233, "grad_norm": NaN, "learning_rate": 2.0301722469724726e-06, "loss": 0.0, "step": 5413 }, { "epoch": 0.9106046589857876, "grad_norm": NaN, "learning_rate": 2.022599023291727e-06, "loss": 0.0, "step": 5414 }, { "epoch": 0.9107728534185519, "grad_norm": NaN, "learning_rate": 2.0150396597447496e-06, "loss": 0.0, "step": 5415 }, { "epoch": 0.9109410478513161, "grad_norm": NaN, "learning_rate": 2.0074941585153497e-06, "loss": 0.0, "step": 5416 }, { "epoch": 0.9111092422840804, "grad_norm": NaN, "learning_rate": 1.9999625217833384e-06, "loss": 0.0, "step": 5417 }, { "epoch": 0.9112774367168447, "grad_norm": NaN, "learning_rate": 1.992444751724526e-06, "loss": 0.0, "step": 5418 }, { "epoch": 0.911445631149609, "grad_norm": NaN, "learning_rate": 1.984940850510708e-06, "loss": 0.0, "step": 5419 }, { "epoch": 0.9116138255823732, "grad_norm": NaN, "learning_rate": 1.977450820309684e-06, "loss": 0.0, "step": 5420 }, { "epoch": 0.9117820200151375, "grad_norm": NaN, "learning_rate": 1.9699746632852234e-06, "loss": 0.0, "step": 5421 }, { "epoch": 0.9119502144479018, "grad_norm": NaN, "learning_rate": 1.9625123815971203e-06, "loss": 0.0, "step": 5422 }, { "epoch": 0.9121184088806661, "grad_norm": NaN, "learning_rate": 1.955063977401145e-06, "loss": 0.0, "step": 5423 }, { "epoch": 0.9122866033134304, "grad_norm": NaN, "learning_rate": 1.947629452849048e-06, "loss": 0.0, "step": 5424 }, { "epoch": 0.9124547977461946, "grad_norm": NaN, "learning_rate": 1.940208810088584e-06, "loss": 0.0, "step": 5425 }, { "epoch": 0.9126229921789589, "grad_norm": NaN, "learning_rate": 1.9328020512634936e-06, "loss": 0.0, "step": 5426 }, { "epoch": 0.9127911866117232, "grad_norm": NaN, "learning_rate": 1.9254091785135153e-06, "loss": 0.0, "step": 5427 }, { "epoch": 0.9129593810444874, "grad_norm": NaN, "learning_rate": 1.9180301939743516e-06, "loss": 0.0, "step": 5428 }, { "epoch": 0.9131275754772517, "grad_norm": NaN, "learning_rate": 1.9106650997777197e-06, "loss": 0.0, "step": 5429 }, { "epoch": 0.9132957699100159, "grad_norm": NaN, "learning_rate": 1.9033138980513066e-06, "loss": 0.0, "step": 5430 }, { "epoch": 0.9134639643427802, "grad_norm": NaN, "learning_rate": 1.8959765909187965e-06, "loss": 0.0, "step": 5431 }, { "epoch": 0.9136321587755445, "grad_norm": NaN, "learning_rate": 1.8886531804998553e-06, "loss": 0.0, "step": 5432 }, { "epoch": 0.9138003532083088, "grad_norm": NaN, "learning_rate": 1.8813436689101239e-06, "loss": 0.0, "step": 5433 }, { "epoch": 0.913968547641073, "grad_norm": NaN, "learning_rate": 1.8740480582612519e-06, "loss": 0.0, "step": 5434 }, { "epoch": 0.9141367420738373, "grad_norm": NaN, "learning_rate": 1.8667663506608534e-06, "loss": 0.0, "step": 5435 }, { "epoch": 0.9143049365066016, "grad_norm": NaN, "learning_rate": 1.859498548212535e-06, "loss": 0.0, "step": 5436 }, { "epoch": 0.9144731309393659, "grad_norm": NaN, "learning_rate": 1.8522446530158778e-06, "loss": 0.0, "step": 5437 }, { "epoch": 0.9146413253721302, "grad_norm": NaN, "learning_rate": 1.8450046671664555e-06, "loss": 0.0, "step": 5438 }, { "epoch": 0.9148095198048944, "grad_norm": NaN, "learning_rate": 1.8377785927558232e-06, "loss": 0.0, "step": 5439 }, { "epoch": 0.9149777142376587, "grad_norm": NaN, "learning_rate": 1.8305664318715054e-06, "loss": 0.0, "step": 5440 }, { "epoch": 0.915145908670423, "grad_norm": NaN, "learning_rate": 1.8233681865970077e-06, "loss": 0.0, "step": 5441 }, { "epoch": 0.9153141031031873, "grad_norm": NaN, "learning_rate": 1.8161838590118384e-06, "loss": 0.0, "step": 5442 }, { "epoch": 0.9154822975359516, "grad_norm": NaN, "learning_rate": 1.8090134511914658e-06, "loss": 0.0, "step": 5443 }, { "epoch": 0.9156504919687158, "grad_norm": NaN, "learning_rate": 1.8018569652073381e-06, "loss": 0.0, "step": 5444 }, { "epoch": 0.9158186864014801, "grad_norm": NaN, "learning_rate": 1.7947144031268737e-06, "loss": 0.0, "step": 5445 }, { "epoch": 0.9159868808342444, "grad_norm": NaN, "learning_rate": 1.7875857670134943e-06, "loss": 0.0, "step": 5446 }, { "epoch": 0.9161550752670087, "grad_norm": NaN, "learning_rate": 1.7804710589265805e-06, "loss": 0.0, "step": 5447 }, { "epoch": 0.916323269699773, "grad_norm": NaN, "learning_rate": 1.7733702809214825e-06, "loss": 0.0, "step": 5448 }, { "epoch": 0.9164914641325372, "grad_norm": NaN, "learning_rate": 1.7662834350495428e-06, "loss": 0.0, "step": 5449 }, { "epoch": 0.9166596585653015, "grad_norm": NaN, "learning_rate": 1.7592105233580736e-06, "loss": 0.0, "step": 5450 }, { "epoch": 0.9168278529980658, "grad_norm": NaN, "learning_rate": 1.7521515478903517e-06, "loss": 0.0, "step": 5451 }, { "epoch": 0.9169960474308301, "grad_norm": NaN, "learning_rate": 1.7451065106856458e-06, "loss": 0.0, "step": 5452 }, { "epoch": 0.9171642418635944, "grad_norm": NaN, "learning_rate": 1.7380754137791778e-06, "loss": 0.0, "step": 5453 }, { "epoch": 0.9173324362963586, "grad_norm": NaN, "learning_rate": 1.7310582592021562e-06, "loss": 0.0, "step": 5454 }, { "epoch": 0.9175006307291229, "grad_norm": NaN, "learning_rate": 1.7240550489817653e-06, "loss": 0.0, "step": 5455 }, { "epoch": 0.9176688251618872, "grad_norm": NaN, "learning_rate": 1.7170657851411476e-06, "loss": 0.0, "step": 5456 }, { "epoch": 0.9178370195946515, "grad_norm": NaN, "learning_rate": 1.7100904696994269e-06, "loss": 0.0, "step": 5457 }, { "epoch": 0.9180052140274156, "grad_norm": NaN, "learning_rate": 1.7031291046716856e-06, "loss": 0.0, "step": 5458 }, { "epoch": 0.9181734084601799, "grad_norm": NaN, "learning_rate": 1.6961816920689932e-06, "loss": 0.0, "step": 5459 }, { "epoch": 0.9183416028929442, "grad_norm": NaN, "learning_rate": 1.6892482338983828e-06, "loss": 0.0, "step": 5460 }, { "epoch": 0.9185097973257085, "grad_norm": NaN, "learning_rate": 1.6823287321628412e-06, "loss": 0.0, "step": 5461 }, { "epoch": 0.9186779917584728, "grad_norm": NaN, "learning_rate": 1.6754231888613304e-06, "loss": 0.0, "step": 5462 }, { "epoch": 0.918846186191237, "grad_norm": NaN, "learning_rate": 1.6685316059888046e-06, "loss": 0.0, "step": 5463 }, { "epoch": 0.9190143806240013, "grad_norm": NaN, "learning_rate": 1.6616539855361547e-06, "loss": 0.0, "step": 5464 }, { "epoch": 0.9191825750567656, "grad_norm": NaN, "learning_rate": 1.6547903294902468e-06, "loss": 0.0, "step": 5465 }, { "epoch": 0.9193507694895299, "grad_norm": NaN, "learning_rate": 1.647940639833917e-06, "loss": 0.0, "step": 5466 }, { "epoch": 0.9195189639222942, "grad_norm": NaN, "learning_rate": 1.6411049185459605e-06, "loss": 0.0, "step": 5467 }, { "epoch": 0.9196871583550584, "grad_norm": NaN, "learning_rate": 1.6342831676011416e-06, "loss": 0.0, "step": 5468 }, { "epoch": 0.9198553527878227, "grad_norm": NaN, "learning_rate": 1.62747538897019e-06, "loss": 0.0, "step": 5469 }, { "epoch": 0.920023547220587, "grad_norm": NaN, "learning_rate": 1.6206815846197877e-06, "loss": 0.0, "step": 5470 }, { "epoch": 0.9201917416533513, "grad_norm": NaN, "learning_rate": 1.6139017565126035e-06, "loss": 0.0, "step": 5471 }, { "epoch": 0.9203599360861155, "grad_norm": NaN, "learning_rate": 1.6071359066072433e-06, "loss": 0.0, "step": 5472 }, { "epoch": 0.9205281305188798, "grad_norm": NaN, "learning_rate": 1.6003840368582935e-06, "loss": 0.0, "step": 5473 }, { "epoch": 0.9206963249516441, "grad_norm": NaN, "learning_rate": 1.5936461492162823e-06, "loss": 0.0, "step": 5474 }, { "epoch": 0.9208645193844084, "grad_norm": NaN, "learning_rate": 1.58692224562772e-06, "loss": 0.0, "step": 5475 }, { "epoch": 0.9210327138171727, "grad_norm": NaN, "learning_rate": 1.5802123280350633e-06, "loss": 0.0, "step": 5476 }, { "epoch": 0.921200908249937, "grad_norm": NaN, "learning_rate": 1.5735163983767342e-06, "loss": 0.0, "step": 5477 }, { "epoch": 0.9213691026827012, "grad_norm": NaN, "learning_rate": 1.566834458587102e-06, "loss": 0.0, "step": 5478 }, { "epoch": 0.9215372971154655, "grad_norm": NaN, "learning_rate": 1.5601665105965168e-06, "loss": 0.0, "step": 5479 }, { "epoch": 0.9217054915482298, "grad_norm": NaN, "learning_rate": 1.5535125563312713e-06, "loss": 0.0, "step": 5480 }, { "epoch": 0.9218736859809941, "grad_norm": NaN, "learning_rate": 1.5468725977136168e-06, "loss": 0.0, "step": 5481 }, { "epoch": 0.9220418804137583, "grad_norm": NaN, "learning_rate": 1.5402466366617575e-06, "loss": 0.0, "step": 5482 }, { "epoch": 0.9222100748465226, "grad_norm": NaN, "learning_rate": 1.5336346750898678e-06, "loss": 0.0, "step": 5483 }, { "epoch": 0.9223782692792869, "grad_norm": NaN, "learning_rate": 1.52703671490807e-06, "loss": 0.0, "step": 5484 }, { "epoch": 0.9225464637120512, "grad_norm": NaN, "learning_rate": 1.5204527580224337e-06, "loss": 0.0, "step": 5485 }, { "epoch": 0.9227146581448155, "grad_norm": NaN, "learning_rate": 1.513882806334993e-06, "loss": 0.0, "step": 5486 }, { "epoch": 0.9228828525775797, "grad_norm": NaN, "learning_rate": 1.5073268617437352e-06, "loss": 0.0, "step": 5487 }, { "epoch": 0.9230510470103439, "grad_norm": NaN, "learning_rate": 1.500784926142601e-06, "loss": 0.0, "step": 5488 }, { "epoch": 0.9232192414431082, "grad_norm": NaN, "learning_rate": 1.4942570014214785e-06, "loss": 0.0, "step": 5489 }, { "epoch": 0.9233874358758725, "grad_norm": NaN, "learning_rate": 1.4877430894662036e-06, "loss": 0.0, "step": 5490 }, { "epoch": 0.9235556303086367, "grad_norm": NaN, "learning_rate": 1.481243192158588e-06, "loss": 0.0, "step": 5491 }, { "epoch": 0.923723824741401, "grad_norm": NaN, "learning_rate": 1.4747573113763735e-06, "loss": 0.0, "step": 5492 }, { "epoch": 0.9238920191741653, "grad_norm": NaN, "learning_rate": 1.4682854489932562e-06, "loss": 0.0, "step": 5493 }, { "epoch": 0.9240602136069296, "grad_norm": NaN, "learning_rate": 1.4618276068788849e-06, "loss": 0.0, "step": 5494 }, { "epoch": 0.9242284080396939, "grad_norm": NaN, "learning_rate": 1.4553837868988618e-06, "loss": 0.0, "step": 5495 }, { "epoch": 0.9243966024724581, "grad_norm": NaN, "learning_rate": 1.4489539909147365e-06, "loss": 0.0, "step": 5496 }, { "epoch": 0.9245647969052224, "grad_norm": NaN, "learning_rate": 1.4425382207839954e-06, "loss": 0.0, "step": 5497 }, { "epoch": 0.9247329913379867, "grad_norm": NaN, "learning_rate": 1.4361364783600895e-06, "loss": 0.0, "step": 5498 }, { "epoch": 0.924901185770751, "grad_norm": NaN, "learning_rate": 1.4297487654924002e-06, "loss": 0.0, "step": 5499 }, { "epoch": 0.9250693802035153, "grad_norm": NaN, "learning_rate": 1.423375084026285e-06, "loss": 0.0, "step": 5500 }, { "epoch": 0.9252375746362795, "grad_norm": NaN, "learning_rate": 1.4170154358030151e-06, "loss": 0.0, "step": 5501 }, { "epoch": 0.9254057690690438, "grad_norm": NaN, "learning_rate": 1.410669822659827e-06, "loss": 0.0, "step": 5502 }, { "epoch": 0.9255739635018081, "grad_norm": NaN, "learning_rate": 1.404338246429887e-06, "loss": 0.0, "step": 5503 }, { "epoch": 0.9257421579345724, "grad_norm": NaN, "learning_rate": 1.3980207089423326e-06, "loss": 0.0, "step": 5504 }, { "epoch": 0.9259103523673367, "grad_norm": NaN, "learning_rate": 1.39171721202222e-06, "loss": 0.0, "step": 5505 }, { "epoch": 0.9260785468001009, "grad_norm": NaN, "learning_rate": 1.385427757490565e-06, "loss": 0.0, "step": 5506 }, { "epoch": 0.9262467412328652, "grad_norm": NaN, "learning_rate": 1.3791523471643141e-06, "loss": 0.0, "step": 5507 }, { "epoch": 0.9264149356656295, "grad_norm": NaN, "learning_rate": 1.3728909828563619e-06, "loss": 0.0, "step": 5508 }, { "epoch": 0.9265831300983938, "grad_norm": NaN, "learning_rate": 1.3666436663755555e-06, "loss": 0.0, "step": 5509 }, { "epoch": 0.926751324531158, "grad_norm": NaN, "learning_rate": 1.3604103995266682e-06, "loss": 0.0, "step": 5510 }, { "epoch": 0.9269195189639223, "grad_norm": NaN, "learning_rate": 1.3541911841104149e-06, "loss": 0.0, "step": 5511 }, { "epoch": 0.9270877133966866, "grad_norm": NaN, "learning_rate": 1.3479860219234697e-06, "loss": 0.0, "step": 5512 }, { "epoch": 0.9272559078294509, "grad_norm": NaN, "learning_rate": 1.3417949147584318e-06, "loss": 0.0, "step": 5513 }, { "epoch": 0.9274241022622152, "grad_norm": NaN, "learning_rate": 1.335617864403832e-06, "loss": 0.0, "step": 5514 }, { "epoch": 0.9275922966949794, "grad_norm": NaN, "learning_rate": 1.3294548726441592e-06, "loss": 0.0, "step": 5515 }, { "epoch": 0.9277604911277437, "grad_norm": NaN, "learning_rate": 1.3233059412598392e-06, "loss": 0.0, "step": 5516 }, { "epoch": 0.9279286855605079, "grad_norm": NaN, "learning_rate": 1.3171710720272234e-06, "loss": 0.0, "step": 5517 }, { "epoch": 0.9280968799932722, "grad_norm": NaN, "learning_rate": 1.3110502667185997e-06, "loss": 0.0, "step": 5518 }, { "epoch": 0.9282650744260365, "grad_norm": NaN, "learning_rate": 1.3049435271022037e-06, "loss": 0.0, "step": 5519 }, { "epoch": 0.9284332688588007, "grad_norm": NaN, "learning_rate": 1.298850854942213e-06, "loss": 0.0, "step": 5520 }, { "epoch": 0.928601463291565, "grad_norm": NaN, "learning_rate": 1.2927722519987306e-06, "loss": 0.0, "step": 5521 }, { "epoch": 0.9287696577243293, "grad_norm": NaN, "learning_rate": 1.2867077200277856e-06, "loss": 0.0, "step": 5522 }, { "epoch": 0.9289378521570936, "grad_norm": NaN, "learning_rate": 1.2806572607813649e-06, "loss": 0.0, "step": 5523 }, { "epoch": 0.9291060465898578, "grad_norm": NaN, "learning_rate": 1.2746208760073708e-06, "loss": 0.0, "step": 5524 }, { "epoch": 0.9292742410226221, "grad_norm": NaN, "learning_rate": 1.268598567449647e-06, "loss": 0.0, "step": 5525 }, { "epoch": 0.9294424354553864, "grad_norm": NaN, "learning_rate": 1.2625903368479796e-06, "loss": 0.0, "step": 5526 }, { "epoch": 0.9296106298881507, "grad_norm": NaN, "learning_rate": 1.2565961859380693e-06, "loss": 0.0, "step": 5527 }, { "epoch": 0.929778824320915, "grad_norm": NaN, "learning_rate": 1.250616116451564e-06, "loss": 0.0, "step": 5528 }, { "epoch": 0.9299470187536792, "grad_norm": NaN, "learning_rate": 1.2446501301160374e-06, "loss": 0.0, "step": 5529 }, { "epoch": 0.9301152131864435, "grad_norm": NaN, "learning_rate": 1.2386982286549998e-06, "loss": 0.0, "step": 5530 }, { "epoch": 0.9302834076192078, "grad_norm": NaN, "learning_rate": 1.2327604137878812e-06, "loss": 0.0, "step": 5531 }, { "epoch": 0.9304516020519721, "grad_norm": NaN, "learning_rate": 1.2268366872300597e-06, "loss": 0.0, "step": 5532 }, { "epoch": 0.9306197964847364, "grad_norm": NaN, "learning_rate": 1.2209270506928271e-06, "loss": 0.0, "step": 5533 }, { "epoch": 0.9307879909175006, "grad_norm": NaN, "learning_rate": 1.2150315058834184e-06, "loss": 0.0, "step": 5534 }, { "epoch": 0.9309561853502649, "grad_norm": NaN, "learning_rate": 1.2091500545049706e-06, "loss": 0.0, "step": 5535 }, { "epoch": 0.9311243797830292, "grad_norm": NaN, "learning_rate": 1.203282698256597e-06, "loss": 0.0, "step": 5536 }, { "epoch": 0.9312925742157935, "grad_norm": NaN, "learning_rate": 1.1974294388332918e-06, "loss": 0.0, "step": 5537 }, { "epoch": 0.9314607686485578, "grad_norm": NaN, "learning_rate": 1.1915902779260024e-06, "loss": 0.0, "step": 5538 }, { "epoch": 0.931628963081322, "grad_norm": NaN, "learning_rate": 1.1857652172215905e-06, "loss": 0.0, "step": 5539 }, { "epoch": 0.9317971575140863, "grad_norm": NaN, "learning_rate": 1.1799542584028656e-06, "loss": 0.0, "step": 5540 }, { "epoch": 0.9319653519468506, "grad_norm": NaN, "learning_rate": 1.1741574031485347e-06, "loss": 0.0, "step": 5541 }, { "epoch": 0.9321335463796149, "grad_norm": NaN, "learning_rate": 1.1683746531332529e-06, "loss": 0.0, "step": 5542 }, { "epoch": 0.9323017408123792, "grad_norm": NaN, "learning_rate": 1.1626060100275837e-06, "loss": 0.0, "step": 5543 }, { "epoch": 0.9324699352451434, "grad_norm": NaN, "learning_rate": 1.156851475498033e-06, "loss": 0.0, "step": 5544 }, { "epoch": 0.9326381296779077, "grad_norm": NaN, "learning_rate": 1.1511110512070155e-06, "loss": 0.0, "step": 5545 }, { "epoch": 0.932806324110672, "grad_norm": NaN, "learning_rate": 1.1453847388128712e-06, "loss": 0.0, "step": 5546 }, { "epoch": 0.9329745185434362, "grad_norm": NaN, "learning_rate": 1.1396725399698772e-06, "loss": 0.0, "step": 5547 }, { "epoch": 0.9331427129762004, "grad_norm": NaN, "learning_rate": 1.133974456328213e-06, "loss": 0.0, "step": 5548 }, { "epoch": 0.9333109074089647, "grad_norm": NaN, "learning_rate": 1.1282904895340064e-06, "loss": 0.0, "step": 5549 }, { "epoch": 0.933479101841729, "grad_norm": NaN, "learning_rate": 1.1226206412292773e-06, "loss": 0.0, "step": 5550 }, { "epoch": 0.9336472962744933, "grad_norm": NaN, "learning_rate": 1.1169649130519932e-06, "loss": 0.0, "step": 5551 }, { "epoch": 0.9338154907072576, "grad_norm": NaN, "learning_rate": 1.111323306636014e-06, "loss": 0.0, "step": 5552 }, { "epoch": 0.9339836851400218, "grad_norm": NaN, "learning_rate": 1.1056958236111525e-06, "loss": 0.0, "step": 5553 }, { "epoch": 0.9341518795727861, "grad_norm": NaN, "learning_rate": 1.1000824656031195e-06, "loss": 0.0, "step": 5554 }, { "epoch": 0.9343200740055504, "grad_norm": NaN, "learning_rate": 1.0944832342335564e-06, "loss": 0.0, "step": 5555 }, { "epoch": 0.9344882684383147, "grad_norm": NaN, "learning_rate": 1.0888981311200031e-06, "loss": 0.0, "step": 5556 }, { "epoch": 0.934656462871079, "grad_norm": NaN, "learning_rate": 1.083327157875952e-06, "loss": 0.0, "step": 5557 }, { "epoch": 0.9348246573038432, "grad_norm": NaN, "learning_rate": 1.0777703161107877e-06, "loss": 0.0, "step": 5558 }, { "epoch": 0.9349928517366075, "grad_norm": NaN, "learning_rate": 1.0722276074298154e-06, "loss": 0.0, "step": 5559 }, { "epoch": 0.9351610461693718, "grad_norm": NaN, "learning_rate": 1.0666990334342707e-06, "loss": 0.0, "step": 5560 }, { "epoch": 0.9353292406021361, "grad_norm": NaN, "learning_rate": 1.0611845957212873e-06, "loss": 0.0, "step": 5561 }, { "epoch": 0.9354974350349003, "grad_norm": NaN, "learning_rate": 1.0556842958839242e-06, "loss": 0.0, "step": 5562 }, { "epoch": 0.9356656294676646, "grad_norm": NaN, "learning_rate": 1.0501981355111656e-06, "loss": 0.0, "step": 5563 }, { "epoch": 0.9358338239004289, "grad_norm": NaN, "learning_rate": 1.0447261161878884e-06, "loss": 0.0, "step": 5564 }, { "epoch": 0.9360020183331932, "grad_norm": NaN, "learning_rate": 1.0392682394949116e-06, "loss": 0.0, "step": 5565 }, { "epoch": 0.9361702127659575, "grad_norm": NaN, "learning_rate": 1.0338245070089515e-06, "loss": 0.0, "step": 5566 }, { "epoch": 0.9363384071987217, "grad_norm": NaN, "learning_rate": 1.0283949203026332e-06, "loss": 0.0, "step": 5567 }, { "epoch": 0.936506601631486, "grad_norm": NaN, "learning_rate": 1.0229794809445081e-06, "loss": 0.0, "step": 5568 }, { "epoch": 0.9366747960642503, "grad_norm": NaN, "learning_rate": 1.0175781904990412e-06, "loss": 0.0, "step": 5569 }, { "epoch": 0.9368429904970146, "grad_norm": NaN, "learning_rate": 1.0121910505266008e-06, "loss": 0.0, "step": 5570 }, { "epoch": 0.9370111849297789, "grad_norm": NaN, "learning_rate": 1.00681806258347e-06, "loss": 0.0, "step": 5571 }, { "epoch": 0.9371793793625431, "grad_norm": NaN, "learning_rate": 1.0014592282218404e-06, "loss": 0.0, "step": 5572 }, { "epoch": 0.9373475737953074, "grad_norm": NaN, "learning_rate": 9.96114548989835e-07, "loss": 0.0, "step": 5573 }, { "epoch": 0.9375157682280717, "grad_norm": NaN, "learning_rate": 9.907840264314572e-07, "loss": 0.0, "step": 5574 }, { "epoch": 0.937683962660836, "grad_norm": NaN, "learning_rate": 9.854676620866366e-07, "loss": 0.0, "step": 5575 }, { "epoch": 0.9378521570936001, "grad_norm": NaN, "learning_rate": 9.80165457491211e-07, "loss": 0.0, "step": 5576 }, { "epoch": 0.9380203515263644, "grad_norm": NaN, "learning_rate": 9.74877414176939e-07, "loss": 0.0, "step": 5577 }, { "epoch": 0.9381885459591287, "grad_norm": NaN, "learning_rate": 9.696035336714648e-07, "loss": 0.0, "step": 5578 }, { "epoch": 0.938356740391893, "grad_norm": NaN, "learning_rate": 9.643438174983589e-07, "loss": 0.0, "step": 5579 }, { "epoch": 0.9385249348246573, "grad_norm": NaN, "learning_rate": 9.590982671770943e-07, "loss": 0.0, "step": 5580 }, { "epoch": 0.9386931292574215, "grad_norm": NaN, "learning_rate": 9.538668842230537e-07, "loss": 0.0, "step": 5581 }, { "epoch": 0.9388613236901858, "grad_norm": NaN, "learning_rate": 9.486496701475167e-07, "loss": 0.0, "step": 5582 }, { "epoch": 0.9390295181229501, "grad_norm": NaN, "learning_rate": 9.434466264576892e-07, "loss": 0.0, "step": 5583 }, { "epoch": 0.9391977125557144, "grad_norm": NaN, "learning_rate": 9.382577546566574e-07, "loss": 0.0, "step": 5584 }, { "epoch": 0.9393659069884787, "grad_norm": NaN, "learning_rate": 9.330830562434445e-07, "loss": 0.0, "step": 5585 }, { "epoch": 0.9395341014212429, "grad_norm": NaN, "learning_rate": 9.279225327129548e-07, "loss": 0.0, "step": 5586 }, { "epoch": 0.9397022958540072, "grad_norm": NaN, "learning_rate": 9.227761855560069e-07, "loss": 0.0, "step": 5587 }, { "epoch": 0.9398704902867715, "grad_norm": NaN, "learning_rate": 9.176440162593169e-07, "loss": 0.0, "step": 5588 }, { "epoch": 0.9400386847195358, "grad_norm": NaN, "learning_rate": 9.125260263055213e-07, "loss": 0.0, "step": 5589 }, { "epoch": 0.9402068791523001, "grad_norm": NaN, "learning_rate": 9.074222171731427e-07, "loss": 0.0, "step": 5590 }, { "epoch": 0.9403750735850643, "grad_norm": NaN, "learning_rate": 9.023325903366242e-07, "loss": 0.0, "step": 5591 }, { "epoch": 0.9405432680178286, "grad_norm": NaN, "learning_rate": 8.972571472662838e-07, "loss": 0.0, "step": 5592 }, { "epoch": 0.9407114624505929, "grad_norm": NaN, "learning_rate": 8.921958894283767e-07, "loss": 0.0, "step": 5593 }, { "epoch": 0.9408796568833572, "grad_norm": NaN, "learning_rate": 8.871488182850441e-07, "loss": 0.0, "step": 5594 }, { "epoch": 0.9410478513161215, "grad_norm": NaN, "learning_rate": 8.821159352943143e-07, "loss": 0.0, "step": 5595 }, { "epoch": 0.9412160457488857, "grad_norm": NaN, "learning_rate": 8.770972419101464e-07, "loss": 0.0, "step": 5596 }, { "epoch": 0.94138424018165, "grad_norm": NaN, "learning_rate": 8.720927395823697e-07, "loss": 0.0, "step": 5597 }, { "epoch": 0.9415524346144143, "grad_norm": NaN, "learning_rate": 8.671024297567388e-07, "loss": 0.0, "step": 5598 }, { "epoch": 0.9417206290471786, "grad_norm": NaN, "learning_rate": 8.621263138749002e-07, "loss": 0.0, "step": 5599 }, { "epoch": 0.9418888234799428, "grad_norm": NaN, "learning_rate": 8.571643933743879e-07, "loss": 0.0, "step": 5600 }, { "epoch": 0.9420570179127071, "grad_norm": NaN, "learning_rate": 8.522166696886547e-07, "loss": 0.0, "step": 5601 }, { "epoch": 0.9422252123454714, "grad_norm": NaN, "learning_rate": 8.472831442470408e-07, "loss": 0.0, "step": 5602 }, { "epoch": 0.9423934067782357, "grad_norm": NaN, "learning_rate": 8.423638184747784e-07, "loss": 0.0, "step": 5603 }, { "epoch": 0.942561601211, "grad_norm": NaN, "learning_rate": 8.374586937930196e-07, "loss": 0.0, "step": 5604 }, { "epoch": 0.9427297956437642, "grad_norm": NaN, "learning_rate": 8.325677716187807e-07, "loss": 0.0, "step": 5605 }, { "epoch": 0.9428979900765284, "grad_norm": NaN, "learning_rate": 8.276910533650151e-07, "loss": 0.0, "step": 5606 }, { "epoch": 0.9430661845092927, "grad_norm": NaN, "learning_rate": 8.2282854044054e-07, "loss": 0.0, "step": 5607 }, { "epoch": 0.943234378942057, "grad_norm": NaN, "learning_rate": 8.179802342500876e-07, "loss": 0.0, "step": 5608 }, { "epoch": 0.9434025733748213, "grad_norm": NaN, "learning_rate": 8.13146136194265e-07, "loss": 0.0, "step": 5609 }, { "epoch": 0.9435707678075855, "grad_norm": NaN, "learning_rate": 8.083262476696051e-07, "loss": 0.0, "step": 5610 }, { "epoch": 0.9437389622403498, "grad_norm": NaN, "learning_rate": 8.035205700685167e-07, "loss": 0.0, "step": 5611 }, { "epoch": 0.9439071566731141, "grad_norm": NaN, "learning_rate": 7.987291047793056e-07, "loss": 0.0, "step": 5612 }, { "epoch": 0.9440753511058784, "grad_norm": NaN, "learning_rate": 7.93951853186159e-07, "loss": 0.0, "step": 5613 }, { "epoch": 0.9442435455386426, "grad_norm": NaN, "learning_rate": 7.891888166691952e-07, "loss": 0.0, "step": 5614 }, { "epoch": 0.9444117399714069, "grad_norm": NaN, "learning_rate": 7.844399966043802e-07, "loss": 0.0, "step": 5615 }, { "epoch": 0.9445799344041712, "grad_norm": NaN, "learning_rate": 7.797053943636112e-07, "loss": 0.0, "step": 5616 }, { "epoch": 0.9447481288369355, "grad_norm": NaN, "learning_rate": 7.749850113146551e-07, "loss": 0.0, "step": 5617 }, { "epoch": 0.9449163232696998, "grad_norm": NaN, "learning_rate": 7.70278848821171e-07, "loss": 0.0, "step": 5618 }, { "epoch": 0.945084517702464, "grad_norm": NaN, "learning_rate": 7.655869082427269e-07, "loss": 0.0, "step": 5619 }, { "epoch": 0.9452527121352283, "grad_norm": NaN, "learning_rate": 7.609091909347721e-07, "loss": 0.0, "step": 5620 }, { "epoch": 0.9454209065679926, "grad_norm": NaN, "learning_rate": 7.56245698248631e-07, "loss": 0.0, "step": 5621 }, { "epoch": 0.9455891010007569, "grad_norm": NaN, "learning_rate": 7.515964315315538e-07, "loss": 0.0, "step": 5622 }, { "epoch": 0.9457572954335212, "grad_norm": NaN, "learning_rate": 7.46961392126655e-07, "loss": 0.0, "step": 5623 }, { "epoch": 0.9459254898662854, "grad_norm": NaN, "learning_rate": 7.423405813729467e-07, "loss": 0.0, "step": 5624 }, { "epoch": 0.9460936842990497, "grad_norm": NaN, "learning_rate": 7.377340006053169e-07, "loss": 0.0, "step": 5625 }, { "epoch": 0.946261878731814, "grad_norm": NaN, "learning_rate": 7.33141651154573e-07, "loss": 0.0, "step": 5626 }, { "epoch": 0.9464300731645783, "grad_norm": NaN, "learning_rate": 7.285635343473818e-07, "loss": 0.0, "step": 5627 }, { "epoch": 0.9465982675973426, "grad_norm": NaN, "learning_rate": 7.239996515063186e-07, "loss": 0.0, "step": 5628 }, { "epoch": 0.9467664620301068, "grad_norm": NaN, "learning_rate": 7.194500039498286e-07, "loss": 0.0, "step": 5629 }, { "epoch": 0.9469346564628711, "grad_norm": NaN, "learning_rate": 7.149145929922607e-07, "loss": 0.0, "step": 5630 }, { "epoch": 0.9471028508956354, "grad_norm": NaN, "learning_rate": 7.103934199438444e-07, "loss": 0.0, "step": 5631 }, { "epoch": 0.9472710453283997, "grad_norm": NaN, "learning_rate": 7.058864861106907e-07, "loss": 0.0, "step": 5632 }, { "epoch": 0.947439239761164, "grad_norm": NaN, "learning_rate": 7.013937927948022e-07, "loss": 0.0, "step": 5633 }, { "epoch": 0.9476074341939282, "grad_norm": NaN, "learning_rate": 6.969153412940743e-07, "loss": 0.0, "step": 5634 }, { "epoch": 0.9477756286266924, "grad_norm": NaN, "learning_rate": 6.924511329022831e-07, "loss": 0.0, "step": 5635 }, { "epoch": 0.9479438230594567, "grad_norm": NaN, "learning_rate": 6.880011689090804e-07, "loss": 0.0, "step": 5636 }, { "epoch": 0.948112017492221, "grad_norm": NaN, "learning_rate": 6.8356545060001e-07, "loss": 0.0, "step": 5637 }, { "epoch": 0.9482802119249852, "grad_norm": NaN, "learning_rate": 6.79143979256508e-07, "loss": 0.0, "step": 5638 }, { "epoch": 0.9484484063577495, "grad_norm": NaN, "learning_rate": 6.747367561558859e-07, "loss": 0.0, "step": 5639 }, { "epoch": 0.9486166007905138, "grad_norm": NaN, "learning_rate": 6.703437825713421e-07, "loss": 0.0, "step": 5640 }, { "epoch": 0.9487847952232781, "grad_norm": NaN, "learning_rate": 6.659650597719502e-07, "loss": 0.0, "step": 5641 }, { "epoch": 0.9489529896560424, "grad_norm": NaN, "learning_rate": 6.616005890226817e-07, "loss": 0.0, "step": 5642 }, { "epoch": 0.9491211840888066, "grad_norm": NaN, "learning_rate": 6.572503715843836e-07, "loss": 0.0, "step": 5643 }, { "epoch": 0.9492893785215709, "grad_norm": NaN, "learning_rate": 6.52914408713784e-07, "loss": 0.0, "step": 5644 }, { "epoch": 0.9494575729543352, "grad_norm": NaN, "learning_rate": 6.485927016634863e-07, "loss": 0.0, "step": 5645 }, { "epoch": 0.9496257673870995, "grad_norm": NaN, "learning_rate": 6.44285251681992e-07, "loss": 0.0, "step": 5646 }, { "epoch": 0.9497939618198638, "grad_norm": NaN, "learning_rate": 6.399920600136722e-07, "loss": 0.0, "step": 5647 }, { "epoch": 0.949962156252628, "grad_norm": NaN, "learning_rate": 6.357131278987849e-07, "loss": 0.0, "step": 5648 }, { "epoch": 0.9501303506853923, "grad_norm": NaN, "learning_rate": 6.314484565734636e-07, "loss": 0.0, "step": 5649 }, { "epoch": 0.9502985451181566, "grad_norm": NaN, "learning_rate": 6.271980472697225e-07, "loss": 0.0, "step": 5650 }, { "epoch": 0.9504667395509209, "grad_norm": NaN, "learning_rate": 6.229619012154575e-07, "loss": 0.0, "step": 5651 }, { "epoch": 0.9506349339836851, "grad_norm": NaN, "learning_rate": 6.18740019634445e-07, "loss": 0.0, "step": 5652 }, { "epoch": 0.9508031284164494, "grad_norm": NaN, "learning_rate": 6.145324037463429e-07, "loss": 0.0, "step": 5653 }, { "epoch": 0.9509713228492137, "grad_norm": NaN, "learning_rate": 6.103390547666788e-07, "loss": 0.0, "step": 5654 }, { "epoch": 0.951139517281978, "grad_norm": NaN, "learning_rate": 6.061599739068668e-07, "loss": 0.0, "step": 5655 }, { "epoch": 0.9513077117147423, "grad_norm": NaN, "learning_rate": 6.019951623741916e-07, "loss": 0.0, "step": 5656 }, { "epoch": 0.9514759061475065, "grad_norm": NaN, "learning_rate": 5.978446213718291e-07, "loss": 0.0, "step": 5657 }, { "epoch": 0.9516441005802708, "grad_norm": NaN, "learning_rate": 5.937083520988151e-07, "loss": 0.0, "step": 5658 }, { "epoch": 0.9518122950130351, "grad_norm": NaN, "learning_rate": 5.895863557500769e-07, "loss": 0.0, "step": 5659 }, { "epoch": 0.9519804894457994, "grad_norm": NaN, "learning_rate": 5.85478633516412e-07, "loss": 0.0, "step": 5660 }, { "epoch": 0.9521486838785637, "grad_norm": NaN, "learning_rate": 5.813851865844988e-07, "loss": 0.0, "step": 5661 }, { "epoch": 0.9523168783113279, "grad_norm": NaN, "learning_rate": 5.773060161368804e-07, "loss": 0.0, "step": 5662 }, { "epoch": 0.9524850727440922, "grad_norm": NaN, "learning_rate": 5.732411233519919e-07, "loss": 0.0, "step": 5663 }, { "epoch": 0.9526532671768565, "grad_norm": NaN, "learning_rate": 5.691905094041272e-07, "loss": 0.0, "step": 5664 }, { "epoch": 0.9528214616096207, "grad_norm": NaN, "learning_rate": 5.651541754634726e-07, "loss": 0.0, "step": 5665 }, { "epoch": 0.952989656042385, "grad_norm": NaN, "learning_rate": 5.611321226960675e-07, "loss": 0.0, "step": 5666 }, { "epoch": 0.9531578504751492, "grad_norm": NaN, "learning_rate": 5.571243522638547e-07, "loss": 0.0, "step": 5667 }, { "epoch": 0.9533260449079135, "grad_norm": NaN, "learning_rate": 5.531308653246192e-07, "loss": 0.0, "step": 5668 }, { "epoch": 0.9534942393406778, "grad_norm": NaN, "learning_rate": 5.491516630320381e-07, "loss": 0.0, "step": 5669 }, { "epoch": 0.9536624337734421, "grad_norm": NaN, "learning_rate": 5.451867465356641e-07, "loss": 0.0, "step": 5670 }, { "epoch": 0.9538306282062063, "grad_norm": NaN, "learning_rate": 5.412361169809088e-07, "loss": 0.0, "step": 5671 }, { "epoch": 0.9539988226389706, "grad_norm": NaN, "learning_rate": 5.372997755090759e-07, "loss": 0.0, "step": 5672 }, { "epoch": 0.9541670170717349, "grad_norm": NaN, "learning_rate": 5.333777232573223e-07, "loss": 0.0, "step": 5673 }, { "epoch": 0.9543352115044992, "grad_norm": NaN, "learning_rate": 5.294699613586862e-07, "loss": 0.0, "step": 5674 }, { "epoch": 0.9545034059372635, "grad_norm": NaN, "learning_rate": 5.255764909420757e-07, "loss": 0.0, "step": 5675 }, { "epoch": 0.9546716003700277, "grad_norm": NaN, "learning_rate": 5.216973131322689e-07, "loss": 0.0, "step": 5676 }, { "epoch": 0.954839794802792, "grad_norm": NaN, "learning_rate": 5.178324290499248e-07, "loss": 0.0, "step": 5677 }, { "epoch": 0.9550079892355563, "grad_norm": NaN, "learning_rate": 5.139818398115559e-07, "loss": 0.0, "step": 5678 }, { "epoch": 0.9551761836683206, "grad_norm": NaN, "learning_rate": 5.101455465295557e-07, "loss": 0.0, "step": 5679 }, { "epoch": 0.9553443781010849, "grad_norm": NaN, "learning_rate": 5.063235503121933e-07, "loss": 0.0, "step": 5680 }, { "epoch": 0.9555125725338491, "grad_norm": NaN, "learning_rate": 5.025158522635964e-07, "loss": 0.0, "step": 5681 }, { "epoch": 0.9556807669666134, "grad_norm": NaN, "learning_rate": 4.987224534837631e-07, "loss": 0.0, "step": 5682 }, { "epoch": 0.9558489613993777, "grad_norm": NaN, "learning_rate": 4.949433550685722e-07, "loss": 0.0, "step": 5683 }, { "epoch": 0.956017155832142, "grad_norm": NaN, "learning_rate": 4.911785581097561e-07, "loss": 0.0, "step": 5684 }, { "epoch": 0.9561853502649063, "grad_norm": NaN, "learning_rate": 4.874280636949225e-07, "loss": 0.0, "step": 5685 }, { "epoch": 0.9563535446976705, "grad_norm": NaN, "learning_rate": 4.836918729075435e-07, "loss": 0.0, "step": 5686 }, { "epoch": 0.9565217391304348, "grad_norm": NaN, "learning_rate": 4.79969986826978e-07, "loss": 0.0, "step": 5687 }, { "epoch": 0.9566899335631991, "grad_norm": NaN, "learning_rate": 4.7626240652842155e-07, "loss": 0.0, "step": 5688 }, { "epoch": 0.9568581279959634, "grad_norm": NaN, "learning_rate": 4.7256913308295627e-07, "loss": 0.0, "step": 5689 }, { "epoch": 0.9570263224287276, "grad_norm": NaN, "learning_rate": 4.688901675575341e-07, "loss": 0.0, "step": 5690 }, { "epoch": 0.9571945168614919, "grad_norm": NaN, "learning_rate": 4.6522551101496057e-07, "loss": 0.0, "step": 5691 }, { "epoch": 0.9573627112942562, "grad_norm": NaN, "learning_rate": 4.6157516451391656e-07, "loss": 0.0, "step": 5692 }, { "epoch": 0.9575309057270205, "grad_norm": NaN, "learning_rate": 4.579391291089419e-07, "loss": 0.0, "step": 5693 }, { "epoch": 0.9576991001597848, "grad_norm": NaN, "learning_rate": 4.543174058504518e-07, "loss": 0.0, "step": 5694 }, { "epoch": 0.9578672945925489, "grad_norm": NaN, "learning_rate": 4.507099957847205e-07, "loss": 0.0, "step": 5695 }, { "epoch": 0.9580354890253132, "grad_norm": NaN, "learning_rate": 4.4711689995389216e-07, "loss": 0.0, "step": 5696 }, { "epoch": 0.9582036834580775, "grad_norm": NaN, "learning_rate": 4.435381193959587e-07, "loss": 0.0, "step": 5697 }, { "epoch": 0.9583718778908418, "grad_norm": NaN, "learning_rate": 4.3997365514480416e-07, "loss": 0.0, "step": 5698 }, { "epoch": 0.958540072323606, "grad_norm": NaN, "learning_rate": 4.364235082301549e-07, "loss": 0.0, "step": 5699 }, { "epoch": 0.9587082667563703, "grad_norm": NaN, "learning_rate": 4.3288767967760715e-07, "loss": 0.0, "step": 5700 }, { "epoch": 0.9588764611891346, "grad_norm": NaN, "learning_rate": 4.293661705086327e-07, "loss": 0.0, "step": 5701 }, { "epoch": 0.9590446556218989, "grad_norm": NaN, "learning_rate": 4.258589817405401e-07, "loss": 0.0, "step": 5702 }, { "epoch": 0.9592128500546632, "grad_norm": NaN, "learning_rate": 4.2236611438652986e-07, "loss": 0.0, "step": 5703 }, { "epoch": 0.9593810444874274, "grad_norm": NaN, "learning_rate": 4.188875694556449e-07, "loss": 0.0, "step": 5704 }, { "epoch": 0.9595492389201917, "grad_norm": NaN, "learning_rate": 4.1542334795280355e-07, "loss": 0.0, "step": 5705 }, { "epoch": 0.959717433352956, "grad_norm": NaN, "learning_rate": 4.119734508787776e-07, "loss": 0.0, "step": 5706 }, { "epoch": 0.9598856277857203, "grad_norm": NaN, "learning_rate": 4.0853787923020303e-07, "loss": 0.0, "step": 5707 }, { "epoch": 0.9600538222184846, "grad_norm": NaN, "learning_rate": 4.0511663399958044e-07, "loss": 0.0, "step": 5708 }, { "epoch": 0.9602220166512488, "grad_norm": NaN, "learning_rate": 4.017097161752692e-07, "loss": 0.0, "step": 5709 }, { "epoch": 0.9603902110840131, "grad_norm": NaN, "learning_rate": 3.983171267414876e-07, "loss": 0.0, "step": 5710 }, { "epoch": 0.9605584055167774, "grad_norm": NaN, "learning_rate": 3.949388666783127e-07, "loss": 0.0, "step": 5711 }, { "epoch": 0.9607265999495417, "grad_norm": NaN, "learning_rate": 3.9157493696169724e-07, "loss": 0.0, "step": 5712 }, { "epoch": 0.960894794382306, "grad_norm": NaN, "learning_rate": 3.8822533856343044e-07, "loss": 0.0, "step": 5713 }, { "epoch": 0.9610629888150702, "grad_norm": NaN, "learning_rate": 3.848900724511828e-07, "loss": 0.0, "step": 5714 }, { "epoch": 0.9612311832478345, "grad_norm": NaN, "learning_rate": 3.815691395884724e-07, "loss": 0.0, "step": 5715 }, { "epoch": 0.9613993776805988, "grad_norm": NaN, "learning_rate": 3.782625409346763e-07, "loss": 0.0, "step": 5716 }, { "epoch": 0.9615675721133631, "grad_norm": NaN, "learning_rate": 3.749702774450414e-07, "loss": 0.0, "step": 5717 }, { "epoch": 0.9617357665461274, "grad_norm": NaN, "learning_rate": 3.7169235007065707e-07, "loss": 0.0, "step": 5718 }, { "epoch": 0.9619039609788916, "grad_norm": NaN, "learning_rate": 3.684287597584879e-07, "loss": 0.0, "step": 5719 }, { "epoch": 0.9620721554116559, "grad_norm": NaN, "learning_rate": 3.651795074513409e-07, "loss": 0.0, "step": 5720 }, { "epoch": 0.9622403498444202, "grad_norm": NaN, "learning_rate": 3.619445940878929e-07, "loss": 0.0, "step": 5721 }, { "epoch": 0.9624085442771845, "grad_norm": NaN, "learning_rate": 3.587240206026743e-07, "loss": 0.0, "step": 5722 }, { "epoch": 0.9625767387099488, "grad_norm": NaN, "learning_rate": 3.555177879260685e-07, "loss": 0.0, "step": 5723 }, { "epoch": 0.9627449331427129, "grad_norm": NaN, "learning_rate": 3.5232589698432907e-07, "loss": 0.0, "step": 5724 }, { "epoch": 0.9629131275754772, "grad_norm": NaN, "learning_rate": 3.491483486995517e-07, "loss": 0.0, "step": 5725 }, { "epoch": 0.9630813220082415, "grad_norm": NaN, "learning_rate": 3.459851439896966e-07, "loss": 0.0, "step": 5726 }, { "epoch": 0.9632495164410058, "grad_norm": NaN, "learning_rate": 3.428362837685717e-07, "loss": 0.0, "step": 5727 }, { "epoch": 0.96341771087377, "grad_norm": NaN, "learning_rate": 3.397017689458548e-07, "loss": 0.0, "step": 5728 }, { "epoch": 0.9635859053065343, "grad_norm": NaN, "learning_rate": 3.365816004270661e-07, "loss": 0.0, "step": 5729 }, { "epoch": 0.9637540997392986, "grad_norm": NaN, "learning_rate": 3.334757791135956e-07, "loss": 0.0, "step": 5730 }, { "epoch": 0.9639222941720629, "grad_norm": NaN, "learning_rate": 3.303843059026757e-07, "loss": 0.0, "step": 5731 }, { "epoch": 0.9640904886048272, "grad_norm": NaN, "learning_rate": 3.273071816873974e-07, "loss": 0.0, "step": 5732 }, { "epoch": 0.9642586830375914, "grad_norm": NaN, "learning_rate": 3.2424440735670526e-07, "loss": 0.0, "step": 5733 }, { "epoch": 0.9644268774703557, "grad_norm": NaN, "learning_rate": 3.211959837954026e-07, "loss": 0.0, "step": 5734 }, { "epoch": 0.96459507190312, "grad_norm": NaN, "learning_rate": 3.1816191188415166e-07, "loss": 0.0, "step": 5735 }, { "epoch": 0.9647632663358843, "grad_norm": NaN, "learning_rate": 3.151421924994513e-07, "loss": 0.0, "step": 5736 }, { "epoch": 0.9649314607686486, "grad_norm": NaN, "learning_rate": 3.121368265136704e-07, "loss": 0.0, "step": 5737 }, { "epoch": 0.9650996552014128, "grad_norm": NaN, "learning_rate": 3.091458147950255e-07, "loss": 0.0, "step": 5738 }, { "epoch": 0.9652678496341771, "grad_norm": NaN, "learning_rate": 3.0616915820758095e-07, "loss": 0.0, "step": 5739 }, { "epoch": 0.9654360440669414, "grad_norm": NaN, "learning_rate": 3.0320685761127123e-07, "loss": 0.0, "step": 5740 }, { "epoch": 0.9656042384997057, "grad_norm": NaN, "learning_rate": 3.002589138618561e-07, "loss": 0.0, "step": 5741 }, { "epoch": 0.96577243293247, "grad_norm": NaN, "learning_rate": 2.973253278109767e-07, "loss": 0.0, "step": 5742 }, { "epoch": 0.9659406273652342, "grad_norm": NaN, "learning_rate": 2.9440610030610494e-07, "loss": 0.0, "step": 5743 }, { "epoch": 0.9661088217979985, "grad_norm": NaN, "learning_rate": 2.91501232190583e-07, "loss": 0.0, "step": 5744 }, { "epoch": 0.9662770162307628, "grad_norm": NaN, "learning_rate": 2.8861072430358404e-07, "loss": 0.0, "step": 5745 }, { "epoch": 0.9664452106635271, "grad_norm": NaN, "learning_rate": 2.8573457748014564e-07, "loss": 0.0, "step": 5746 }, { "epoch": 0.9666134050962913, "grad_norm": NaN, "learning_rate": 2.8287279255115873e-07, "loss": 0.0, "step": 5747 }, { "epoch": 0.9667815995290556, "grad_norm": NaN, "learning_rate": 2.800253703433564e-07, "loss": 0.0, "step": 5748 }, { "epoch": 0.9669497939618199, "grad_norm": NaN, "learning_rate": 2.7719231167933067e-07, "loss": 0.0, "step": 5749 }, { "epoch": 0.9671179883945842, "grad_norm": NaN, "learning_rate": 2.743736173775213e-07, "loss": 0.0, "step": 5750 }, { "epoch": 0.9672861828273485, "grad_norm": NaN, "learning_rate": 2.715692882522103e-07, "loss": 0.0, "step": 5751 }, { "epoch": 0.9674543772601127, "grad_norm": NaN, "learning_rate": 2.687793251135384e-07, "loss": 0.0, "step": 5752 }, { "epoch": 0.967622571692877, "grad_norm": NaN, "learning_rate": 2.6600372876750544e-07, "loss": 0.0, "step": 5753 }, { "epoch": 0.9677907661256412, "grad_norm": NaN, "learning_rate": 2.6324250001593664e-07, "loss": 0.0, "step": 5754 }, { "epoch": 0.9679589605584055, "grad_norm": NaN, "learning_rate": 2.604956396565328e-07, "loss": 0.0, "step": 5755 }, { "epoch": 0.9681271549911697, "grad_norm": NaN, "learning_rate": 2.577631484828147e-07, "loss": 0.0, "step": 5756 }, { "epoch": 0.968295349423934, "grad_norm": NaN, "learning_rate": 2.550450272841842e-07, "loss": 0.0, "step": 5757 }, { "epoch": 0.9684635438566983, "grad_norm": NaN, "learning_rate": 2.523412768458688e-07, "loss": 0.0, "step": 5758 }, { "epoch": 0.9686317382894626, "grad_norm": NaN, "learning_rate": 2.4965189794895485e-07, "loss": 0.0, "step": 5759 }, { "epoch": 0.9687999327222269, "grad_norm": NaN, "learning_rate": 2.469768913703707e-07, "loss": 0.0, "step": 5760 }, { "epoch": 0.9689681271549911, "grad_norm": NaN, "learning_rate": 2.443162578828928e-07, "loss": 0.0, "step": 5761 }, { "epoch": 0.9691363215877554, "grad_norm": NaN, "learning_rate": 2.4166999825515625e-07, "loss": 0.0, "step": 5762 }, { "epoch": 0.9693045160205197, "grad_norm": NaN, "learning_rate": 2.3903811325163283e-07, "loss": 0.0, "step": 5763 }, { "epoch": 0.969472710453284, "grad_norm": NaN, "learning_rate": 2.3642060363264217e-07, "loss": 0.0, "step": 5764 }, { "epoch": 0.9696409048860483, "grad_norm": NaN, "learning_rate": 2.3381747015435163e-07, "loss": 0.0, "step": 5765 }, { "epoch": 0.9698090993188125, "grad_norm": NaN, "learning_rate": 2.312287135687874e-07, "loss": 0.0, "step": 5766 }, { "epoch": 0.9699772937515768, "grad_norm": NaN, "learning_rate": 2.2865433462380125e-07, "loss": 0.0, "step": 5767 }, { "epoch": 0.9701454881843411, "grad_norm": NaN, "learning_rate": 2.2609433406310941e-07, "loss": 0.0, "step": 5768 }, { "epoch": 0.9703136826171054, "grad_norm": NaN, "learning_rate": 2.2354871262626477e-07, "loss": 0.0, "step": 5769 }, { "epoch": 0.9704818770498697, "grad_norm": NaN, "learning_rate": 2.2101747104866788e-07, "loss": 0.0, "step": 5770 }, { "epoch": 0.9706500714826339, "grad_norm": NaN, "learning_rate": 2.185006100615672e-07, "loss": 0.0, "step": 5771 }, { "epoch": 0.9708182659153982, "grad_norm": NaN, "learning_rate": 2.159981303920533e-07, "loss": 0.0, "step": 5772 }, { "epoch": 0.9709864603481625, "grad_norm": NaN, "learning_rate": 2.1351003276307014e-07, "loss": 0.0, "step": 5773 }, { "epoch": 0.9711546547809268, "grad_norm": NaN, "learning_rate": 2.1103631789339272e-07, "loss": 0.0, "step": 5774 }, { "epoch": 0.971322849213691, "grad_norm": NaN, "learning_rate": 2.0857698649766055e-07, "loss": 0.0, "step": 5775 }, { "epoch": 0.9714910436464553, "grad_norm": NaN, "learning_rate": 2.061320392863386e-07, "loss": 0.0, "step": 5776 }, { "epoch": 0.9716592380792196, "grad_norm": NaN, "learning_rate": 2.0370147696574526e-07, "loss": 0.0, "step": 5777 }, { "epoch": 0.9718274325119839, "grad_norm": NaN, "learning_rate": 2.012853002380466e-07, "loss": 0.0, "step": 5778 }, { "epoch": 0.9719956269447482, "grad_norm": NaN, "learning_rate": 1.988835098012509e-07, "loss": 0.0, "step": 5779 }, { "epoch": 0.9721638213775124, "grad_norm": NaN, "learning_rate": 1.9649610634919767e-07, "loss": 0.0, "step": 5780 }, { "epoch": 0.9723320158102767, "grad_norm": NaN, "learning_rate": 1.9412309057159073e-07, "loss": 0.0, "step": 5781 }, { "epoch": 0.972500210243041, "grad_norm": NaN, "learning_rate": 1.9176446315397056e-07, "loss": 0.0, "step": 5782 }, { "epoch": 0.9726684046758052, "grad_norm": NaN, "learning_rate": 1.894202247777088e-07, "loss": 0.0, "step": 5783 }, { "epoch": 0.9728365991085695, "grad_norm": NaN, "learning_rate": 1.8709037612003045e-07, "loss": 0.0, "step": 5784 }, { "epoch": 0.9730047935413337, "grad_norm": NaN, "learning_rate": 1.8477491785400813e-07, "loss": 0.0, "step": 5785 }, { "epoch": 0.973172987974098, "grad_norm": NaN, "learning_rate": 1.8247385064855127e-07, "loss": 0.0, "step": 5786 }, { "epoch": 0.9733411824068623, "grad_norm": NaN, "learning_rate": 1.8018717516841143e-07, "loss": 0.0, "step": 5787 }, { "epoch": 0.9735093768396266, "grad_norm": NaN, "learning_rate": 1.779148920741769e-07, "loss": 0.0, "step": 5788 }, { "epoch": 0.9736775712723909, "grad_norm": NaN, "learning_rate": 1.7565700202229473e-07, "loss": 0.0, "step": 5789 }, { "epoch": 0.9738457657051551, "grad_norm": NaN, "learning_rate": 1.7341350566504323e-07, "loss": 0.0, "step": 5790 }, { "epoch": 0.9740139601379194, "grad_norm": NaN, "learning_rate": 1.7118440365053722e-07, "loss": 0.0, "step": 5791 }, { "epoch": 0.9741821545706837, "grad_norm": NaN, "learning_rate": 1.6896969662273944e-07, "loss": 0.0, "step": 5792 }, { "epoch": 0.974350349003448, "grad_norm": NaN, "learning_rate": 1.6676938522146023e-07, "loss": 0.0, "step": 5793 }, { "epoch": 0.9745185434362122, "grad_norm": NaN, "learning_rate": 1.645834700823412e-07, "loss": 0.0, "step": 5794 }, { "epoch": 0.9746867378689765, "grad_norm": NaN, "learning_rate": 1.6241195183686608e-07, "loss": 0.0, "step": 5795 }, { "epoch": 0.9748549323017408, "grad_norm": NaN, "learning_rate": 1.6025483111236638e-07, "loss": 0.0, "step": 5796 }, { "epoch": 0.9750231267345051, "grad_norm": NaN, "learning_rate": 1.581121085320103e-07, "loss": 0.0, "step": 5797 }, { "epoch": 0.9751913211672694, "grad_norm": NaN, "learning_rate": 1.5598378471480267e-07, "loss": 0.0, "step": 5798 }, { "epoch": 0.9753595156000336, "grad_norm": NaN, "learning_rate": 1.5386986027559613e-07, "loss": 0.0, "step": 5799 }, { "epoch": 0.9755277100327979, "grad_norm": NaN, "learning_rate": 1.5177033582507993e-07, "loss": 0.0, "step": 5800 }, { "epoch": 0.9756959044655622, "grad_norm": NaN, "learning_rate": 1.4968521196978002e-07, "loss": 0.0, "step": 5801 }, { "epoch": 0.9758640988983265, "grad_norm": NaN, "learning_rate": 1.4761448931206455e-07, "loss": 0.0, "step": 5802 }, { "epoch": 0.9760322933310908, "grad_norm": NaN, "learning_rate": 1.4555816845014948e-07, "loss": 0.0, "step": 5803 }, { "epoch": 0.976200487763855, "grad_norm": NaN, "learning_rate": 1.4351624997807623e-07, "loss": 0.0, "step": 5804 }, { "epoch": 0.9763686821966193, "grad_norm": NaN, "learning_rate": 1.4148873448573408e-07, "loss": 0.0, "step": 5805 }, { "epoch": 0.9765368766293836, "grad_norm": NaN, "learning_rate": 1.3947562255884338e-07, "loss": 0.0, "step": 5806 }, { "epoch": 0.9767050710621479, "grad_norm": NaN, "learning_rate": 1.374769147789834e-07, "loss": 0.0, "step": 5807 }, { "epoch": 0.9768732654949122, "grad_norm": NaN, "learning_rate": 1.3549261172354777e-07, "loss": 0.0, "step": 5808 }, { "epoch": 0.9770414599276764, "grad_norm": NaN, "learning_rate": 1.3352271396577798e-07, "loss": 0.0, "step": 5809 }, { "epoch": 0.9772096543604407, "grad_norm": NaN, "learning_rate": 1.3156722207476324e-07, "loss": 0.0, "step": 5810 }, { "epoch": 0.977377848793205, "grad_norm": NaN, "learning_rate": 1.2962613661541834e-07, "loss": 0.0, "step": 5811 }, { "epoch": 0.9775460432259693, "grad_norm": NaN, "learning_rate": 1.2769945814850582e-07, "loss": 0.0, "step": 5812 }, { "epoch": 0.9777142376587334, "grad_norm": NaN, "learning_rate": 1.2578718723061378e-07, "loss": 0.0, "step": 5813 }, { "epoch": 0.9778824320914977, "grad_norm": NaN, "learning_rate": 1.2388932441418367e-07, "loss": 0.0, "step": 5814 }, { "epoch": 0.978050626524262, "grad_norm": NaN, "learning_rate": 1.220058702474769e-07, "loss": 0.0, "step": 5815 }, { "epoch": 0.9782188209570263, "grad_norm": NaN, "learning_rate": 1.2013682527461379e-07, "loss": 0.0, "step": 5816 }, { "epoch": 0.9783870153897906, "grad_norm": NaN, "learning_rate": 1.1828219003553465e-07, "loss": 0.0, "step": 5817 }, { "epoch": 0.9785552098225548, "grad_norm": NaN, "learning_rate": 1.16441965066022e-07, "loss": 0.0, "step": 5818 }, { "epoch": 0.9787234042553191, "grad_norm": NaN, "learning_rate": 1.1461615089770062e-07, "loss": 0.0, "step": 5819 }, { "epoch": 0.9788915986880834, "grad_norm": NaN, "learning_rate": 1.1280474805802632e-07, "loss": 0.0, "step": 5820 }, { "epoch": 0.9790597931208477, "grad_norm": NaN, "learning_rate": 1.110077570702861e-07, "loss": 0.0, "step": 5821 }, { "epoch": 0.979227987553612, "grad_norm": NaN, "learning_rate": 1.0922517845362023e-07, "loss": 0.0, "step": 5822 }, { "epoch": 0.9793961819863762, "grad_norm": NaN, "learning_rate": 1.0745701272298902e-07, "loss": 0.0, "step": 5823 }, { "epoch": 0.9795643764191405, "grad_norm": NaN, "learning_rate": 1.0570326038920053e-07, "loss": 0.0, "step": 5824 }, { "epoch": 0.9797325708519048, "grad_norm": NaN, "learning_rate": 1.0396392195889393e-07, "loss": 0.0, "step": 5825 }, { "epoch": 0.9799007652846691, "grad_norm": NaN, "learning_rate": 1.022389979345395e-07, "loss": 0.0, "step": 5826 }, { "epoch": 0.9800689597174334, "grad_norm": NaN, "learning_rate": 1.0052848881444976e-07, "loss": 0.0, "step": 5827 }, { "epoch": 0.9802371541501976, "grad_norm": NaN, "learning_rate": 9.883239509277942e-08, "loss": 0.0, "step": 5828 }, { "epoch": 0.9804053485829619, "grad_norm": NaN, "learning_rate": 9.715071725949765e-08, "loss": 0.0, "step": 5829 }, { "epoch": 0.9805735430157262, "grad_norm": NaN, "learning_rate": 9.54834558004325e-08, "loss": 0.0, "step": 5830 }, { "epoch": 0.9807417374484905, "grad_norm": NaN, "learning_rate": 9.383061119723757e-08, "loss": 0.0, "step": 5831 }, { "epoch": 0.9809099318812547, "grad_norm": NaN, "learning_rate": 9.219218392739759e-08, "loss": 0.0, "step": 5832 }, { "epoch": 0.981078126314019, "grad_norm": NaN, "learning_rate": 9.056817446422839e-08, "loss": 0.0, "step": 5833 }, { "epoch": 0.9812463207467833, "grad_norm": NaN, "learning_rate": 8.895858327690464e-08, "loss": 0.0, "step": 5834 }, { "epoch": 0.9814145151795476, "grad_norm": NaN, "learning_rate": 8.736341083041e-08, "loss": 0.0, "step": 5835 }, { "epoch": 0.9815827096123119, "grad_norm": NaN, "learning_rate": 8.578265758557024e-08, "loss": 0.0, "step": 5836 }, { "epoch": 0.9817509040450761, "grad_norm": NaN, "learning_rate": 8.421632399904788e-08, "loss": 0.0, "step": 5837 }, { "epoch": 0.9819190984778404, "grad_norm": NaN, "learning_rate": 8.266441052334206e-08, "loss": 0.0, "step": 5838 }, { "epoch": 0.9820872929106047, "grad_norm": NaN, "learning_rate": 8.112691760677749e-08, "loss": 0.0, "step": 5839 }, { "epoch": 0.982255487343369, "grad_norm": NaN, "learning_rate": 7.960384569353219e-08, "loss": 0.0, "step": 5840 }, { "epoch": 0.9824236817761333, "grad_norm": NaN, "learning_rate": 7.809519522358755e-08, "loss": 0.0, "step": 5841 }, { "epoch": 0.9825918762088974, "grad_norm": NaN, "learning_rate": 7.660096663278938e-08, "loss": 0.0, "step": 5842 }, { "epoch": 0.9827600706416617, "grad_norm": NaN, "learning_rate": 7.512116035279237e-08, "loss": 0.0, "step": 5843 }, { "epoch": 0.982928265074426, "grad_norm": NaN, "learning_rate": 7.365577681110458e-08, "loss": 0.0, "step": 5844 }, { "epoch": 0.9830964595071903, "grad_norm": NaN, "learning_rate": 7.220481643105403e-08, "loss": 0.0, "step": 5845 }, { "epoch": 0.9832646539399545, "grad_norm": NaN, "learning_rate": 7.076827963181099e-08, "loss": 0.0, "step": 5846 }, { "epoch": 0.9834328483727188, "grad_norm": NaN, "learning_rate": 6.934616682837125e-08, "loss": 0.0, "step": 5847 }, { "epoch": 0.9836010428054831, "grad_norm": NaN, "learning_rate": 6.79384784315673e-08, "loss": 0.0, "step": 5848 }, { "epoch": 0.9837692372382474, "grad_norm": NaN, "learning_rate": 6.65452148480683e-08, "loss": 0.0, "step": 5849 }, { "epoch": 0.9839374316710117, "grad_norm": NaN, "learning_rate": 6.516637648036894e-08, "loss": 0.0, "step": 5850 }, { "epoch": 0.9841056261037759, "grad_norm": NaN, "learning_rate": 6.380196372680058e-08, "loss": 0.0, "step": 5851 }, { "epoch": 0.9842738205365402, "grad_norm": NaN, "learning_rate": 6.245197698152571e-08, "loss": 0.0, "step": 5852 }, { "epoch": 0.9844420149693045, "grad_norm": NaN, "learning_rate": 6.111641663454903e-08, "loss": 0.0, "step": 5853 }, { "epoch": 0.9846102094020688, "grad_norm": NaN, "learning_rate": 5.979528307168414e-08, "loss": 0.0, "step": 5854 }, { "epoch": 0.9847784038348331, "grad_norm": NaN, "learning_rate": 5.84885766746035e-08, "loss": 0.0, "step": 5855 }, { "epoch": 0.9849465982675973, "grad_norm": NaN, "learning_rate": 5.7196297820794054e-08, "loss": 0.0, "step": 5856 }, { "epoch": 0.9851147927003616, "grad_norm": NaN, "learning_rate": 5.591844688358494e-08, "loss": 0.0, "step": 5857 }, { "epoch": 0.9852829871331259, "grad_norm": NaN, "learning_rate": 5.465502423213087e-08, "loss": 0.0, "step": 5858 }, { "epoch": 0.9854511815658902, "grad_norm": NaN, "learning_rate": 5.340603023141766e-08, "loss": 0.0, "step": 5859 }, { "epoch": 0.9856193759986545, "grad_norm": NaN, "learning_rate": 5.217146524226779e-08, "loss": 0.0, "step": 5860 }, { "epoch": 0.9857875704314187, "grad_norm": NaN, "learning_rate": 5.0951329621340416e-08, "loss": 0.0, "step": 5861 }, { "epoch": 0.985955764864183, "grad_norm": NaN, "learning_rate": 4.9745623721109135e-08, "loss": 0.0, "step": 5862 }, { "epoch": 0.9861239592969473, "grad_norm": NaN, "learning_rate": 4.855434788988977e-08, "loss": 0.0, "step": 5863 }, { "epoch": 0.9862921537297116, "grad_norm": NaN, "learning_rate": 4.737750247183481e-08, "loss": 0.0, "step": 5864 }, { "epoch": 0.9864603481624759, "grad_norm": NaN, "learning_rate": 4.621508780691119e-08, "loss": 0.0, "step": 5865 }, { "epoch": 0.9866285425952401, "grad_norm": NaN, "learning_rate": 4.506710423093918e-08, "loss": 0.0, "step": 5866 }, { "epoch": 0.9867967370280044, "grad_norm": NaN, "learning_rate": 4.39335520755535e-08, "loss": 0.0, "step": 5867 }, { "epoch": 0.9869649314607687, "grad_norm": NaN, "learning_rate": 4.281443166822552e-08, "loss": 0.0, "step": 5868 }, { "epoch": 0.987133125893533, "grad_norm": NaN, "learning_rate": 4.1709743332252196e-08, "loss": 0.0, "step": 5869 }, { "epoch": 0.9873013203262972, "grad_norm": NaN, "learning_rate": 4.061948738677268e-08, "loss": 0.0, "step": 5870 }, { "epoch": 0.9874695147590615, "grad_norm": NaN, "learning_rate": 3.9543664146746154e-08, "loss": 0.0, "step": 5871 }, { "epoch": 0.9876377091918257, "grad_norm": NaN, "learning_rate": 3.8482273922962884e-08, "loss": 0.0, "step": 5872 }, { "epoch": 0.98780590362459, "grad_norm": NaN, "learning_rate": 3.743531702204983e-08, "loss": 0.0, "step": 5873 }, { "epoch": 0.9879740980573543, "grad_norm": NaN, "learning_rate": 3.6402793746465045e-08, "loss": 0.0, "step": 5874 }, { "epoch": 0.9881422924901185, "grad_norm": NaN, "learning_rate": 3.538470439448105e-08, "loss": 0.0, "step": 5875 }, { "epoch": 0.9883104869228828, "grad_norm": NaN, "learning_rate": 3.438104926022923e-08, "loss": 0.0, "step": 5876 }, { "epoch": 0.9884786813556471, "grad_norm": NaN, "learning_rate": 3.339182863363877e-08, "loss": 0.0, "step": 5877 }, { "epoch": 0.9886468757884114, "grad_norm": NaN, "learning_rate": 3.241704280049218e-08, "loss": 0.0, "step": 5878 }, { "epoch": 0.9888150702211757, "grad_norm": NaN, "learning_rate": 3.145669204239754e-08, "loss": 0.0, "step": 5879 }, { "epoch": 0.9889832646539399, "grad_norm": NaN, "learning_rate": 3.051077663677737e-08, "loss": 0.0, "step": 5880 }, { "epoch": 0.9891514590867042, "grad_norm": NaN, "learning_rate": 2.9579296856907523e-08, "loss": 0.0, "step": 5881 }, { "epoch": 0.9893196535194685, "grad_norm": NaN, "learning_rate": 2.86622529718783e-08, "loss": 0.0, "step": 5882 }, { "epoch": 0.9894878479522328, "grad_norm": NaN, "learning_rate": 2.775964524661667e-08, "loss": 0.0, "step": 5883 }, { "epoch": 0.989656042384997, "grad_norm": NaN, "learning_rate": 2.6871473941864067e-08, "loss": 0.0, "step": 5884 }, { "epoch": 0.9898242368177613, "grad_norm": NaN, "learning_rate": 2.599773931422078e-08, "loss": 0.0, "step": 5885 }, { "epoch": 0.9899924312505256, "grad_norm": NaN, "learning_rate": 2.5138441616079367e-08, "loss": 0.0, "step": 5886 }, { "epoch": 0.9901606256832899, "grad_norm": NaN, "learning_rate": 2.4293581095696794e-08, "loss": 0.0, "step": 5887 }, { "epoch": 0.9903288201160542, "grad_norm": NaN, "learning_rate": 2.346315799713894e-08, "loss": 0.0, "step": 5888 }, { "epoch": 0.9904970145488184, "grad_norm": NaN, "learning_rate": 2.264717256030835e-08, "loss": 0.0, "step": 5889 }, { "epoch": 0.9906652089815827, "grad_norm": NaN, "learning_rate": 2.1845625020927572e-08, "loss": 0.0, "step": 5890 }, { "epoch": 0.990833403414347, "grad_norm": NaN, "learning_rate": 2.105851561056138e-08, "loss": 0.0, "step": 5891 }, { "epoch": 0.9910015978471113, "grad_norm": NaN, "learning_rate": 2.0285844556588996e-08, "loss": 0.0, "step": 5892 }, { "epoch": 0.9911697922798756, "grad_norm": NaN, "learning_rate": 1.952761208223186e-08, "loss": 0.0, "step": 5893 }, { "epoch": 0.9913379867126398, "grad_norm": NaN, "learning_rate": 1.878381840653698e-08, "loss": 0.0, "step": 5894 }, { "epoch": 0.9915061811454041, "grad_norm": NaN, "learning_rate": 1.8054463744376914e-08, "loss": 0.0, "step": 5895 }, { "epoch": 0.9916743755781684, "grad_norm": NaN, "learning_rate": 1.7339548306449794e-08, "loss": 0.0, "step": 5896 }, { "epoch": 0.9918425700109327, "grad_norm": NaN, "learning_rate": 1.6639072299284852e-08, "loss": 0.0, "step": 5897 }, { "epoch": 0.992010764443697, "grad_norm": NaN, "learning_rate": 1.5953035925253547e-08, "loss": 0.0, "step": 5898 }, { "epoch": 0.9921789588764612, "grad_norm": NaN, "learning_rate": 1.528143938253068e-08, "loss": 0.0, "step": 5899 }, { "epoch": 0.9923471533092255, "grad_norm": NaN, "learning_rate": 1.4624282865144389e-08, "loss": 0.0, "step": 5900 }, { "epoch": 0.9925153477419898, "grad_norm": NaN, "learning_rate": 1.3981566562931702e-08, "loss": 0.0, "step": 5901 }, { "epoch": 0.992683542174754, "grad_norm": NaN, "learning_rate": 1.3353290661571871e-08, "loss": 0.0, "step": 5902 }, { "epoch": 0.9928517366075182, "grad_norm": NaN, "learning_rate": 1.2739455342558603e-08, "loss": 0.0, "step": 5903 }, { "epoch": 0.9930199310402825, "grad_norm": NaN, "learning_rate": 1.2140060783227824e-08, "loss": 0.0, "step": 5904 }, { "epoch": 0.9931881254730468, "grad_norm": NaN, "learning_rate": 1.155510715674102e-08, "loss": 0.0, "step": 5905 }, { "epoch": 0.9933563199058111, "grad_norm": NaN, "learning_rate": 1.098459463207968e-08, "loss": 0.0, "step": 5906 }, { "epoch": 0.9935245143385754, "grad_norm": NaN, "learning_rate": 1.042852337406197e-08, "loss": 0.0, "step": 5907 }, { "epoch": 0.9936927087713396, "grad_norm": NaN, "learning_rate": 9.88689354332606e-09, "loss": 0.0, "step": 5908 }, { "epoch": 0.9938609032041039, "grad_norm": NaN, "learning_rate": 9.359705296346776e-09, "loss": 0.0, "step": 5909 }, { "epoch": 0.9940290976368682, "grad_norm": NaN, "learning_rate": 8.846958785418968e-09, "loss": 0.0, "step": 5910 }, { "epoch": 0.9941972920696325, "grad_norm": NaN, "learning_rate": 8.34865415867414e-09, "loss": 0.0, "step": 5911 }, { "epoch": 0.9943654865023968, "grad_norm": NaN, "learning_rate": 7.86479156006381e-09, "loss": 0.0, "step": 5912 }, { "epoch": 0.994533680935161, "grad_norm": NaN, "learning_rate": 7.3953711293706096e-09, "loss": 0.0, "step": 5913 }, { "epoch": 0.9947018753679253, "grad_norm": NaN, "learning_rate": 6.940393002202727e-09, "loss": 0.0, "step": 5914 }, { "epoch": 0.9948700698006896, "grad_norm": NaN, "learning_rate": 6.4998573100050195e-09, "loss": 0.0, "step": 5915 }, { "epoch": 0.9950382642334539, "grad_norm": NaN, "learning_rate": 6.0737641800368e-09, "loss": 0.0, "step": 5916 }, { "epoch": 0.9952064586662182, "grad_norm": NaN, "learning_rate": 5.662113735394048e-09, "loss": 0.0, "step": 5917 }, { "epoch": 0.9953746530989824, "grad_norm": NaN, "learning_rate": 5.264906095003852e-09, "loss": 0.0, "step": 5918 }, { "epoch": 0.9955428475317467, "grad_norm": NaN, "learning_rate": 4.8821413736022115e-09, "loss": 0.0, "step": 5919 }, { "epoch": 0.995711041964511, "grad_norm": NaN, "learning_rate": 4.51381968177289e-09, "loss": 0.0, "step": 5920 }, { "epoch": 0.9958792363972753, "grad_norm": NaN, "learning_rate": 4.159941125925215e-09, "loss": 0.0, "step": 5921 }, { "epoch": 0.9960474308300395, "grad_norm": NaN, "learning_rate": 3.820505808277419e-09, "loss": 0.0, "step": 5922 }, { "epoch": 0.9962156252628038, "grad_norm": NaN, "learning_rate": 3.4955138269010534e-09, "loss": 0.0, "step": 5923 }, { "epoch": 0.9963838196955681, "grad_norm": NaN, "learning_rate": 3.184965275676577e-09, "loss": 0.0, "step": 5924 }, { "epoch": 0.9965520141283324, "grad_norm": NaN, "learning_rate": 2.8888602443211122e-09, "loss": 0.0, "step": 5925 }, { "epoch": 0.9967202085610967, "grad_norm": NaN, "learning_rate": 2.607198818371792e-09, "loss": 0.0, "step": 5926 }, { "epoch": 0.996888402993861, "grad_norm": NaN, "learning_rate": 2.3399810792024133e-09, "loss": 0.0, "step": 5927 }, { "epoch": 0.9970565974266252, "grad_norm": NaN, "learning_rate": 2.087207104001232e-09, "loss": 0.0, "step": 5928 }, { "epoch": 0.9972247918593895, "grad_norm": NaN, "learning_rate": 1.8488769658042693e-09, "loss": 0.0, "step": 5929 }, { "epoch": 0.9973929862921538, "grad_norm": NaN, "learning_rate": 1.624990733450904e-09, "loss": 0.0, "step": 5930 }, { "epoch": 0.997561180724918, "grad_norm": NaN, "learning_rate": 1.4155484716227296e-09, "loss": 0.0, "step": 5931 }, { "epoch": 0.9977293751576822, "grad_norm": NaN, "learning_rate": 1.220550240826901e-09, "loss": 0.0, "step": 5932 }, { "epoch": 0.9978975695904465, "grad_norm": NaN, "learning_rate": 1.0399960974016854e-09, "loss": 0.0, "step": 5933 }, { "epoch": 0.9980657640232108, "grad_norm": NaN, "learning_rate": 8.738860934942584e-10, "loss": 0.0, "step": 5934 }, { "epoch": 0.9982339584559751, "grad_norm": NaN, "learning_rate": 7.222202770995612e-10, "loss": 0.0, "step": 5935 }, { "epoch": 0.9984021528887393, "grad_norm": NaN, "learning_rate": 5.849986920325456e-10, "loss": 0.0, "step": 5936 }, { "epoch": 0.9985703473215036, "grad_norm": NaN, "learning_rate": 4.622213779392759e-10, "loss": 0.0, "step": 5937 }, { "epoch": 0.9987385417542679, "grad_norm": NaN, "learning_rate": 3.538883702747242e-10, "loss": 0.0, "step": 5938 }, { "epoch": 0.9989067361870322, "grad_norm": NaN, "learning_rate": 2.599997003471799e-10, "loss": 0.0, "step": 5939 }, { "epoch": 0.9990749306197965, "grad_norm": NaN, "learning_rate": 1.8055539527939148e-10, "loss": 0.0, "step": 5940 }, { "epoch": 0.9992431250525607, "grad_norm": NaN, "learning_rate": 1.155554780141177e-10, "loss": 0.0, "step": 5941 }, { "epoch": 0.999411319485325, "grad_norm": NaN, "learning_rate": 6.499996733633218e-11, "loss": 0.0, "step": 5942 }, { "epoch": 0.9995795139180893, "grad_norm": NaN, "learning_rate": 2.8888877851018792e-11, "loss": 0.0, "step": 5943 }, { "epoch": 0.9997477083508536, "grad_norm": NaN, "learning_rate": 7.2222199831717406e-12, "loss": 0.0, "step": 5944 }, { "epoch": 0.9999159027836179, "grad_norm": NaN, "learning_rate": 0.0, "loss": 0.0, "step": 5945 } ], "logging_steps": 1, "max_steps": 5945, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 239, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1191100899590144e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }