{ "best_metric": 1.386866569519043, "best_model_checkpoint": "outputs_llama-2/checkpoint-240", "epoch": 0.3702275356729657, "eval_steps": 40, "global_step": 240, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2e-05, "loss": 3.4868, "step": 1 }, { "epoch": 0.0, "learning_rate": 4e-05, "loss": 3.7961, "step": 2 }, { "epoch": 0.0, "learning_rate": 6e-05, "loss": 3.9117, "step": 3 }, { "epoch": 0.01, "learning_rate": 8e-05, "loss": 3.5057, "step": 4 }, { "epoch": 0.01, "learning_rate": 0.0001, "loss": 3.2378, "step": 5 }, { "epoch": 0.01, "learning_rate": 0.00012, "loss": 3.0761, "step": 6 }, { "epoch": 0.01, "learning_rate": 0.00014, "loss": 3.3794, "step": 7 }, { "epoch": 0.01, "learning_rate": 0.00016, "loss": 2.728, "step": 8 }, { "epoch": 0.01, "learning_rate": 0.00018, "loss": 2.5244, "step": 9 }, { "epoch": 0.02, "learning_rate": 0.0002, "loss": 2.47, "step": 10 }, { "epoch": 0.02, "learning_rate": 0.00019968652037617558, "loss": 2.5237, "step": 11 }, { "epoch": 0.02, "learning_rate": 0.0001993730407523511, "loss": 2.3919, "step": 12 }, { "epoch": 0.02, "learning_rate": 0.00019905956112852667, "loss": 1.9547, "step": 13 }, { "epoch": 0.02, "learning_rate": 0.0001987460815047022, "loss": 1.8513, "step": 14 }, { "epoch": 0.02, "learning_rate": 0.00019843260188087775, "loss": 1.6401, "step": 15 }, { "epoch": 0.02, "learning_rate": 0.0001981191222570533, "loss": 1.7872, "step": 16 }, { "epoch": 0.03, "learning_rate": 0.00019780564263322884, "loss": 1.8782, "step": 17 }, { "epoch": 0.03, "learning_rate": 0.0001974921630094044, "loss": 1.8139, "step": 18 }, { "epoch": 0.03, "learning_rate": 0.00019717868338557995, "loss": 1.5255, "step": 19 }, { "epoch": 0.03, "learning_rate": 0.0001968652037617555, "loss": 1.326, "step": 20 }, { "epoch": 0.03, "learning_rate": 0.00019655172413793104, "loss": 1.7972, "step": 21 }, { "epoch": 0.03, "learning_rate": 0.0001962382445141066, "loss": 1.4295, "step": 22 }, { "epoch": 0.04, "learning_rate": 0.00019592476489028212, "loss": 1.6369, "step": 23 }, { "epoch": 0.04, "learning_rate": 0.0001956112852664577, "loss": 1.7473, "step": 24 }, { "epoch": 0.04, "learning_rate": 0.00019529780564263324, "loss": 1.6524, "step": 25 }, { "epoch": 0.04, "learning_rate": 0.00019498432601880878, "loss": 1.5889, "step": 26 }, { "epoch": 0.04, "learning_rate": 0.00019467084639498435, "loss": 1.3206, "step": 27 }, { "epoch": 0.04, "learning_rate": 0.00019435736677115987, "loss": 1.9595, "step": 28 }, { "epoch": 0.04, "learning_rate": 0.00019404388714733544, "loss": 1.5356, "step": 29 }, { "epoch": 0.05, "learning_rate": 0.00019373040752351098, "loss": 1.932, "step": 30 }, { "epoch": 0.05, "learning_rate": 0.00019341692789968652, "loss": 1.3679, "step": 31 }, { "epoch": 0.05, "learning_rate": 0.0001931034482758621, "loss": 1.5176, "step": 32 }, { "epoch": 0.05, "learning_rate": 0.0001927899686520376, "loss": 1.774, "step": 33 }, { "epoch": 0.05, "learning_rate": 0.00019247648902821318, "loss": 1.4211, "step": 34 }, { "epoch": 0.05, "learning_rate": 0.00019216300940438872, "loss": 2.0095, "step": 35 }, { "epoch": 0.06, "learning_rate": 0.00019184952978056427, "loss": 1.1123, "step": 36 }, { "epoch": 0.06, "learning_rate": 0.0001915360501567398, "loss": 1.5061, "step": 37 }, { "epoch": 0.06, "learning_rate": 0.00019122257053291538, "loss": 1.8059, "step": 38 }, { "epoch": 0.06, "learning_rate": 0.00019090909090909092, "loss": 1.361, "step": 39 }, { "epoch": 0.06, "learning_rate": 0.00019059561128526647, "loss": 1.6624, "step": 40 }, { "epoch": 0.06, "eval_loss": 1.4508017301559448, "eval_runtime": 866.2645, "eval_samples_per_second": 1.16, "eval_steps_per_second": 1.16, "step": 40 }, { "epoch": 0.06, "learning_rate": 0.000190282131661442, "loss": 1.0086, "step": 41 }, { "epoch": 0.06, "learning_rate": 0.00018996865203761755, "loss": 1.8394, "step": 42 }, { "epoch": 0.07, "learning_rate": 0.00018965517241379312, "loss": 1.595, "step": 43 }, { "epoch": 0.07, "learning_rate": 0.00018934169278996866, "loss": 1.4083, "step": 44 }, { "epoch": 0.07, "learning_rate": 0.0001890282131661442, "loss": 1.6845, "step": 45 }, { "epoch": 0.07, "learning_rate": 0.00018871473354231978, "loss": 1.2298, "step": 46 }, { "epoch": 0.07, "learning_rate": 0.0001884012539184953, "loss": 1.0909, "step": 47 }, { "epoch": 0.07, "learning_rate": 0.00018808777429467086, "loss": 1.1942, "step": 48 }, { "epoch": 0.08, "learning_rate": 0.0001877742946708464, "loss": 1.7951, "step": 49 }, { "epoch": 0.08, "learning_rate": 0.00018746081504702195, "loss": 1.5837, "step": 50 }, { "epoch": 0.08, "learning_rate": 0.00018714733542319752, "loss": 1.1171, "step": 51 }, { "epoch": 0.08, "learning_rate": 0.00018683385579937304, "loss": 1.5556, "step": 52 }, { "epoch": 0.08, "learning_rate": 0.0001865203761755486, "loss": 1.6377, "step": 53 }, { "epoch": 0.08, "learning_rate": 0.00018620689655172415, "loss": 1.7227, "step": 54 }, { "epoch": 0.08, "learning_rate": 0.0001858934169278997, "loss": 1.6148, "step": 55 }, { "epoch": 0.09, "learning_rate": 0.00018557993730407524, "loss": 1.1987, "step": 56 }, { "epoch": 0.09, "learning_rate": 0.0001852664576802508, "loss": 0.8116, "step": 57 }, { "epoch": 0.09, "learning_rate": 0.00018495297805642635, "loss": 1.627, "step": 58 }, { "epoch": 0.09, "learning_rate": 0.0001846394984326019, "loss": 1.3519, "step": 59 }, { "epoch": 0.09, "learning_rate": 0.00018432601880877744, "loss": 1.1224, "step": 60 }, { "epoch": 0.09, "learning_rate": 0.00018401253918495298, "loss": 1.4279, "step": 61 }, { "epoch": 0.1, "learning_rate": 0.00018369905956112855, "loss": 1.3011, "step": 62 }, { "epoch": 0.1, "learning_rate": 0.00018338557993730406, "loss": 1.654, "step": 63 }, { "epoch": 0.1, "learning_rate": 0.00018307210031347963, "loss": 0.8621, "step": 64 }, { "epoch": 0.1, "learning_rate": 0.00018275862068965518, "loss": 1.3778, "step": 65 }, { "epoch": 0.1, "learning_rate": 0.00018244514106583072, "loss": 1.7181, "step": 66 }, { "epoch": 0.1, "learning_rate": 0.0001821316614420063, "loss": 1.603, "step": 67 }, { "epoch": 0.1, "learning_rate": 0.00018181818181818183, "loss": 1.3475, "step": 68 }, { "epoch": 0.11, "learning_rate": 0.00018150470219435738, "loss": 1.7242, "step": 69 }, { "epoch": 0.11, "learning_rate": 0.00018119122257053292, "loss": 1.58, "step": 70 }, { "epoch": 0.11, "learning_rate": 0.00018087774294670846, "loss": 1.4371, "step": 71 }, { "epoch": 0.11, "learning_rate": 0.00018056426332288403, "loss": 1.3795, "step": 72 }, { "epoch": 0.11, "learning_rate": 0.00018025078369905958, "loss": 1.1421, "step": 73 }, { "epoch": 0.11, "learning_rate": 0.00017993730407523512, "loss": 1.1617, "step": 74 }, { "epoch": 0.12, "learning_rate": 0.00017962382445141066, "loss": 1.4031, "step": 75 }, { "epoch": 0.12, "learning_rate": 0.0001793103448275862, "loss": 2.0192, "step": 76 }, { "epoch": 0.12, "learning_rate": 0.00017899686520376175, "loss": 1.4762, "step": 77 }, { "epoch": 0.12, "learning_rate": 0.00017868338557993732, "loss": 1.4992, "step": 78 }, { "epoch": 0.12, "learning_rate": 0.00017836990595611286, "loss": 1.5983, "step": 79 }, { "epoch": 0.12, "learning_rate": 0.0001780564263322884, "loss": 1.3888, "step": 80 }, { "epoch": 0.12, "eval_loss": 1.416973352432251, "eval_runtime": 866.3924, "eval_samples_per_second": 1.16, "eval_steps_per_second": 1.16, "step": 80 }, { "epoch": 0.12, "learning_rate": 0.00017774294670846398, "loss": 1.0799, "step": 81 }, { "epoch": 0.13, "learning_rate": 0.0001774294670846395, "loss": 1.3961, "step": 82 }, { "epoch": 0.13, "learning_rate": 0.00017711598746081506, "loss": 1.5792, "step": 83 }, { "epoch": 0.13, "learning_rate": 0.0001768025078369906, "loss": 1.6384, "step": 84 }, { "epoch": 0.13, "learning_rate": 0.00017648902821316615, "loss": 1.3299, "step": 85 }, { "epoch": 0.13, "learning_rate": 0.00017617554858934172, "loss": 1.7483, "step": 86 }, { "epoch": 0.13, "learning_rate": 0.00017586206896551723, "loss": 1.7161, "step": 87 }, { "epoch": 0.14, "learning_rate": 0.0001755485893416928, "loss": 1.3523, "step": 88 }, { "epoch": 0.14, "learning_rate": 0.00017523510971786835, "loss": 1.5451, "step": 89 }, { "epoch": 0.14, "learning_rate": 0.0001749216300940439, "loss": 1.4589, "step": 90 }, { "epoch": 0.14, "learning_rate": 0.00017460815047021943, "loss": 1.4352, "step": 91 }, { "epoch": 0.14, "learning_rate": 0.000174294670846395, "loss": 1.5711, "step": 92 }, { "epoch": 0.14, "learning_rate": 0.00017398119122257055, "loss": 1.3834, "step": 93 }, { "epoch": 0.15, "learning_rate": 0.0001736677115987461, "loss": 1.3734, "step": 94 }, { "epoch": 0.15, "learning_rate": 0.00017335423197492163, "loss": 1.5402, "step": 95 }, { "epoch": 0.15, "learning_rate": 0.00017304075235109718, "loss": 1.5848, "step": 96 }, { "epoch": 0.15, "learning_rate": 0.00017272727272727275, "loss": 1.3129, "step": 97 }, { "epoch": 0.15, "learning_rate": 0.00017241379310344826, "loss": 1.3945, "step": 98 }, { "epoch": 0.15, "learning_rate": 0.00017210031347962383, "loss": 1.79, "step": 99 }, { "epoch": 0.15, "learning_rate": 0.0001717868338557994, "loss": 1.0874, "step": 100 }, { "epoch": 0.16, "learning_rate": 0.00017147335423197492, "loss": 1.617, "step": 101 }, { "epoch": 0.16, "learning_rate": 0.0001711598746081505, "loss": 1.259, "step": 102 }, { "epoch": 0.16, "learning_rate": 0.00017084639498432603, "loss": 1.577, "step": 103 }, { "epoch": 0.16, "learning_rate": 0.00017053291536050158, "loss": 1.3163, "step": 104 }, { "epoch": 0.16, "learning_rate": 0.00017021943573667712, "loss": 1.3077, "step": 105 }, { "epoch": 0.16, "learning_rate": 0.00016990595611285266, "loss": 1.2611, "step": 106 }, { "epoch": 0.17, "learning_rate": 0.00016959247648902823, "loss": 1.8003, "step": 107 }, { "epoch": 0.17, "learning_rate": 0.00016927899686520377, "loss": 1.3783, "step": 108 }, { "epoch": 0.17, "learning_rate": 0.00016896551724137932, "loss": 1.3896, "step": 109 }, { "epoch": 0.17, "learning_rate": 0.00016865203761755486, "loss": 1.4663, "step": 110 }, { "epoch": 0.17, "learning_rate": 0.0001683385579937304, "loss": 0.7607, "step": 111 }, { "epoch": 0.17, "learning_rate": 0.00016802507836990597, "loss": 0.9899, "step": 112 }, { "epoch": 0.17, "learning_rate": 0.00016771159874608152, "loss": 1.8002, "step": 113 }, { "epoch": 0.18, "learning_rate": 0.00016739811912225706, "loss": 1.5776, "step": 114 }, { "epoch": 0.18, "learning_rate": 0.0001670846394984326, "loss": 1.551, "step": 115 }, { "epoch": 0.18, "learning_rate": 0.00016677115987460817, "loss": 1.4058, "step": 116 }, { "epoch": 0.18, "learning_rate": 0.0001664576802507837, "loss": 1.0475, "step": 117 }, { "epoch": 0.18, "learning_rate": 0.00016614420062695926, "loss": 1.7153, "step": 118 }, { "epoch": 0.18, "learning_rate": 0.0001658307210031348, "loss": 1.6289, "step": 119 }, { "epoch": 0.19, "learning_rate": 0.00016551724137931035, "loss": 1.2282, "step": 120 }, { "epoch": 0.19, "eval_loss": 1.4044392108917236, "eval_runtime": 866.8231, "eval_samples_per_second": 1.159, "eval_steps_per_second": 1.159, "step": 120 }, { "epoch": 0.19, "learning_rate": 0.00016520376175548592, "loss": 1.6396, "step": 121 }, { "epoch": 0.19, "learning_rate": 0.00016489028213166143, "loss": 1.7545, "step": 122 }, { "epoch": 0.19, "learning_rate": 0.000164576802507837, "loss": 1.3772, "step": 123 }, { "epoch": 0.19, "learning_rate": 0.00016426332288401255, "loss": 1.2632, "step": 124 }, { "epoch": 0.19, "learning_rate": 0.0001639498432601881, "loss": 1.0961, "step": 125 }, { "epoch": 0.19, "learning_rate": 0.00016363636363636366, "loss": 1.228, "step": 126 }, { "epoch": 0.2, "learning_rate": 0.0001633228840125392, "loss": 1.8272, "step": 127 }, { "epoch": 0.2, "learning_rate": 0.00016300940438871475, "loss": 1.2743, "step": 128 }, { "epoch": 0.2, "learning_rate": 0.0001626959247648903, "loss": 1.3611, "step": 129 }, { "epoch": 0.2, "learning_rate": 0.00016238244514106583, "loss": 1.2495, "step": 130 }, { "epoch": 0.2, "learning_rate": 0.00016206896551724137, "loss": 1.2027, "step": 131 }, { "epoch": 0.2, "learning_rate": 0.00016175548589341694, "loss": 1.0558, "step": 132 }, { "epoch": 0.21, "learning_rate": 0.0001614420062695925, "loss": 1.1034, "step": 133 }, { "epoch": 0.21, "learning_rate": 0.00016112852664576803, "loss": 1.7864, "step": 134 }, { "epoch": 0.21, "learning_rate": 0.0001608150470219436, "loss": 1.3586, "step": 135 }, { "epoch": 0.21, "learning_rate": 0.00016050156739811912, "loss": 1.639, "step": 136 }, { "epoch": 0.21, "learning_rate": 0.0001601880877742947, "loss": 1.4719, "step": 137 }, { "epoch": 0.21, "learning_rate": 0.00015987460815047023, "loss": 1.7374, "step": 138 }, { "epoch": 0.21, "learning_rate": 0.00015956112852664577, "loss": 1.5463, "step": 139 }, { "epoch": 0.22, "learning_rate": 0.00015924764890282134, "loss": 1.2065, "step": 140 }, { "epoch": 0.22, "learning_rate": 0.00015893416927899686, "loss": 1.1328, "step": 141 }, { "epoch": 0.22, "learning_rate": 0.00015862068965517243, "loss": 1.2139, "step": 142 }, { "epoch": 0.22, "learning_rate": 0.00015830721003134797, "loss": 1.5045, "step": 143 }, { "epoch": 0.22, "learning_rate": 0.00015799373040752352, "loss": 1.676, "step": 144 }, { "epoch": 0.22, "learning_rate": 0.00015768025078369906, "loss": 1.3679, "step": 145 }, { "epoch": 0.23, "learning_rate": 0.0001573667711598746, "loss": 1.3023, "step": 146 }, { "epoch": 0.23, "learning_rate": 0.00015705329153605017, "loss": 0.6768, "step": 147 }, { "epoch": 0.23, "learning_rate": 0.00015673981191222572, "loss": 1.2916, "step": 148 }, { "epoch": 0.23, "learning_rate": 0.00015642633228840126, "loss": 1.343, "step": 149 }, { "epoch": 0.23, "learning_rate": 0.0001561128526645768, "loss": 1.1307, "step": 150 }, { "epoch": 0.23, "learning_rate": 0.00015579937304075237, "loss": 1.277, "step": 151 }, { "epoch": 0.23, "learning_rate": 0.0001554858934169279, "loss": 0.948, "step": 152 }, { "epoch": 0.24, "learning_rate": 0.00015517241379310346, "loss": 1.4819, "step": 153 }, { "epoch": 0.24, "learning_rate": 0.000154858934169279, "loss": 1.7333, "step": 154 }, { "epoch": 0.24, "learning_rate": 0.00015454545454545454, "loss": 1.302, "step": 155 }, { "epoch": 0.24, "learning_rate": 0.00015423197492163011, "loss": 1.3045, "step": 156 }, { "epoch": 0.24, "learning_rate": 0.00015391849529780563, "loss": 1.2773, "step": 157 }, { "epoch": 0.24, "learning_rate": 0.0001536050156739812, "loss": 0.4272, "step": 158 }, { "epoch": 0.25, "learning_rate": 0.00015329153605015674, "loss": 1.3533, "step": 159 }, { "epoch": 0.25, "learning_rate": 0.0001529780564263323, "loss": 1.2623, "step": 160 }, { "epoch": 0.25, "eval_loss": 1.399330735206604, "eval_runtime": 866.2143, "eval_samples_per_second": 1.16, "eval_steps_per_second": 1.16, "step": 160 }, { "epoch": 0.25, "learning_rate": 0.00015266457680250786, "loss": 1.2428, "step": 161 }, { "epoch": 0.25, "learning_rate": 0.0001523510971786834, "loss": 1.1193, "step": 162 }, { "epoch": 0.25, "learning_rate": 0.00015203761755485894, "loss": 1.7799, "step": 163 }, { "epoch": 0.25, "learning_rate": 0.00015172413793103449, "loss": 1.8581, "step": 164 }, { "epoch": 0.25, "learning_rate": 0.00015141065830721003, "loss": 1.3594, "step": 165 }, { "epoch": 0.26, "learning_rate": 0.00015109717868338557, "loss": 1.1355, "step": 166 }, { "epoch": 0.26, "learning_rate": 0.00015078369905956114, "loss": 1.2911, "step": 167 }, { "epoch": 0.26, "learning_rate": 0.00015047021943573669, "loss": 1.1215, "step": 168 }, { "epoch": 0.26, "learning_rate": 0.00015015673981191223, "loss": 1.3987, "step": 169 }, { "epoch": 0.26, "learning_rate": 0.0001498432601880878, "loss": 1.4536, "step": 170 }, { "epoch": 0.26, "learning_rate": 0.00014952978056426332, "loss": 1.3232, "step": 171 }, { "epoch": 0.27, "learning_rate": 0.00014921630094043889, "loss": 1.2199, "step": 172 }, { "epoch": 0.27, "learning_rate": 0.00014890282131661443, "loss": 1.5375, "step": 173 }, { "epoch": 0.27, "learning_rate": 0.00014858934169278997, "loss": 1.6982, "step": 174 }, { "epoch": 0.27, "learning_rate": 0.00014827586206896554, "loss": 1.6182, "step": 175 }, { "epoch": 0.27, "learning_rate": 0.00014796238244514106, "loss": 1.3373, "step": 176 }, { "epoch": 0.27, "learning_rate": 0.00014764890282131663, "loss": 1.4321, "step": 177 }, { "epoch": 0.27, "learning_rate": 0.00014733542319749217, "loss": 1.2147, "step": 178 }, { "epoch": 0.28, "learning_rate": 0.00014702194357366771, "loss": 1.6977, "step": 179 }, { "epoch": 0.28, "learning_rate": 0.00014670846394984328, "loss": 1.562, "step": 180 }, { "epoch": 0.28, "learning_rate": 0.0001463949843260188, "loss": 1.3612, "step": 181 }, { "epoch": 0.28, "learning_rate": 0.00014608150470219437, "loss": 1.4421, "step": 182 }, { "epoch": 0.28, "learning_rate": 0.0001457680250783699, "loss": 1.201, "step": 183 }, { "epoch": 0.28, "learning_rate": 0.00014545454545454546, "loss": 1.5015, "step": 184 }, { "epoch": 0.29, "learning_rate": 0.000145141065830721, "loss": 1.8102, "step": 185 }, { "epoch": 0.29, "learning_rate": 0.00014482758620689657, "loss": 1.1975, "step": 186 }, { "epoch": 0.29, "learning_rate": 0.0001445141065830721, "loss": 1.2328, "step": 187 }, { "epoch": 0.29, "learning_rate": 0.00014420062695924766, "loss": 1.8503, "step": 188 }, { "epoch": 0.29, "learning_rate": 0.0001438871473354232, "loss": 0.9917, "step": 189 }, { "epoch": 0.29, "learning_rate": 0.00014357366771159874, "loss": 1.1511, "step": 190 }, { "epoch": 0.29, "learning_rate": 0.0001432601880877743, "loss": 1.4227, "step": 191 }, { "epoch": 0.3, "learning_rate": 0.00014294670846394983, "loss": 1.5738, "step": 192 }, { "epoch": 0.3, "learning_rate": 0.0001426332288401254, "loss": 1.6898, "step": 193 }, { "epoch": 0.3, "learning_rate": 0.00014231974921630097, "loss": 1.2109, "step": 194 }, { "epoch": 0.3, "learning_rate": 0.00014200626959247648, "loss": 1.5157, "step": 195 }, { "epoch": 0.3, "learning_rate": 0.00014169278996865206, "loss": 1.4976, "step": 196 }, { "epoch": 0.3, "learning_rate": 0.0001413793103448276, "loss": 1.9071, "step": 197 }, { "epoch": 0.31, "learning_rate": 0.00014106583072100314, "loss": 1.436, "step": 198 }, { "epoch": 0.31, "learning_rate": 0.00014075235109717868, "loss": 1.4321, "step": 199 }, { "epoch": 0.31, "learning_rate": 0.00014043887147335423, "loss": 1.5108, "step": 200 }, { "epoch": 0.31, "eval_loss": 1.3920130729675293, "eval_runtime": 866.972, "eval_samples_per_second": 1.159, "eval_steps_per_second": 1.159, "step": 200 }, { "epoch": 0.31, "learning_rate": 0.0001401253918495298, "loss": 1.2756, "step": 201 }, { "epoch": 0.31, "learning_rate": 0.00013981191222570534, "loss": 1.4882, "step": 202 }, { "epoch": 0.31, "learning_rate": 0.00013949843260188088, "loss": 1.484, "step": 203 }, { "epoch": 0.31, "learning_rate": 0.00013918495297805643, "loss": 1.6371, "step": 204 }, { "epoch": 0.32, "learning_rate": 0.000138871473354232, "loss": 0.6986, "step": 205 }, { "epoch": 0.32, "learning_rate": 0.0001385579937304075, "loss": 1.469, "step": 206 }, { "epoch": 0.32, "learning_rate": 0.00013824451410658308, "loss": 1.4841, "step": 207 }, { "epoch": 0.32, "learning_rate": 0.00013793103448275863, "loss": 1.06, "step": 208 }, { "epoch": 0.32, "learning_rate": 0.00013761755485893417, "loss": 1.7007, "step": 209 }, { "epoch": 0.32, "learning_rate": 0.00013730407523510974, "loss": 1.5835, "step": 210 }, { "epoch": 0.33, "learning_rate": 0.00013699059561128526, "loss": 1.7115, "step": 211 }, { "epoch": 0.33, "learning_rate": 0.00013667711598746083, "loss": 1.1916, "step": 212 }, { "epoch": 0.33, "learning_rate": 0.00013636363636363637, "loss": 1.4264, "step": 213 }, { "epoch": 0.33, "learning_rate": 0.0001360501567398119, "loss": 1.6938, "step": 214 }, { "epoch": 0.33, "learning_rate": 0.00013573667711598748, "loss": 1.1683, "step": 215 }, { "epoch": 0.33, "learning_rate": 0.00013542319749216303, "loss": 1.6196, "step": 216 }, { "epoch": 0.33, "learning_rate": 0.00013510971786833857, "loss": 1.1304, "step": 217 }, { "epoch": 0.34, "learning_rate": 0.0001347962382445141, "loss": 0.9536, "step": 218 }, { "epoch": 0.34, "learning_rate": 0.00013448275862068965, "loss": 1.4981, "step": 219 }, { "epoch": 0.34, "learning_rate": 0.0001341692789968652, "loss": 1.5185, "step": 220 }, { "epoch": 0.34, "learning_rate": 0.00013385579937304077, "loss": 1.2294, "step": 221 }, { "epoch": 0.34, "learning_rate": 0.0001335423197492163, "loss": 1.7871, "step": 222 }, { "epoch": 0.34, "learning_rate": 0.00013322884012539185, "loss": 1.2107, "step": 223 }, { "epoch": 0.35, "learning_rate": 0.0001329153605015674, "loss": 1.4636, "step": 224 }, { "epoch": 0.35, "learning_rate": 0.00013260188087774294, "loss": 1.1326, "step": 225 }, { "epoch": 0.35, "learning_rate": 0.0001322884012539185, "loss": 1.4011, "step": 226 }, { "epoch": 0.35, "learning_rate": 0.00013197492163009403, "loss": 1.4236, "step": 227 }, { "epoch": 0.35, "learning_rate": 0.0001316614420062696, "loss": 1.35, "step": 228 }, { "epoch": 0.35, "learning_rate": 0.00013134796238244517, "loss": 1.3837, "step": 229 }, { "epoch": 0.35, "learning_rate": 0.00013103448275862068, "loss": 1.1546, "step": 230 }, { "epoch": 0.36, "learning_rate": 0.00013072100313479625, "loss": 1.4603, "step": 231 }, { "epoch": 0.36, "learning_rate": 0.0001304075235109718, "loss": 1.4694, "step": 232 }, { "epoch": 0.36, "learning_rate": 0.00013009404388714734, "loss": 0.9677, "step": 233 }, { "epoch": 0.36, "learning_rate": 0.0001297805642633229, "loss": 1.4057, "step": 234 }, { "epoch": 0.36, "learning_rate": 0.00012946708463949843, "loss": 1.5458, "step": 235 }, { "epoch": 0.36, "learning_rate": 0.000129153605015674, "loss": 1.4101, "step": 236 }, { "epoch": 0.37, "learning_rate": 0.00012884012539184954, "loss": 1.4087, "step": 237 }, { "epoch": 0.37, "learning_rate": 0.00012852664576802508, "loss": 1.2849, "step": 238 }, { "epoch": 0.37, "learning_rate": 0.00012821316614420062, "loss": 1.73, "step": 239 }, { "epoch": 0.37, "learning_rate": 0.0001278996865203762, "loss": 1.4046, "step": 240 }, { "epoch": 0.37, "eval_loss": 1.386866569519043, "eval_runtime": 866.0039, "eval_samples_per_second": 1.161, "eval_steps_per_second": 1.161, "step": 240 } ], "logging_steps": 1, "max_steps": 648, "num_train_epochs": 1, "save_steps": 40, "total_flos": 1.1654508112084992e+16, "trial_name": null, "trial_params": null }