{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.15840619002650258, "global_step": 325, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.7142857142857135e-05, "loss": 1.9649, "step": 2 }, { "epoch": 0.0, "learning_rate": 0.00011428571428571427, "loss": 2.0221, "step": 4 }, { "epoch": 0.0, "learning_rate": 0.0001714285714285714, "loss": 1.9956, "step": 6 }, { "epoch": 0.0, "learning_rate": 0.00022857142857142854, "loss": 1.9373, "step": 8 }, { "epoch": 0.0, "learning_rate": 0.0002857142857142857, "loss": 2.0042, "step": 10 }, { "epoch": 0.01, "learning_rate": 0.0003428571428571428, "loss": 1.9593, "step": 12 }, { "epoch": 0.01, "learning_rate": 0.00039999999999999996, "loss": 2.031, "step": 14 }, { "epoch": 0.01, "learning_rate": 0.0004571428571428571, "loss": 1.9644, "step": 16 }, { "epoch": 0.01, "learning_rate": 0.0005142857142857142, "loss": 1.9968, "step": 18 }, { "epoch": 0.01, "learning_rate": 0.0005714285714285714, "loss": 1.9694, "step": 20 }, { "epoch": 0.01, "learning_rate": 0.0005999996407482917, "loss": 1.9885, "step": 22 }, { "epoch": 0.01, "learning_rate": 0.0005999967667397879, "loss": 1.9295, "step": 24 }, { "epoch": 0.01, "learning_rate": 0.0005999910187503132, "loss": 1.9646, "step": 26 }, { "epoch": 0.01, "learning_rate": 0.0005999823968349338, "loss": 1.9615, "step": 28 }, { "epoch": 0.01, "learning_rate": 0.000599970901076248, "loss": 1.9456, "step": 30 }, { "epoch": 0.02, "learning_rate": 0.0005999565315843857, "loss": 2.014, "step": 32 }, { "epoch": 0.02, "learning_rate": 0.0005999392884970068, "loss": 1.9118, "step": 34 }, { "epoch": 0.02, "learning_rate": 0.0005999191719793011, "loss": 1.944, "step": 36 }, { "epoch": 0.02, "learning_rate": 0.0005998961822239856, "loss": 1.9475, "step": 38 }, { "epoch": 0.02, "learning_rate": 0.000599870319451303, "loss": 2.038, "step": 40 }, { "epoch": 0.02, "learning_rate": 0.0005998415839090198, "loss": 1.9438, "step": 42 }, { "epoch": 0.02, "learning_rate": 0.0005998099758724235, "loss": 2.0804, "step": 44 }, { "epoch": 0.02, "learning_rate": 0.0005997754956443205, "loss": 1.9767, "step": 46 }, { "epoch": 0.02, "learning_rate": 0.0005997381435550326, "loss": 1.9322, "step": 48 }, { "epoch": 0.02, "learning_rate": 0.0005996979199623944, "loss": 1.9276, "step": 50 }, { "epoch": 0.03, "learning_rate": 0.0005996548252517495, "loss": 1.9933, "step": 52 }, { "epoch": 0.03, "learning_rate": 0.0005996088598359469, "loss": 1.8901, "step": 54 }, { "epoch": 0.03, "learning_rate": 0.0005995600241553371, "loss": 1.9472, "step": 56 }, { "epoch": 0.03, "learning_rate": 0.000599508318677768, "loss": 1.9855, "step": 58 }, { "epoch": 0.03, "learning_rate": 0.00059945374389858, "loss": 1.9887, "step": 60 }, { "epoch": 0.03, "learning_rate": 0.0005993963003406018, "loss": 1.9798, "step": 62 }, { "epoch": 0.03, "learning_rate": 0.0005993359885541448, "loss": 1.9956, "step": 64 }, { "epoch": 0.03, "learning_rate": 0.0005992728091169984, "loss": 1.9411, "step": 66 }, { "epoch": 0.03, "learning_rate": 0.0005992067626344242, "loss": 1.9722, "step": 68 }, { "epoch": 0.03, "learning_rate": 0.00059913784973915, "loss": 1.97, "step": 70 }, { "epoch": 0.04, "learning_rate": 0.0005990660710913641, "loss": 1.9612, "step": 72 }, { "epoch": 0.04, "learning_rate": 0.0005989914273787089, "loss": 1.8503, "step": 74 }, { "epoch": 0.04, "learning_rate": 0.0005989139193162741, "loss": 1.992, "step": 76 }, { "epoch": 0.04, "learning_rate": 0.00059883354764659, "loss": 1.9675, "step": 78 }, { "epoch": 0.04, "learning_rate": 0.0005987503131396204, "loss": 1.9609, "step": 80 }, { "epoch": 0.04, "learning_rate": 0.0005986642165927551, "loss": 2.0349, "step": 82 }, { "epoch": 0.04, "learning_rate": 0.0005985752588308026, "loss": 1.9824, "step": 84 }, { "epoch": 0.04, "learning_rate": 0.0005984834407059817, "loss": 1.9017, "step": 86 }, { "epoch": 0.04, "learning_rate": 0.0005983887630979137, "loss": 1.903, "step": 88 }, { "epoch": 0.04, "learning_rate": 0.000598291226913614, "loss": 1.9067, "step": 90 }, { "epoch": 0.04, "learning_rate": 0.000598190833087483, "loss": 1.941, "step": 92 }, { "epoch": 0.05, "learning_rate": 0.0005980875825812974, "loss": 1.9856, "step": 94 }, { "epoch": 0.05, "learning_rate": 0.0005979814763842014, "loss": 1.9555, "step": 96 }, { "epoch": 0.05, "learning_rate": 0.0005978725155126967, "loss": 1.9408, "step": 98 }, { "epoch": 0.05, "learning_rate": 0.0005977607010106324, "loss": 2.0131, "step": 100 }, { "epoch": 0.05, "learning_rate": 0.0005976460339491963, "loss": 1.9499, "step": 102 }, { "epoch": 0.05, "learning_rate": 0.000597528515426903, "loss": 1.9381, "step": 104 }, { "epoch": 0.05, "learning_rate": 0.0005974081465695849, "loss": 1.9805, "step": 106 }, { "epoch": 0.05, "learning_rate": 0.0005972849285303804, "loss": 1.8787, "step": 108 }, { "epoch": 0.05, "learning_rate": 0.0005971588624897232, "loss": 1.8912, "step": 110 }, { "epoch": 0.05, "learning_rate": 0.0005970299496553309, "loss": 1.9536, "step": 112 }, { "epoch": 0.06, "learning_rate": 0.0005968981912621937, "loss": 1.9388, "step": 114 }, { "epoch": 0.06, "learning_rate": 0.0005967635885725623, "loss": 2.0041, "step": 116 }, { "epoch": 0.06, "learning_rate": 0.0005966261428759357, "loss": 1.9447, "step": 118 }, { "epoch": 0.06, "learning_rate": 0.0005964858554890492, "loss": 2.0031, "step": 120 }, { "epoch": 0.06, "learning_rate": 0.0005963427277558616, "loss": 1.9063, "step": 122 }, { "epoch": 0.06, "learning_rate": 0.0005961967610475422, "loss": 1.9492, "step": 124 }, { "epoch": 0.06, "learning_rate": 0.0005960479567624578, "loss": 1.9956, "step": 126 }, { "epoch": 0.06, "learning_rate": 0.0005958963163261595, "loss": 1.9329, "step": 128 }, { "epoch": 0.06, "learning_rate": 0.0005957418411913688, "loss": 1.9424, "step": 130 }, { "epoch": 0.06, "learning_rate": 0.0005955845328379636, "loss": 1.9105, "step": 132 }, { "epoch": 0.07, "learning_rate": 0.000595424392772964, "loss": 1.9439, "step": 134 }, { "epoch": 0.07, "learning_rate": 0.0005952614225305184, "loss": 1.9586, "step": 136 }, { "epoch": 0.07, "learning_rate": 0.0005950956236718882, "loss": 1.8851, "step": 138 }, { "epoch": 0.07, "learning_rate": 0.0005949269977854329, "loss": 1.9031, "step": 140 }, { "epoch": 0.07, "learning_rate": 0.0005947555464865954, "loss": 1.9294, "step": 142 }, { "epoch": 0.07, "learning_rate": 0.000594581271417886, "loss": 1.9779, "step": 144 }, { "epoch": 0.07, "learning_rate": 0.0005944041742488665, "loss": 1.9515, "step": 146 }, { "epoch": 0.07, "learning_rate": 0.0005942242566761351, "loss": 1.9249, "step": 148 }, { "epoch": 0.07, "learning_rate": 0.0005940415204233092, "loss": 1.9104, "step": 150 }, { "epoch": 0.07, "learning_rate": 0.0005938559672410093, "loss": 1.9548, "step": 152 }, { "epoch": 0.08, "learning_rate": 0.0005936675989068425, "loss": 1.9314, "step": 154 }, { "epoch": 0.08, "learning_rate": 0.0005934764172253849, "loss": 1.9468, "step": 156 }, { "epoch": 0.08, "learning_rate": 0.0005932824240281645, "loss": 1.9821, "step": 158 }, { "epoch": 0.08, "learning_rate": 0.0005930856211736438, "loss": 1.9609, "step": 160 }, { "epoch": 0.08, "learning_rate": 0.0005928860105472022, "loss": 1.9261, "step": 162 }, { "epoch": 0.08, "learning_rate": 0.0005926835940611172, "loss": 1.9594, "step": 164 }, { "epoch": 0.08, "learning_rate": 0.000592478373654547, "loss": 1.8914, "step": 166 }, { "epoch": 0.08, "learning_rate": 0.0005922703512935113, "loss": 1.9509, "step": 168 }, { "epoch": 0.08, "learning_rate": 0.0005920595289708723, "loss": 1.9988, "step": 170 }, { "epoch": 0.08, "learning_rate": 0.0005918459087063165, "loss": 1.9886, "step": 172 }, { "epoch": 0.08, "learning_rate": 0.0005916294925463346, "loss": 2.0024, "step": 174 }, { "epoch": 0.09, "learning_rate": 0.0005914102825642018, "loss": 1.859, "step": 176 }, { "epoch": 0.09, "learning_rate": 0.0005911882808599586, "loss": 1.9439, "step": 178 }, { "epoch": 0.09, "learning_rate": 0.0005909634895603902, "loss": 1.9823, "step": 180 }, { "epoch": 0.09, "learning_rate": 0.000590735910819006, "loss": 1.9308, "step": 182 }, { "epoch": 0.09, "learning_rate": 0.0005905055468160197, "loss": 1.9459, "step": 184 }, { "epoch": 0.09, "learning_rate": 0.0005902723997583274, "loss": 1.9146, "step": 186 }, { "epoch": 0.09, "learning_rate": 0.0005900364718794873, "loss": 1.9036, "step": 188 }, { "epoch": 0.09, "learning_rate": 0.0005897977654396977, "loss": 1.9035, "step": 190 }, { "epoch": 0.09, "learning_rate": 0.000589556282725776, "loss": 1.9607, "step": 192 }, { "epoch": 0.09, "learning_rate": 0.0005893120260511362, "loss": 2.0468, "step": 194 }, { "epoch": 0.1, "learning_rate": 0.0005890649977557668, "loss": 1.9687, "step": 196 }, { "epoch": 0.1, "learning_rate": 0.0005888152002062089, "loss": 1.9958, "step": 198 }, { "epoch": 0.1, "learning_rate": 0.0005885626357955329, "loss": 1.9025, "step": 200 }, { "epoch": 0.1, "learning_rate": 0.0005883073069433159, "loss": 1.9077, "step": 202 }, { "epoch": 0.1, "learning_rate": 0.0005880492160956185, "loss": 1.9494, "step": 204 }, { "epoch": 0.1, "learning_rate": 0.0005877883657249612, "loss": 1.8716, "step": 206 }, { "epoch": 0.1, "learning_rate": 0.000587524758330301, "loss": 1.9075, "step": 208 }, { "epoch": 0.1, "learning_rate": 0.0005872583964370073, "loss": 1.9406, "step": 210 }, { "epoch": 0.1, "learning_rate": 0.0005869892825968375, "loss": 1.9179, "step": 212 }, { "epoch": 0.1, "learning_rate": 0.0005867174193879131, "loss": 1.9702, "step": 214 }, { "epoch": 0.11, "learning_rate": 0.0005864428094146943, "loss": 1.9297, "step": 216 }, { "epoch": 0.11, "learning_rate": 0.0005861654553079557, "loss": 1.8467, "step": 218 }, { "epoch": 0.11, "learning_rate": 0.0005858853597247606, "loss": 1.9145, "step": 220 }, { "epoch": 0.11, "learning_rate": 0.0005856025253484358, "loss": 1.944, "step": 222 }, { "epoch": 0.11, "learning_rate": 0.0005853169548885461, "loss": 1.9321, "step": 224 }, { "epoch": 0.11, "learning_rate": 0.0005850286510808675, "loss": 1.9838, "step": 226 }, { "epoch": 0.11, "learning_rate": 0.0005847376166873624, "loss": 1.9891, "step": 228 }, { "epoch": 0.11, "learning_rate": 0.0005844438544961515, "loss": 1.9384, "step": 230 }, { "epoch": 0.11, "learning_rate": 0.0005841473673214886, "loss": 1.8826, "step": 232 }, { "epoch": 0.11, "learning_rate": 0.0005838481580037324, "loss": 1.8983, "step": 234 }, { "epoch": 0.12, "learning_rate": 0.0005835462294093202, "loss": 1.8804, "step": 236 }, { "epoch": 0.12, "learning_rate": 0.00058324158443074, "loss": 1.8997, "step": 238 }, { "epoch": 0.12, "learning_rate": 0.0005829342259865026, "loss": 1.9478, "step": 240 }, { "epoch": 0.12, "learning_rate": 0.0005826241570211144, "loss": 1.9727, "step": 242 }, { "epoch": 0.12, "learning_rate": 0.0005823113805050482, "loss": 1.9216, "step": 244 }, { "epoch": 0.12, "learning_rate": 0.0005819958994347157, "loss": 1.9208, "step": 246 }, { "epoch": 0.12, "learning_rate": 0.000581677716832438, "loss": 1.9201, "step": 248 }, { "epoch": 0.12, "learning_rate": 0.0005813568357464172, "loss": 1.869, "step": 250 }, { "epoch": 0.12, "learning_rate": 0.0005810332592507066, "loss": 1.9111, "step": 252 }, { "epoch": 0.12, "learning_rate": 0.0005807069904451822, "loss": 1.8696, "step": 254 }, { "epoch": 0.12, "learning_rate": 0.0005803780324555121, "loss": 1.8946, "step": 256 }, { "epoch": 0.13, "learning_rate": 0.0005800463884331269, "loss": 1.9641, "step": 258 }, { "epoch": 0.13, "learning_rate": 0.0005797120615551896, "loss": 1.8923, "step": 260 }, { "epoch": 0.13, "learning_rate": 0.0005793750550245648, "loss": 1.8612, "step": 262 }, { "epoch": 0.13, "learning_rate": 0.0005790353720697887, "loss": 1.927, "step": 264 }, { "epoch": 0.13, "learning_rate": 0.0005786930159450374, "loss": 1.9709, "step": 266 }, { "epoch": 0.13, "learning_rate": 0.0005783479899300962, "loss": 1.9665, "step": 268 }, { "epoch": 0.13, "learning_rate": 0.0005780002973303283, "loss": 1.8657, "step": 270 }, { "epoch": 0.13, "learning_rate": 0.0005776499414766424, "loss": 2.0055, "step": 272 }, { "epoch": 0.13, "learning_rate": 0.0005772969257254615, "loss": 1.9147, "step": 274 }, { "epoch": 0.13, "learning_rate": 0.0005769412534586908, "loss": 1.9383, "step": 276 }, { "epoch": 0.14, "learning_rate": 0.0005765829280836846, "loss": 1.9575, "step": 278 }, { "epoch": 0.14, "learning_rate": 0.0005762219530332142, "loss": 1.9192, "step": 280 }, { "epoch": 0.14, "learning_rate": 0.0005758583317654352, "loss": 1.8842, "step": 282 }, { "epoch": 0.14, "learning_rate": 0.0005754920677638535, "loss": 1.9905, "step": 284 }, { "epoch": 0.14, "learning_rate": 0.000575123164537293, "loss": 1.9686, "step": 286 }, { "epoch": 0.14, "learning_rate": 0.0005747516256198616, "loss": 2.0003, "step": 288 }, { "epoch": 0.14, "learning_rate": 0.0005743774545709163, "loss": 1.9195, "step": 290 }, { "epoch": 0.14, "learning_rate": 0.000574000654975031, "loss": 1.8899, "step": 292 }, { "epoch": 0.14, "learning_rate": 0.0005736212304419609, "loss": 1.9143, "step": 294 }, { "epoch": 0.14, "learning_rate": 0.000573239184606608, "loss": 1.8431, "step": 296 }, { "epoch": 0.15, "learning_rate": 0.0005728545211289866, "loss": 1.8978, "step": 298 }, { "epoch": 0.15, "learning_rate": 0.0005724672436941882, "loss": 1.9017, "step": 300 }, { "epoch": 0.15, "learning_rate": 0.0005720773560123461, "loss": 1.8912, "step": 302 }, { "epoch": 0.15, "learning_rate": 0.0005716848618185996, "loss": 1.9412, "step": 304 }, { "epoch": 0.15, "learning_rate": 0.000571289764873059, "loss": 1.8843, "step": 306 }, { "epoch": 0.15, "learning_rate": 0.0005708920689607684, "loss": 1.8971, "step": 308 }, { "epoch": 0.15, "learning_rate": 0.0005704917778916709, "loss": 1.9243, "step": 310 }, { "epoch": 0.15, "learning_rate": 0.0005700888955005706, "loss": 1.8342, "step": 312 }, { "epoch": 0.15, "learning_rate": 0.000569683425647097, "loss": 1.8725, "step": 314 }, { "epoch": 0.15, "learning_rate": 0.0005692753722156673, "loss": 1.8597, "step": 316 }, { "epoch": 0.15, "learning_rate": 0.0005688647391154496, "loss": 1.954, "step": 318 }, { "epoch": 0.16, "learning_rate": 0.0005684515302803256, "loss": 1.9454, "step": 320 }, { "epoch": 0.16, "learning_rate": 0.000568035749668852, "loss": 1.9336, "step": 322 }, { "epoch": 0.16, "learning_rate": 0.000567617401264224, "loss": 1.96, "step": 324 } ], "max_steps": 2051, "num_train_epochs": 1, "total_flos": 4.557933379584e+17, "trial_name": null, "trial_params": null }