{ "best_metric": 0.4452793300151825, "best_model_checkpoint": "./models/adapters_mlm_cn/bg/checkpoint-36000", "epoch": 11.022657685241887, "eval_steps": 500, "global_step": 36000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15, "learning_rate": 4.9500000000000004e-05, "loss": 1.5057, "step": 500 }, { "epoch": 0.15, "eval_accuracy": 0.8148620791204476, "eval_loss": 0.9846400618553162, "eval_runtime": 7.7173, "eval_samples_per_second": 752.339, "eval_steps_per_second": 47.037, "step": 500 }, { "epoch": 0.31, "learning_rate": 4.9e-05, "loss": 1.0172, "step": 1000 }, { "epoch": 0.31, "eval_accuracy": 0.82587890625, "eval_loss": 0.8394753932952881, "eval_runtime": 7.7319, "eval_samples_per_second": 750.918, "eval_steps_per_second": 46.949, "step": 1000 }, { "epoch": 0.46, "learning_rate": 4.85e-05, "loss": 0.8814, "step": 1500 }, { "epoch": 0.46, "eval_accuracy": 0.8368038740920097, "eval_loss": 0.7822620272636414, "eval_runtime": 7.7294, "eval_samples_per_second": 751.162, "eval_steps_per_second": 46.964, "step": 1500 }, { "epoch": 0.61, "learning_rate": 4.8e-05, "loss": 0.8405, "step": 2000 }, { "epoch": 0.61, "eval_accuracy": 0.8449071800412533, "eval_loss": 0.7436666488647461, "eval_runtime": 7.9259, "eval_samples_per_second": 732.538, "eval_steps_per_second": 45.799, "step": 2000 }, { "epoch": 0.77, "learning_rate": 4.75e-05, "loss": 0.7773, "step": 2500 }, { "epoch": 0.77, "eval_accuracy": 0.8386841062227507, "eval_loss": 0.7246997952461243, "eval_runtime": 7.7331, "eval_samples_per_second": 750.803, "eval_steps_per_second": 46.941, "step": 2500 }, { "epoch": 0.92, "learning_rate": 4.7e-05, "loss": 0.7762, "step": 3000 }, { "epoch": 0.92, "eval_accuracy": 0.8513044340839202, "eval_loss": 0.6520901322364807, "eval_runtime": 7.9369, "eval_samples_per_second": 731.516, "eval_steps_per_second": 45.736, "step": 3000 }, { "epoch": 1.07, "learning_rate": 4.6500000000000005e-05, "loss": 0.7186, "step": 3500 }, { "epoch": 1.07, "eval_accuracy": 0.8492265517916585, "eval_loss": 0.6834315061569214, "eval_runtime": 7.6738, "eval_samples_per_second": 756.596, "eval_steps_per_second": 47.304, "step": 3500 }, { "epoch": 1.22, "learning_rate": 4.600000000000001e-05, "loss": 0.7033, "step": 4000 }, { "epoch": 1.22, "eval_accuracy": 0.852271607371637, "eval_loss": 0.67154860496521, "eval_runtime": 7.7294, "eval_samples_per_second": 751.163, "eval_steps_per_second": 46.964, "step": 4000 }, { "epoch": 1.38, "learning_rate": 4.55e-05, "loss": 0.672, "step": 4500 }, { "epoch": 1.38, "eval_accuracy": 0.855973974763407, "eval_loss": 0.6539207100868225, "eval_runtime": 7.7117, "eval_samples_per_second": 752.881, "eval_steps_per_second": 47.071, "step": 4500 }, { "epoch": 1.53, "learning_rate": 4.5e-05, "loss": 0.6613, "step": 5000 }, { "epoch": 1.53, "eval_accuracy": 0.8567085131424088, "eval_loss": 0.638721227645874, "eval_runtime": 7.6505, "eval_samples_per_second": 758.9, "eval_steps_per_second": 47.448, "step": 5000 }, { "epoch": 1.68, "learning_rate": 4.4500000000000004e-05, "loss": 0.6712, "step": 5500 }, { "epoch": 1.68, "eval_accuracy": 0.862372613040467, "eval_loss": 0.6180465221405029, "eval_runtime": 7.7012, "eval_samples_per_second": 753.913, "eval_steps_per_second": 47.136, "step": 5500 }, { "epoch": 1.84, "learning_rate": 4.4000000000000006e-05, "loss": 0.6776, "step": 6000 }, { "epoch": 1.84, "eval_accuracy": 0.8537038849202466, "eval_loss": 0.6634594202041626, "eval_runtime": 7.7042, "eval_samples_per_second": 753.61, "eval_steps_per_second": 47.117, "step": 6000 }, { "epoch": 1.99, "learning_rate": 4.35e-05, "loss": 0.6484, "step": 6500 }, { "epoch": 1.99, "eval_accuracy": 0.8661394258933802, "eval_loss": 0.5945894122123718, "eval_runtime": 7.6974, "eval_samples_per_second": 754.283, "eval_steps_per_second": 47.159, "step": 6500 }, { "epoch": 2.14, "learning_rate": 4.3e-05, "loss": 0.6817, "step": 7000 }, { "epoch": 2.14, "eval_accuracy": 0.8654563297350344, "eval_loss": 0.6126104593276978, "eval_runtime": 8.509, "eval_samples_per_second": 682.334, "eval_steps_per_second": 42.661, "step": 7000 }, { "epoch": 2.3, "learning_rate": 4.25e-05, "loss": 0.6392, "step": 7500 }, { "epoch": 2.3, "eval_accuracy": 0.8613216715257531, "eval_loss": 0.613590657711029, "eval_runtime": 8.1378, "eval_samples_per_second": 713.458, "eval_steps_per_second": 44.606, "step": 7500 }, { "epoch": 2.45, "learning_rate": 4.2e-05, "loss": 0.6394, "step": 8000 }, { "epoch": 2.45, "eval_accuracy": 0.8620723749258453, "eval_loss": 0.6320650577545166, "eval_runtime": 7.7697, "eval_samples_per_second": 747.26, "eval_steps_per_second": 46.72, "step": 8000 }, { "epoch": 2.6, "learning_rate": 4.15e-05, "loss": 0.6273, "step": 8500 }, { "epoch": 2.6, "eval_accuracy": 0.8629402009560043, "eval_loss": 0.5997043251991272, "eval_runtime": 7.9947, "eval_samples_per_second": 726.232, "eval_steps_per_second": 45.405, "step": 8500 }, { "epoch": 2.76, "learning_rate": 4.1e-05, "loss": 0.5993, "step": 9000 }, { "epoch": 2.76, "eval_accuracy": 0.8645569620253165, "eval_loss": 0.6027613282203674, "eval_runtime": 8.0195, "eval_samples_per_second": 723.989, "eval_steps_per_second": 45.265, "step": 9000 }, { "epoch": 2.91, "learning_rate": 4.05e-05, "loss": 0.6527, "step": 9500 }, { "epoch": 2.91, "eval_accuracy": 0.8510214250124564, "eval_loss": 0.6583752632141113, "eval_runtime": 7.9195, "eval_samples_per_second": 733.131, "eval_steps_per_second": 45.836, "step": 9500 }, { "epoch": 3.06, "learning_rate": 4e-05, "loss": 0.5897, "step": 10000 }, { "epoch": 3.06, "eval_accuracy": 0.8676120587068623, "eval_loss": 0.5727556943893433, "eval_runtime": 7.9746, "eval_samples_per_second": 728.065, "eval_steps_per_second": 45.52, "step": 10000 }, { "epoch": 3.21, "learning_rate": 3.9500000000000005e-05, "loss": 0.574, "step": 10500 }, { "epoch": 3.21, "eval_accuracy": 0.8670824400701618, "eval_loss": 0.5869864821434021, "eval_runtime": 7.8716, "eval_samples_per_second": 737.59, "eval_steps_per_second": 46.115, "step": 10500 }, { "epoch": 3.37, "learning_rate": 3.9000000000000006e-05, "loss": 0.6026, "step": 11000 }, { "epoch": 3.37, "eval_accuracy": 0.8676513458361675, "eval_loss": 0.6066599488258362, "eval_runtime": 7.8242, "eval_samples_per_second": 742.057, "eval_steps_per_second": 46.395, "step": 11000 }, { "epoch": 3.52, "learning_rate": 3.85e-05, "loss": 0.5896, "step": 11500 }, { "epoch": 3.52, "eval_accuracy": 0.8638327806250629, "eval_loss": 0.6000019311904907, "eval_runtime": 8.0139, "eval_samples_per_second": 724.49, "eval_steps_per_second": 45.296, "step": 11500 }, { "epoch": 3.67, "learning_rate": 3.8e-05, "loss": 0.566, "step": 12000 }, { "epoch": 3.67, "eval_accuracy": 0.8711821948164563, "eval_loss": 0.5566375851631165, "eval_runtime": 7.8868, "eval_samples_per_second": 736.17, "eval_steps_per_second": 46.026, "step": 12000 }, { "epoch": 3.83, "learning_rate": 3.7500000000000003e-05, "loss": 0.5928, "step": 12500 }, { "epoch": 3.83, "eval_accuracy": 0.8675352877307275, "eval_loss": 0.5621004700660706, "eval_runtime": 7.9912, "eval_samples_per_second": 726.553, "eval_steps_per_second": 45.425, "step": 12500 }, { "epoch": 3.98, "learning_rate": 3.7e-05, "loss": 0.597, "step": 13000 }, { "epoch": 3.98, "eval_accuracy": 0.8771320904403015, "eval_loss": 0.5161893963813782, "eval_runtime": 7.9771, "eval_samples_per_second": 727.833, "eval_steps_per_second": 45.505, "step": 13000 }, { "epoch": 4.13, "learning_rate": 3.65e-05, "loss": 0.5836, "step": 13500 }, { "epoch": 4.13, "eval_accuracy": 0.8696463654223968, "eval_loss": 0.5498046278953552, "eval_runtime": 7.8463, "eval_samples_per_second": 739.966, "eval_steps_per_second": 46.264, "step": 13500 }, { "epoch": 4.29, "learning_rate": 3.6e-05, "loss": 0.5864, "step": 14000 }, { "epoch": 4.29, "eval_accuracy": 0.8639773945240183, "eval_loss": 0.5728442072868347, "eval_runtime": 7.8404, "eval_samples_per_second": 740.524, "eval_steps_per_second": 46.299, "step": 14000 }, { "epoch": 4.44, "learning_rate": 3.55e-05, "loss": 0.5562, "step": 14500 }, { "epoch": 4.44, "eval_accuracy": 0.8623497479643273, "eval_loss": 0.6000498533248901, "eval_runtime": 7.8135, "eval_samples_per_second": 743.077, "eval_steps_per_second": 46.458, "step": 14500 }, { "epoch": 4.59, "learning_rate": 3.5e-05, "loss": 0.5999, "step": 15000 }, { "epoch": 4.59, "eval_accuracy": 0.8679152291769344, "eval_loss": 0.5589025020599365, "eval_runtime": 7.7959, "eval_samples_per_second": 744.749, "eval_steps_per_second": 46.563, "step": 15000 }, { "epoch": 4.75, "learning_rate": 3.45e-05, "loss": 0.5767, "step": 15500 }, { "epoch": 4.75, "eval_accuracy": 0.8680821783151479, "eval_loss": 0.5713112354278564, "eval_runtime": 8.9874, "eval_samples_per_second": 646.014, "eval_steps_per_second": 40.39, "step": 15500 }, { "epoch": 4.9, "learning_rate": 3.4000000000000007e-05, "loss": 0.5574, "step": 16000 }, { "epoch": 4.9, "eval_accuracy": 0.8739122026687295, "eval_loss": 0.5337920784950256, "eval_runtime": 10.8383, "eval_samples_per_second": 535.691, "eval_steps_per_second": 33.492, "step": 16000 }, { "epoch": 5.05, "learning_rate": 3.35e-05, "loss": 0.568, "step": 16500 }, { "epoch": 5.05, "eval_accuracy": 0.87250098000784, "eval_loss": 0.552727222442627, "eval_runtime": 7.8124, "eval_samples_per_second": 743.18, "eval_steps_per_second": 46.465, "step": 16500 }, { "epoch": 5.21, "learning_rate": 3.3e-05, "loss": 0.5568, "step": 17000 }, { "epoch": 5.21, "eval_accuracy": 0.8776927722971612, "eval_loss": 0.5058096051216125, "eval_runtime": 7.8143, "eval_samples_per_second": 742.993, "eval_steps_per_second": 46.453, "step": 17000 }, { "epoch": 5.36, "learning_rate": 3.2500000000000004e-05, "loss": 0.5369, "step": 17500 }, { "epoch": 5.36, "eval_accuracy": 0.8719769673704415, "eval_loss": 0.5599194169044495, "eval_runtime": 7.8287, "eval_samples_per_second": 741.628, "eval_steps_per_second": 46.368, "step": 17500 }, { "epoch": 5.51, "learning_rate": 3.2000000000000005e-05, "loss": 0.518, "step": 18000 }, { "epoch": 5.51, "eval_accuracy": 0.8720388349514563, "eval_loss": 0.561033308506012, "eval_runtime": 7.8241, "eval_samples_per_second": 742.071, "eval_steps_per_second": 46.395, "step": 18000 }, { "epoch": 5.66, "learning_rate": 3.15e-05, "loss": 0.5637, "step": 18500 }, { "epoch": 5.66, "eval_accuracy": 0.8727518855153742, "eval_loss": 0.5467284917831421, "eval_runtime": 8.0155, "eval_samples_per_second": 724.347, "eval_steps_per_second": 45.287, "step": 18500 }, { "epoch": 5.82, "learning_rate": 3.1e-05, "loss": 0.557, "step": 19000 }, { "epoch": 5.82, "eval_accuracy": 0.8713813872158539, "eval_loss": 0.5348953604698181, "eval_runtime": 8.0121, "eval_samples_per_second": 724.653, "eval_steps_per_second": 45.306, "step": 19000 }, { "epoch": 5.97, "learning_rate": 3.05e-05, "loss": 0.5499, "step": 19500 }, { "epoch": 5.97, "eval_accuracy": 0.8724001160878398, "eval_loss": 0.5467893481254578, "eval_runtime": 7.7511, "eval_samples_per_second": 749.05, "eval_steps_per_second": 46.832, "step": 19500 }, { "epoch": 6.12, "learning_rate": 3e-05, "loss": 0.5304, "step": 20000 }, { "epoch": 6.12, "eval_accuracy": 0.8740521910388971, "eval_loss": 0.5243064761161804, "eval_runtime": 7.8201, "eval_samples_per_second": 742.45, "eval_steps_per_second": 46.419, "step": 20000 }, { "epoch": 6.28, "learning_rate": 2.95e-05, "loss": 0.5431, "step": 20500 }, { "epoch": 6.28, "eval_accuracy": 0.8783942176206291, "eval_loss": 0.4997641146183014, "eval_runtime": 8.0018, "eval_samples_per_second": 725.585, "eval_steps_per_second": 45.365, "step": 20500 }, { "epoch": 6.43, "learning_rate": 2.9e-05, "loss": 0.5508, "step": 21000 }, { "epoch": 6.43, "eval_accuracy": 0.8763812154696132, "eval_loss": 0.5366745591163635, "eval_runtime": 7.8074, "eval_samples_per_second": 743.654, "eval_steps_per_second": 46.494, "step": 21000 }, { "epoch": 6.58, "learning_rate": 2.8499999999999998e-05, "loss": 0.5701, "step": 21500 }, { "epoch": 6.58, "eval_accuracy": 0.8734250823803063, "eval_loss": 0.5364522337913513, "eval_runtime": 7.9868, "eval_samples_per_second": 726.947, "eval_steps_per_second": 45.45, "step": 21500 }, { "epoch": 6.74, "learning_rate": 2.8000000000000003e-05, "loss": 0.521, "step": 22000 }, { "epoch": 6.74, "eval_accuracy": 0.8818635607321131, "eval_loss": 0.4879148006439209, "eval_runtime": 7.9938, "eval_samples_per_second": 726.31, "eval_steps_per_second": 45.41, "step": 22000 }, { "epoch": 6.89, "learning_rate": 2.7500000000000004e-05, "loss": 0.5514, "step": 22500 }, { "epoch": 6.89, "eval_accuracy": 0.8786950074147306, "eval_loss": 0.5105842351913452, "eval_runtime": 7.8325, "eval_samples_per_second": 741.269, "eval_steps_per_second": 46.345, "step": 22500 }, { "epoch": 7.04, "learning_rate": 2.7000000000000002e-05, "loss": 0.547, "step": 23000 }, { "epoch": 7.04, "eval_accuracy": 0.8747058823529412, "eval_loss": 0.5258113741874695, "eval_runtime": 7.8237, "eval_samples_per_second": 742.1, "eval_steps_per_second": 46.397, "step": 23000 }, { "epoch": 7.2, "learning_rate": 2.6500000000000004e-05, "loss": 0.5512, "step": 23500 }, { "epoch": 7.2, "eval_accuracy": 0.877830692973078, "eval_loss": 0.49750423431396484, "eval_runtime": 7.9086, "eval_samples_per_second": 734.135, "eval_steps_per_second": 45.899, "step": 23500 }, { "epoch": 7.35, "learning_rate": 2.6000000000000002e-05, "loss": 0.5407, "step": 24000 }, { "epoch": 7.35, "eval_accuracy": 0.8785601265822784, "eval_loss": 0.494391530752182, "eval_runtime": 8.2168, "eval_samples_per_second": 706.599, "eval_steps_per_second": 44.178, "step": 24000 }, { "epoch": 7.5, "learning_rate": 2.5500000000000003e-05, "loss": 0.5181, "step": 24500 }, { "epoch": 7.5, "eval_accuracy": 0.8794734275962945, "eval_loss": 0.4911736845970154, "eval_runtime": 8.2044, "eval_samples_per_second": 707.673, "eval_steps_per_second": 44.245, "step": 24500 }, { "epoch": 7.65, "learning_rate": 2.5e-05, "loss": 0.5493, "step": 25000 }, { "epoch": 7.65, "eval_accuracy": 0.87302207462395, "eval_loss": 0.5187950730323792, "eval_runtime": 8.0486, "eval_samples_per_second": 721.366, "eval_steps_per_second": 45.101, "step": 25000 }, { "epoch": 7.81, "learning_rate": 2.45e-05, "loss": 0.5388, "step": 25500 }, { "epoch": 7.81, "eval_accuracy": 0.8831105473704751, "eval_loss": 0.5000073313713074, "eval_runtime": 8.0362, "eval_samples_per_second": 722.481, "eval_steps_per_second": 45.171, "step": 25500 }, { "epoch": 7.96, "learning_rate": 2.4e-05, "loss": 0.5284, "step": 26000 }, { "epoch": 7.96, "eval_accuracy": 0.8737309019221291, "eval_loss": 0.5161213278770447, "eval_runtime": 8.1271, "eval_samples_per_second": 714.401, "eval_steps_per_second": 44.665, "step": 26000 }, { "epoch": 8.11, "learning_rate": 2.35e-05, "loss": 0.5116, "step": 26500 }, { "epoch": 8.11, "eval_accuracy": 0.8759842519685039, "eval_loss": 0.5262829065322876, "eval_runtime": 8.1731, "eval_samples_per_second": 710.381, "eval_steps_per_second": 44.414, "step": 26500 }, { "epoch": 8.27, "learning_rate": 2.3000000000000003e-05, "loss": 0.5161, "step": 27000 }, { "epoch": 8.27, "eval_accuracy": 0.8786888397577621, "eval_loss": 0.500228762626648, "eval_runtime": 8.034, "eval_samples_per_second": 722.681, "eval_steps_per_second": 45.183, "step": 27000 }, { "epoch": 8.42, "learning_rate": 2.25e-05, "loss": 0.5185, "step": 27500 }, { "epoch": 8.42, "eval_accuracy": 0.8744550138723741, "eval_loss": 0.5127227902412415, "eval_runtime": 7.7182, "eval_samples_per_second": 752.252, "eval_steps_per_second": 47.032, "step": 27500 }, { "epoch": 8.57, "learning_rate": 2.2000000000000003e-05, "loss": 0.5291, "step": 28000 }, { "epoch": 8.57, "eval_accuracy": 0.8782496527088708, "eval_loss": 0.5115563273429871, "eval_runtime": 8.0802, "eval_samples_per_second": 718.543, "eval_steps_per_second": 44.924, "step": 28000 }, { "epoch": 8.73, "learning_rate": 2.15e-05, "loss": 0.5061, "step": 28500 }, { "epoch": 8.73, "eval_accuracy": 0.8773942634905202, "eval_loss": 0.4972003996372223, "eval_runtime": 7.7937, "eval_samples_per_second": 744.959, "eval_steps_per_second": 46.576, "step": 28500 }, { "epoch": 8.88, "learning_rate": 2.1e-05, "loss": 0.479, "step": 29000 }, { "epoch": 8.88, "eval_accuracy": 0.8797814207650273, "eval_loss": 0.49780747294425964, "eval_runtime": 7.8838, "eval_samples_per_second": 736.449, "eval_steps_per_second": 46.044, "step": 29000 }, { "epoch": 9.03, "learning_rate": 2.05e-05, "loss": 0.5154, "step": 29500 }, { "epoch": 9.03, "eval_accuracy": 0.877119904790241, "eval_loss": 0.5088150501251221, "eval_runtime": 7.7019, "eval_samples_per_second": 753.843, "eval_steps_per_second": 47.131, "step": 29500 }, { "epoch": 9.19, "learning_rate": 2e-05, "loss": 0.4989, "step": 30000 }, { "epoch": 9.19, "eval_accuracy": 0.8744332741967278, "eval_loss": 0.5118668079376221, "eval_runtime": 7.7316, "eval_samples_per_second": 750.942, "eval_steps_per_second": 46.95, "step": 30000 }, { "epoch": 9.34, "learning_rate": 1.9500000000000003e-05, "loss": 0.5098, "step": 30500 }, { "epoch": 9.34, "eval_accuracy": 0.8825979176995538, "eval_loss": 0.4915599524974823, "eval_runtime": 7.7036, "eval_samples_per_second": 753.676, "eval_steps_per_second": 47.121, "step": 30500 }, { "epoch": 9.49, "learning_rate": 1.9e-05, "loss": 0.4777, "step": 31000 }, { "epoch": 9.49, "eval_accuracy": 0.882445449184195, "eval_loss": 0.49568039178848267, "eval_runtime": 7.7025, "eval_samples_per_second": 753.779, "eval_steps_per_second": 47.127, "step": 31000 }, { "epoch": 9.64, "learning_rate": 1.85e-05, "loss": 0.5462, "step": 31500 }, { "epoch": 9.64, "eval_accuracy": 0.8778625954198473, "eval_loss": 0.48457399010658264, "eval_runtime": 7.7115, "eval_samples_per_second": 752.903, "eval_steps_per_second": 47.073, "step": 31500 }, { "epoch": 9.8, "learning_rate": 1.8e-05, "loss": 0.509, "step": 32000 }, { "epoch": 9.8, "eval_accuracy": 0.8810146190337884, "eval_loss": 0.48734790086746216, "eval_runtime": 7.7302, "eval_samples_per_second": 751.078, "eval_steps_per_second": 46.959, "step": 32000 }, { "epoch": 9.95, "learning_rate": 1.75e-05, "loss": 0.5181, "step": 32500 }, { "epoch": 9.95, "eval_accuracy": 0.8710217755443886, "eval_loss": 0.5227355360984802, "eval_runtime": 7.7073, "eval_samples_per_second": 753.31, "eval_steps_per_second": 47.098, "step": 32500 }, { "epoch": 10.1, "learning_rate": 1.7000000000000003e-05, "loss": 0.5269, "step": 33000 }, { "epoch": 10.1, "eval_accuracy": 0.8802636757182212, "eval_loss": 0.49287834763526917, "eval_runtime": 8.3473, "eval_samples_per_second": 695.551, "eval_steps_per_second": 43.487, "step": 33000 }, { "epoch": 10.26, "learning_rate": 1.65e-05, "loss": 0.5094, "step": 33500 }, { "epoch": 10.26, "eval_accuracy": 0.8877481840193705, "eval_loss": 0.4840761125087738, "eval_runtime": 8.4693, "eval_samples_per_second": 685.535, "eval_steps_per_second": 42.861, "step": 33500 }, { "epoch": 10.41, "learning_rate": 1.6000000000000003e-05, "loss": 0.5033, "step": 34000 }, { "epoch": 10.41, "eval_accuracy": 0.8805490654205608, "eval_loss": 0.5128547549247742, "eval_runtime": 8.1006, "eval_samples_per_second": 716.736, "eval_steps_per_second": 44.811, "step": 34000 }, { "epoch": 10.56, "learning_rate": 1.55e-05, "loss": 0.4913, "step": 34500 }, { "epoch": 10.56, "eval_accuracy": 0.8789432939810334, "eval_loss": 0.4978225529193878, "eval_runtime": 8.4845, "eval_samples_per_second": 684.304, "eval_steps_per_second": 42.784, "step": 34500 }, { "epoch": 10.72, "learning_rate": 1.5e-05, "loss": 0.4938, "step": 35000 }, { "epoch": 10.72, "eval_accuracy": 0.8838202465301368, "eval_loss": 0.46402791142463684, "eval_runtime": 8.2894, "eval_samples_per_second": 700.417, "eval_steps_per_second": 43.791, "step": 35000 }, { "epoch": 10.87, "learning_rate": 1.45e-05, "loss": 0.4954, "step": 35500 }, { "epoch": 10.87, "eval_accuracy": 0.8793576184880533, "eval_loss": 0.4990694522857666, "eval_runtime": 8.2824, "eval_samples_per_second": 701.003, "eval_steps_per_second": 43.828, "step": 35500 }, { "epoch": 11.02, "learning_rate": 1.4000000000000001e-05, "loss": 0.458, "step": 36000 }, { "epoch": 11.02, "eval_accuracy": 0.8885711468297012, "eval_loss": 0.4452793300151825, "eval_runtime": 8.4212, "eval_samples_per_second": 689.451, "eval_steps_per_second": 43.106, "step": 36000 } ], "logging_steps": 500, "max_steps": 50000, "num_train_epochs": 16, "save_steps": 500, "total_flos": 5426535158775808.0, "trial_name": null, "trial_params": null }