{
  "best_metric": 0.4452793300151825,
  "best_model_checkpoint": "./models/adapters_mlm_cn/bg/checkpoint-36000",
  "epoch": 11.022657685241887,
  "eval_steps": 500,
  "global_step": 36000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.15,
      "learning_rate": 4.9500000000000004e-05,
      "loss": 1.5057,
      "step": 500
    },
    {
      "epoch": 0.15,
      "eval_accuracy": 0.8148620791204476,
      "eval_loss": 0.9846400618553162,
      "eval_runtime": 7.7173,
      "eval_samples_per_second": 752.339,
      "eval_steps_per_second": 47.037,
      "step": 500
    },
    {
      "epoch": 0.31,
      "learning_rate": 4.9e-05,
      "loss": 1.0172,
      "step": 1000
    },
    {
      "epoch": 0.31,
      "eval_accuracy": 0.82587890625,
      "eval_loss": 0.8394753932952881,
      "eval_runtime": 7.7319,
      "eval_samples_per_second": 750.918,
      "eval_steps_per_second": 46.949,
      "step": 1000
    },
    {
      "epoch": 0.46,
      "learning_rate": 4.85e-05,
      "loss": 0.8814,
      "step": 1500
    },
    {
      "epoch": 0.46,
      "eval_accuracy": 0.8368038740920097,
      "eval_loss": 0.7822620272636414,
      "eval_runtime": 7.7294,
      "eval_samples_per_second": 751.162,
      "eval_steps_per_second": 46.964,
      "step": 1500
    },
    {
      "epoch": 0.61,
      "learning_rate": 4.8e-05,
      "loss": 0.8405,
      "step": 2000
    },
    {
      "epoch": 0.61,
      "eval_accuracy": 0.8449071800412533,
      "eval_loss": 0.7436666488647461,
      "eval_runtime": 7.9259,
      "eval_samples_per_second": 732.538,
      "eval_steps_per_second": 45.799,
      "step": 2000
    },
    {
      "epoch": 0.77,
      "learning_rate": 4.75e-05,
      "loss": 0.7773,
      "step": 2500
    },
    {
      "epoch": 0.77,
      "eval_accuracy": 0.8386841062227507,
      "eval_loss": 0.7246997952461243,
      "eval_runtime": 7.7331,
      "eval_samples_per_second": 750.803,
      "eval_steps_per_second": 46.941,
      "step": 2500
    },
    {
      "epoch": 0.92,
      "learning_rate": 4.7e-05,
      "loss": 0.7762,
      "step": 3000
    },
    {
      "epoch": 0.92,
      "eval_accuracy": 0.8513044340839202,
      "eval_loss": 0.6520901322364807,
      "eval_runtime": 7.9369,
      "eval_samples_per_second": 731.516,
      "eval_steps_per_second": 45.736,
      "step": 3000
    },
    {
      "epoch": 1.07,
      "learning_rate": 4.6500000000000005e-05,
      "loss": 0.7186,
      "step": 3500
    },
    {
      "epoch": 1.07,
      "eval_accuracy": 0.8492265517916585,
      "eval_loss": 0.6834315061569214,
      "eval_runtime": 7.6738,
      "eval_samples_per_second": 756.596,
      "eval_steps_per_second": 47.304,
      "step": 3500
    },
    {
      "epoch": 1.22,
      "learning_rate": 4.600000000000001e-05,
      "loss": 0.7033,
      "step": 4000
    },
    {
      "epoch": 1.22,
      "eval_accuracy": 0.852271607371637,
      "eval_loss": 0.67154860496521,
      "eval_runtime": 7.7294,
      "eval_samples_per_second": 751.163,
      "eval_steps_per_second": 46.964,
      "step": 4000
    },
    {
      "epoch": 1.38,
      "learning_rate": 4.55e-05,
      "loss": 0.672,
      "step": 4500
    },
    {
      "epoch": 1.38,
      "eval_accuracy": 0.855973974763407,
      "eval_loss": 0.6539207100868225,
      "eval_runtime": 7.7117,
      "eval_samples_per_second": 752.881,
      "eval_steps_per_second": 47.071,
      "step": 4500
    },
    {
      "epoch": 1.53,
      "learning_rate": 4.5e-05,
      "loss": 0.6613,
      "step": 5000
    },
    {
      "epoch": 1.53,
      "eval_accuracy": 0.8567085131424088,
      "eval_loss": 0.638721227645874,
      "eval_runtime": 7.6505,
      "eval_samples_per_second": 758.9,
      "eval_steps_per_second": 47.448,
      "step": 5000
    },
    {
      "epoch": 1.68,
      "learning_rate": 4.4500000000000004e-05,
      "loss": 0.6712,
      "step": 5500
    },
    {
      "epoch": 1.68,
      "eval_accuracy": 0.862372613040467,
      "eval_loss": 0.6180465221405029,
      "eval_runtime": 7.7012,
      "eval_samples_per_second": 753.913,
      "eval_steps_per_second": 47.136,
      "step": 5500
    },
    {
      "epoch": 1.84,
      "learning_rate": 4.4000000000000006e-05,
      "loss": 0.6776,
      "step": 6000
    },
    {
      "epoch": 1.84,
      "eval_accuracy": 0.8537038849202466,
      "eval_loss": 0.6634594202041626,
      "eval_runtime": 7.7042,
      "eval_samples_per_second": 753.61,
      "eval_steps_per_second": 47.117,
      "step": 6000
    },
    {
      "epoch": 1.99,
      "learning_rate": 4.35e-05,
      "loss": 0.6484,
      "step": 6500
    },
    {
      "epoch": 1.99,
      "eval_accuracy": 0.8661394258933802,
      "eval_loss": 0.5945894122123718,
      "eval_runtime": 7.6974,
      "eval_samples_per_second": 754.283,
      "eval_steps_per_second": 47.159,
      "step": 6500
    },
    {
      "epoch": 2.14,
      "learning_rate": 4.3e-05,
      "loss": 0.6817,
      "step": 7000
    },
    {
      "epoch": 2.14,
      "eval_accuracy": 0.8654563297350344,
      "eval_loss": 0.6126104593276978,
      "eval_runtime": 8.509,
      "eval_samples_per_second": 682.334,
      "eval_steps_per_second": 42.661,
      "step": 7000
    },
    {
      "epoch": 2.3,
      "learning_rate": 4.25e-05,
      "loss": 0.6392,
      "step": 7500
    },
    {
      "epoch": 2.3,
      "eval_accuracy": 0.8613216715257531,
      "eval_loss": 0.613590657711029,
      "eval_runtime": 8.1378,
      "eval_samples_per_second": 713.458,
      "eval_steps_per_second": 44.606,
      "step": 7500
    },
    {
      "epoch": 2.45,
      "learning_rate": 4.2e-05,
      "loss": 0.6394,
      "step": 8000
    },
    {
      "epoch": 2.45,
      "eval_accuracy": 0.8620723749258453,
      "eval_loss": 0.6320650577545166,
      "eval_runtime": 7.7697,
      "eval_samples_per_second": 747.26,
      "eval_steps_per_second": 46.72,
      "step": 8000
    },
    {
      "epoch": 2.6,
      "learning_rate": 4.15e-05,
      "loss": 0.6273,
      "step": 8500
    },
    {
      "epoch": 2.6,
      "eval_accuracy": 0.8629402009560043,
      "eval_loss": 0.5997043251991272,
      "eval_runtime": 7.9947,
      "eval_samples_per_second": 726.232,
      "eval_steps_per_second": 45.405,
      "step": 8500
    },
    {
      "epoch": 2.76,
      "learning_rate": 4.1e-05,
      "loss": 0.5993,
      "step": 9000
    },
    {
      "epoch": 2.76,
      "eval_accuracy": 0.8645569620253165,
      "eval_loss": 0.6027613282203674,
      "eval_runtime": 8.0195,
      "eval_samples_per_second": 723.989,
      "eval_steps_per_second": 45.265,
      "step": 9000
    },
    {
      "epoch": 2.91,
      "learning_rate": 4.05e-05,
      "loss": 0.6527,
      "step": 9500
    },
    {
      "epoch": 2.91,
      "eval_accuracy": 0.8510214250124564,
      "eval_loss": 0.6583752632141113,
      "eval_runtime": 7.9195,
      "eval_samples_per_second": 733.131,
      "eval_steps_per_second": 45.836,
      "step": 9500
    },
    {
      "epoch": 3.06,
      "learning_rate": 4e-05,
      "loss": 0.5897,
      "step": 10000
    },
    {
      "epoch": 3.06,
      "eval_accuracy": 0.8676120587068623,
      "eval_loss": 0.5727556943893433,
      "eval_runtime": 7.9746,
      "eval_samples_per_second": 728.065,
      "eval_steps_per_second": 45.52,
      "step": 10000
    },
    {
      "epoch": 3.21,
      "learning_rate": 3.9500000000000005e-05,
      "loss": 0.574,
      "step": 10500
    },
    {
      "epoch": 3.21,
      "eval_accuracy": 0.8670824400701618,
      "eval_loss": 0.5869864821434021,
      "eval_runtime": 7.8716,
      "eval_samples_per_second": 737.59,
      "eval_steps_per_second": 46.115,
      "step": 10500
    },
    {
      "epoch": 3.37,
      "learning_rate": 3.9000000000000006e-05,
      "loss": 0.6026,
      "step": 11000
    },
    {
      "epoch": 3.37,
      "eval_accuracy": 0.8676513458361675,
      "eval_loss": 0.6066599488258362,
      "eval_runtime": 7.8242,
      "eval_samples_per_second": 742.057,
      "eval_steps_per_second": 46.395,
      "step": 11000
    },
    {
      "epoch": 3.52,
      "learning_rate": 3.85e-05,
      "loss": 0.5896,
      "step": 11500
    },
    {
      "epoch": 3.52,
      "eval_accuracy": 0.8638327806250629,
      "eval_loss": 0.6000019311904907,
      "eval_runtime": 8.0139,
      "eval_samples_per_second": 724.49,
      "eval_steps_per_second": 45.296,
      "step": 11500
    },
    {
      "epoch": 3.67,
      "learning_rate": 3.8e-05,
      "loss": 0.566,
      "step": 12000
    },
    {
      "epoch": 3.67,
      "eval_accuracy": 0.8711821948164563,
      "eval_loss": 0.5566375851631165,
      "eval_runtime": 7.8868,
      "eval_samples_per_second": 736.17,
      "eval_steps_per_second": 46.026,
      "step": 12000
    },
    {
      "epoch": 3.83,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 0.5928,
      "step": 12500
    },
    {
      "epoch": 3.83,
      "eval_accuracy": 0.8675352877307275,
      "eval_loss": 0.5621004700660706,
      "eval_runtime": 7.9912,
      "eval_samples_per_second": 726.553,
      "eval_steps_per_second": 45.425,
      "step": 12500
    },
    {
      "epoch": 3.98,
      "learning_rate": 3.7e-05,
      "loss": 0.597,
      "step": 13000
    },
    {
      "epoch": 3.98,
      "eval_accuracy": 0.8771320904403015,
      "eval_loss": 0.5161893963813782,
      "eval_runtime": 7.9771,
      "eval_samples_per_second": 727.833,
      "eval_steps_per_second": 45.505,
      "step": 13000
    },
    {
      "epoch": 4.13,
      "learning_rate": 3.65e-05,
      "loss": 0.5836,
      "step": 13500
    },
    {
      "epoch": 4.13,
      "eval_accuracy": 0.8696463654223968,
      "eval_loss": 0.5498046278953552,
      "eval_runtime": 7.8463,
      "eval_samples_per_second": 739.966,
      "eval_steps_per_second": 46.264,
      "step": 13500
    },
    {
      "epoch": 4.29,
      "learning_rate": 3.6e-05,
      "loss": 0.5864,
      "step": 14000
    },
    {
      "epoch": 4.29,
      "eval_accuracy": 0.8639773945240183,
      "eval_loss": 0.5728442072868347,
      "eval_runtime": 7.8404,
      "eval_samples_per_second": 740.524,
      "eval_steps_per_second": 46.299,
      "step": 14000
    },
    {
      "epoch": 4.44,
      "learning_rate": 3.55e-05,
      "loss": 0.5562,
      "step": 14500
    },
    {
      "epoch": 4.44,
      "eval_accuracy": 0.8623497479643273,
      "eval_loss": 0.6000498533248901,
      "eval_runtime": 7.8135,
      "eval_samples_per_second": 743.077,
      "eval_steps_per_second": 46.458,
      "step": 14500
    },
    {
      "epoch": 4.59,
      "learning_rate": 3.5e-05,
      "loss": 0.5999,
      "step": 15000
    },
    {
      "epoch": 4.59,
      "eval_accuracy": 0.8679152291769344,
      "eval_loss": 0.5589025020599365,
      "eval_runtime": 7.7959,
      "eval_samples_per_second": 744.749,
      "eval_steps_per_second": 46.563,
      "step": 15000
    },
    {
      "epoch": 4.75,
      "learning_rate": 3.45e-05,
      "loss": 0.5767,
      "step": 15500
    },
    {
      "epoch": 4.75,
      "eval_accuracy": 0.8680821783151479,
      "eval_loss": 0.5713112354278564,
      "eval_runtime": 8.9874,
      "eval_samples_per_second": 646.014,
      "eval_steps_per_second": 40.39,
      "step": 15500
    },
    {
      "epoch": 4.9,
      "learning_rate": 3.4000000000000007e-05,
      "loss": 0.5574,
      "step": 16000
    },
    {
      "epoch": 4.9,
      "eval_accuracy": 0.8739122026687295,
      "eval_loss": 0.5337920784950256,
      "eval_runtime": 10.8383,
      "eval_samples_per_second": 535.691,
      "eval_steps_per_second": 33.492,
      "step": 16000
    },
    {
      "epoch": 5.05,
      "learning_rate": 3.35e-05,
      "loss": 0.568,
      "step": 16500
    },
    {
      "epoch": 5.05,
      "eval_accuracy": 0.87250098000784,
      "eval_loss": 0.552727222442627,
      "eval_runtime": 7.8124,
      "eval_samples_per_second": 743.18,
      "eval_steps_per_second": 46.465,
      "step": 16500
    },
    {
      "epoch": 5.21,
      "learning_rate": 3.3e-05,
      "loss": 0.5568,
      "step": 17000
    },
    {
      "epoch": 5.21,
      "eval_accuracy": 0.8776927722971612,
      "eval_loss": 0.5058096051216125,
      "eval_runtime": 7.8143,
      "eval_samples_per_second": 742.993,
      "eval_steps_per_second": 46.453,
      "step": 17000
    },
    {
      "epoch": 5.36,
      "learning_rate": 3.2500000000000004e-05,
      "loss": 0.5369,
      "step": 17500
    },
    {
      "epoch": 5.36,
      "eval_accuracy": 0.8719769673704415,
      "eval_loss": 0.5599194169044495,
      "eval_runtime": 7.8287,
      "eval_samples_per_second": 741.628,
      "eval_steps_per_second": 46.368,
      "step": 17500
    },
    {
      "epoch": 5.51,
      "learning_rate": 3.2000000000000005e-05,
      "loss": 0.518,
      "step": 18000
    },
    {
      "epoch": 5.51,
      "eval_accuracy": 0.8720388349514563,
      "eval_loss": 0.561033308506012,
      "eval_runtime": 7.8241,
      "eval_samples_per_second": 742.071,
      "eval_steps_per_second": 46.395,
      "step": 18000
    },
    {
      "epoch": 5.66,
      "learning_rate": 3.15e-05,
      "loss": 0.5637,
      "step": 18500
    },
    {
      "epoch": 5.66,
      "eval_accuracy": 0.8727518855153742,
      "eval_loss": 0.5467284917831421,
      "eval_runtime": 8.0155,
      "eval_samples_per_second": 724.347,
      "eval_steps_per_second": 45.287,
      "step": 18500
    },
    {
      "epoch": 5.82,
      "learning_rate": 3.1e-05,
      "loss": 0.557,
      "step": 19000
    },
    {
      "epoch": 5.82,
      "eval_accuracy": 0.8713813872158539,
      "eval_loss": 0.5348953604698181,
      "eval_runtime": 8.0121,
      "eval_samples_per_second": 724.653,
      "eval_steps_per_second": 45.306,
      "step": 19000
    },
    {
      "epoch": 5.97,
      "learning_rate": 3.05e-05,
      "loss": 0.5499,
      "step": 19500
    },
    {
      "epoch": 5.97,
      "eval_accuracy": 0.8724001160878398,
      "eval_loss": 0.5467893481254578,
      "eval_runtime": 7.7511,
      "eval_samples_per_second": 749.05,
      "eval_steps_per_second": 46.832,
      "step": 19500
    },
    {
      "epoch": 6.12,
      "learning_rate": 3e-05,
      "loss": 0.5304,
      "step": 20000
    },
    {
      "epoch": 6.12,
      "eval_accuracy": 0.8740521910388971,
      "eval_loss": 0.5243064761161804,
      "eval_runtime": 7.8201,
      "eval_samples_per_second": 742.45,
      "eval_steps_per_second": 46.419,
      "step": 20000
    },
    {
      "epoch": 6.28,
      "learning_rate": 2.95e-05,
      "loss": 0.5431,
      "step": 20500
    },
    {
      "epoch": 6.28,
      "eval_accuracy": 0.8783942176206291,
      "eval_loss": 0.4997641146183014,
      "eval_runtime": 8.0018,
      "eval_samples_per_second": 725.585,
      "eval_steps_per_second": 45.365,
      "step": 20500
    },
    {
      "epoch": 6.43,
      "learning_rate": 2.9e-05,
      "loss": 0.5508,
      "step": 21000
    },
    {
      "epoch": 6.43,
      "eval_accuracy": 0.8763812154696132,
      "eval_loss": 0.5366745591163635,
      "eval_runtime": 7.8074,
      "eval_samples_per_second": 743.654,
      "eval_steps_per_second": 46.494,
      "step": 21000
    },
    {
      "epoch": 6.58,
      "learning_rate": 2.8499999999999998e-05,
      "loss": 0.5701,
      "step": 21500
    },
    {
      "epoch": 6.58,
      "eval_accuracy": 0.8734250823803063,
      "eval_loss": 0.5364522337913513,
      "eval_runtime": 7.9868,
      "eval_samples_per_second": 726.947,
      "eval_steps_per_second": 45.45,
      "step": 21500
    },
    {
      "epoch": 6.74,
      "learning_rate": 2.8000000000000003e-05,
      "loss": 0.521,
      "step": 22000
    },
    {
      "epoch": 6.74,
      "eval_accuracy": 0.8818635607321131,
      "eval_loss": 0.4879148006439209,
      "eval_runtime": 7.9938,
      "eval_samples_per_second": 726.31,
      "eval_steps_per_second": 45.41,
      "step": 22000
    },
    {
      "epoch": 6.89,
      "learning_rate": 2.7500000000000004e-05,
      "loss": 0.5514,
      "step": 22500
    },
    {
      "epoch": 6.89,
      "eval_accuracy": 0.8786950074147306,
      "eval_loss": 0.5105842351913452,
      "eval_runtime": 7.8325,
      "eval_samples_per_second": 741.269,
      "eval_steps_per_second": 46.345,
      "step": 22500
    },
    {
      "epoch": 7.04,
      "learning_rate": 2.7000000000000002e-05,
      "loss": 0.547,
      "step": 23000
    },
    {
      "epoch": 7.04,
      "eval_accuracy": 0.8747058823529412,
      "eval_loss": 0.5258113741874695,
      "eval_runtime": 7.8237,
      "eval_samples_per_second": 742.1,
      "eval_steps_per_second": 46.397,
      "step": 23000
    },
    {
      "epoch": 7.2,
      "learning_rate": 2.6500000000000004e-05,
      "loss": 0.5512,
      "step": 23500
    },
    {
      "epoch": 7.2,
      "eval_accuracy": 0.877830692973078,
      "eval_loss": 0.49750423431396484,
      "eval_runtime": 7.9086,
      "eval_samples_per_second": 734.135,
      "eval_steps_per_second": 45.899,
      "step": 23500
    },
    {
      "epoch": 7.35,
      "learning_rate": 2.6000000000000002e-05,
      "loss": 0.5407,
      "step": 24000
    },
    {
      "epoch": 7.35,
      "eval_accuracy": 0.8785601265822784,
      "eval_loss": 0.494391530752182,
      "eval_runtime": 8.2168,
      "eval_samples_per_second": 706.599,
      "eval_steps_per_second": 44.178,
      "step": 24000
    },
    {
      "epoch": 7.5,
      "learning_rate": 2.5500000000000003e-05,
      "loss": 0.5181,
      "step": 24500
    },
    {
      "epoch": 7.5,
      "eval_accuracy": 0.8794734275962945,
      "eval_loss": 0.4911736845970154,
      "eval_runtime": 8.2044,
      "eval_samples_per_second": 707.673,
      "eval_steps_per_second": 44.245,
      "step": 24500
    },
    {
      "epoch": 7.65,
      "learning_rate": 2.5e-05,
      "loss": 0.5493,
      "step": 25000
    },
    {
      "epoch": 7.65,
      "eval_accuracy": 0.87302207462395,
      "eval_loss": 0.5187950730323792,
      "eval_runtime": 8.0486,
      "eval_samples_per_second": 721.366,
      "eval_steps_per_second": 45.101,
      "step": 25000
    },
    {
      "epoch": 7.81,
      "learning_rate": 2.45e-05,
      "loss": 0.5388,
      "step": 25500
    },
    {
      "epoch": 7.81,
      "eval_accuracy": 0.8831105473704751,
      "eval_loss": 0.5000073313713074,
      "eval_runtime": 8.0362,
      "eval_samples_per_second": 722.481,
      "eval_steps_per_second": 45.171,
      "step": 25500
    },
    {
      "epoch": 7.96,
      "learning_rate": 2.4e-05,
      "loss": 0.5284,
      "step": 26000
    },
    {
      "epoch": 7.96,
      "eval_accuracy": 0.8737309019221291,
      "eval_loss": 0.5161213278770447,
      "eval_runtime": 8.1271,
      "eval_samples_per_second": 714.401,
      "eval_steps_per_second": 44.665,
      "step": 26000
    },
    {
      "epoch": 8.11,
      "learning_rate": 2.35e-05,
      "loss": 0.5116,
      "step": 26500
    },
    {
      "epoch": 8.11,
      "eval_accuracy": 0.8759842519685039,
      "eval_loss": 0.5262829065322876,
      "eval_runtime": 8.1731,
      "eval_samples_per_second": 710.381,
      "eval_steps_per_second": 44.414,
      "step": 26500
    },
    {
      "epoch": 8.27,
      "learning_rate": 2.3000000000000003e-05,
      "loss": 0.5161,
      "step": 27000
    },
    {
      "epoch": 8.27,
      "eval_accuracy": 0.8786888397577621,
      "eval_loss": 0.500228762626648,
      "eval_runtime": 8.034,
      "eval_samples_per_second": 722.681,
      "eval_steps_per_second": 45.183,
      "step": 27000
    },
    {
      "epoch": 8.42,
      "learning_rate": 2.25e-05,
      "loss": 0.5185,
      "step": 27500
    },
    {
      "epoch": 8.42,
      "eval_accuracy": 0.8744550138723741,
      "eval_loss": 0.5127227902412415,
      "eval_runtime": 7.7182,
      "eval_samples_per_second": 752.252,
      "eval_steps_per_second": 47.032,
      "step": 27500
    },
    {
      "epoch": 8.57,
      "learning_rate": 2.2000000000000003e-05,
      "loss": 0.5291,
      "step": 28000
    },
    {
      "epoch": 8.57,
      "eval_accuracy": 0.8782496527088708,
      "eval_loss": 0.5115563273429871,
      "eval_runtime": 8.0802,
      "eval_samples_per_second": 718.543,
      "eval_steps_per_second": 44.924,
      "step": 28000
    },
    {
      "epoch": 8.73,
      "learning_rate": 2.15e-05,
      "loss": 0.5061,
      "step": 28500
    },
    {
      "epoch": 8.73,
      "eval_accuracy": 0.8773942634905202,
      "eval_loss": 0.4972003996372223,
      "eval_runtime": 7.7937,
      "eval_samples_per_second": 744.959,
      "eval_steps_per_second": 46.576,
      "step": 28500
    },
    {
      "epoch": 8.88,
      "learning_rate": 2.1e-05,
      "loss": 0.479,
      "step": 29000
    },
    {
      "epoch": 8.88,
      "eval_accuracy": 0.8797814207650273,
      "eval_loss": 0.49780747294425964,
      "eval_runtime": 7.8838,
      "eval_samples_per_second": 736.449,
      "eval_steps_per_second": 46.044,
      "step": 29000
    },
    {
      "epoch": 9.03,
      "learning_rate": 2.05e-05,
      "loss": 0.5154,
      "step": 29500
    },
    {
      "epoch": 9.03,
      "eval_accuracy": 0.877119904790241,
      "eval_loss": 0.5088150501251221,
      "eval_runtime": 7.7019,
      "eval_samples_per_second": 753.843,
      "eval_steps_per_second": 47.131,
      "step": 29500
    },
    {
      "epoch": 9.19,
      "learning_rate": 2e-05,
      "loss": 0.4989,
      "step": 30000
    },
    {
      "epoch": 9.19,
      "eval_accuracy": 0.8744332741967278,
      "eval_loss": 0.5118668079376221,
      "eval_runtime": 7.7316,
      "eval_samples_per_second": 750.942,
      "eval_steps_per_second": 46.95,
      "step": 30000
    },
    {
      "epoch": 9.34,
      "learning_rate": 1.9500000000000003e-05,
      "loss": 0.5098,
      "step": 30500
    },
    {
      "epoch": 9.34,
      "eval_accuracy": 0.8825979176995538,
      "eval_loss": 0.4915599524974823,
      "eval_runtime": 7.7036,
      "eval_samples_per_second": 753.676,
      "eval_steps_per_second": 47.121,
      "step": 30500
    },
    {
      "epoch": 9.49,
      "learning_rate": 1.9e-05,
      "loss": 0.4777,
      "step": 31000
    },
    {
      "epoch": 9.49,
      "eval_accuracy": 0.882445449184195,
      "eval_loss": 0.49568039178848267,
      "eval_runtime": 7.7025,
      "eval_samples_per_second": 753.779,
      "eval_steps_per_second": 47.127,
      "step": 31000
    },
    {
      "epoch": 9.64,
      "learning_rate": 1.85e-05,
      "loss": 0.5462,
      "step": 31500
    },
    {
      "epoch": 9.64,
      "eval_accuracy": 0.8778625954198473,
      "eval_loss": 0.48457399010658264,
      "eval_runtime": 7.7115,
      "eval_samples_per_second": 752.903,
      "eval_steps_per_second": 47.073,
      "step": 31500
    },
    {
      "epoch": 9.8,
      "learning_rate": 1.8e-05,
      "loss": 0.509,
      "step": 32000
    },
    {
      "epoch": 9.8,
      "eval_accuracy": 0.8810146190337884,
      "eval_loss": 0.48734790086746216,
      "eval_runtime": 7.7302,
      "eval_samples_per_second": 751.078,
      "eval_steps_per_second": 46.959,
      "step": 32000
    },
    {
      "epoch": 9.95,
      "learning_rate": 1.75e-05,
      "loss": 0.5181,
      "step": 32500
    },
    {
      "epoch": 9.95,
      "eval_accuracy": 0.8710217755443886,
      "eval_loss": 0.5227355360984802,
      "eval_runtime": 7.7073,
      "eval_samples_per_second": 753.31,
      "eval_steps_per_second": 47.098,
      "step": 32500
    },
    {
      "epoch": 10.1,
      "learning_rate": 1.7000000000000003e-05,
      "loss": 0.5269,
      "step": 33000
    },
    {
      "epoch": 10.1,
      "eval_accuracy": 0.8802636757182212,
      "eval_loss": 0.49287834763526917,
      "eval_runtime": 8.3473,
      "eval_samples_per_second": 695.551,
      "eval_steps_per_second": 43.487,
      "step": 33000
    },
    {
      "epoch": 10.26,
      "learning_rate": 1.65e-05,
      "loss": 0.5094,
      "step": 33500
    },
    {
      "epoch": 10.26,
      "eval_accuracy": 0.8877481840193705,
      "eval_loss": 0.4840761125087738,
      "eval_runtime": 8.4693,
      "eval_samples_per_second": 685.535,
      "eval_steps_per_second": 42.861,
      "step": 33500
    },
    {
      "epoch": 10.41,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 0.5033,
      "step": 34000
    },
    {
      "epoch": 10.41,
      "eval_accuracy": 0.8805490654205608,
      "eval_loss": 0.5128547549247742,
      "eval_runtime": 8.1006,
      "eval_samples_per_second": 716.736,
      "eval_steps_per_second": 44.811,
      "step": 34000
    },
    {
      "epoch": 10.56,
      "learning_rate": 1.55e-05,
      "loss": 0.4913,
      "step": 34500
    },
    {
      "epoch": 10.56,
      "eval_accuracy": 0.8789432939810334,
      "eval_loss": 0.4978225529193878,
      "eval_runtime": 8.4845,
      "eval_samples_per_second": 684.304,
      "eval_steps_per_second": 42.784,
      "step": 34500
    },
    {
      "epoch": 10.72,
      "learning_rate": 1.5e-05,
      "loss": 0.4938,
      "step": 35000
    },
    {
      "epoch": 10.72,
      "eval_accuracy": 0.8838202465301368,
      "eval_loss": 0.46402791142463684,
      "eval_runtime": 8.2894,
      "eval_samples_per_second": 700.417,
      "eval_steps_per_second": 43.791,
      "step": 35000
    },
    {
      "epoch": 10.87,
      "learning_rate": 1.45e-05,
      "loss": 0.4954,
      "step": 35500
    },
    {
      "epoch": 10.87,
      "eval_accuracy": 0.8793576184880533,
      "eval_loss": 0.4990694522857666,
      "eval_runtime": 8.2824,
      "eval_samples_per_second": 701.003,
      "eval_steps_per_second": 43.828,
      "step": 35500
    },
    {
      "epoch": 11.02,
      "learning_rate": 1.4000000000000001e-05,
      "loss": 0.458,
      "step": 36000
    },
    {
      "epoch": 11.02,
      "eval_accuracy": 0.8885711468297012,
      "eval_loss": 0.4452793300151825,
      "eval_runtime": 8.4212,
      "eval_samples_per_second": 689.451,
      "eval_steps_per_second": 43.106,
      "step": 36000
    }
  ],
  "logging_steps": 500,
  "max_steps": 50000,
  "num_train_epochs": 16,
  "save_steps": 500,
  "total_flos": 5426535158775808.0,
  "trial_name": null,
  "trial_params": null
}