|
{ |
|
"best_metric": 0.19522710144519806, |
|
"best_model_checkpoint": "./results_train/roberta-base/sst2/checkpoint-3500", |
|
"epoch": 10.0, |
|
"global_step": 42100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 3.9588281868566905e-06, |
|
"loss": 0.575, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_accuracy": 0.9071100917431193, |
|
"eval_loss": 0.2664913535118103, |
|
"eval_runtime": 2.4433, |
|
"eval_samples_per_second": 356.901, |
|
"eval_steps_per_second": 44.613, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 7.917656373713381e-06, |
|
"loss": 0.2989, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.9220183486238532, |
|
"eval_loss": 0.20883557200431824, |
|
"eval_runtime": 2.4454, |
|
"eval_samples_per_second": 356.584, |
|
"eval_steps_per_second": 44.573, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.1876484560570072e-05, |
|
"loss": 0.2725, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_accuracy": 0.9243119266055045, |
|
"eval_loss": 0.25596883893013, |
|
"eval_runtime": 2.451, |
|
"eval_samples_per_second": 355.775, |
|
"eval_steps_per_second": 44.472, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.5835312747426762e-05, |
|
"loss": 0.2814, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_accuracy": 0.926605504587156, |
|
"eval_loss": 0.20158442854881287, |
|
"eval_runtime": 2.462, |
|
"eval_samples_per_second": 354.188, |
|
"eval_steps_per_second": 44.274, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 1.9794140934283453e-05, |
|
"loss": 0.2586, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_accuracy": 0.9174311926605505, |
|
"eval_loss": 0.22930225729942322, |
|
"eval_runtime": 2.4517, |
|
"eval_samples_per_second": 355.671, |
|
"eval_steps_per_second": 44.459, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.9760448779501697e-05, |
|
"loss": 0.2536, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_accuracy": 0.9323394495412844, |
|
"eval_loss": 0.23396578431129456, |
|
"eval_runtime": 2.4584, |
|
"eval_samples_per_second": 354.697, |
|
"eval_steps_per_second": 44.337, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.95077576186385e-05, |
|
"loss": 0.2494, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_accuracy": 0.9323394495412844, |
|
"eval_loss": 0.19522710144519806, |
|
"eval_runtime": 2.4521, |
|
"eval_samples_per_second": 355.616, |
|
"eval_steps_per_second": 44.452, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.925506645777531e-05, |
|
"loss": 0.2396, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_accuracy": 0.9323394495412844, |
|
"eval_loss": 0.24936608970165253, |
|
"eval_runtime": 2.4569, |
|
"eval_samples_per_second": 354.916, |
|
"eval_steps_per_second": 44.365, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.9002375296912114e-05, |
|
"loss": 0.2123, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_accuracy": 0.9380733944954128, |
|
"eval_loss": 0.21870844066143036, |
|
"eval_runtime": 2.449, |
|
"eval_samples_per_second": 356.068, |
|
"eval_steps_per_second": 44.509, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.874968413604892e-05, |
|
"loss": 0.2042, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_accuracy": 0.9151376146788991, |
|
"eval_loss": 0.2811821401119232, |
|
"eval_runtime": 2.4602, |
|
"eval_samples_per_second": 354.439, |
|
"eval_steps_per_second": 44.305, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.849699297518573e-05, |
|
"loss": 0.2083, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_accuracy": 0.9346330275229358, |
|
"eval_loss": 0.27386215329170227, |
|
"eval_runtime": 2.5255, |
|
"eval_samples_per_second": 345.272, |
|
"eval_steps_per_second": 43.159, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.8244301814322537e-05, |
|
"loss": 0.2041, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_accuracy": 0.9380733944954128, |
|
"eval_loss": 0.20871196687221527, |
|
"eval_runtime": 2.4547, |
|
"eval_samples_per_second": 355.241, |
|
"eval_steps_per_second": 44.405, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.7991610653459345e-05, |
|
"loss": 0.1969, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"eval_accuracy": 0.9254587155963303, |
|
"eval_loss": 0.25904807448387146, |
|
"eval_runtime": 2.4532, |
|
"eval_samples_per_second": 355.448, |
|
"eval_steps_per_second": 44.431, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 1.773891949259615e-05, |
|
"loss": 0.1982, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_accuracy": 0.930045871559633, |
|
"eval_loss": 0.2444588840007782, |
|
"eval_runtime": 2.4545, |
|
"eval_samples_per_second": 355.268, |
|
"eval_steps_per_second": 44.409, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 1.7486228331732958e-05, |
|
"loss": 0.1943, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"eval_accuracy": 0.926605504587156, |
|
"eval_loss": 0.2798321545124054, |
|
"eval_runtime": 2.4455, |
|
"eval_samples_per_second": 356.567, |
|
"eval_steps_per_second": 44.571, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.7233537170869766e-05, |
|
"loss": 0.1848, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_accuracy": 0.9311926605504587, |
|
"eval_loss": 0.2844010591506958, |
|
"eval_runtime": 2.4586, |
|
"eval_samples_per_second": 354.679, |
|
"eval_steps_per_second": 44.335, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.698084601000657e-05, |
|
"loss": 0.1788, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_accuracy": 0.9254587155963303, |
|
"eval_loss": 0.2998378872871399, |
|
"eval_runtime": 2.446, |
|
"eval_samples_per_second": 356.496, |
|
"eval_steps_per_second": 44.562, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 1.672815484914338e-05, |
|
"loss": 0.1623, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_accuracy": 0.9392201834862385, |
|
"eval_loss": 0.2695905268192291, |
|
"eval_runtime": 2.4607, |
|
"eval_samples_per_second": 354.365, |
|
"eval_steps_per_second": 44.296, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.6475463688280183e-05, |
|
"loss": 0.1499, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_accuracy": 0.9277522935779816, |
|
"eval_loss": 0.25331878662109375, |
|
"eval_runtime": 2.4449, |
|
"eval_samples_per_second": 356.659, |
|
"eval_steps_per_second": 44.582, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 1.622277252741699e-05, |
|
"loss": 0.1426, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"eval_accuracy": 0.930045871559633, |
|
"eval_loss": 0.29705262184143066, |
|
"eval_runtime": 2.4651, |
|
"eval_samples_per_second": 353.733, |
|
"eval_steps_per_second": 44.217, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 1.59700813665538e-05, |
|
"loss": 0.1479, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"eval_accuracy": 0.9357798165137615, |
|
"eval_loss": 0.25958266854286194, |
|
"eval_runtime": 2.4502, |
|
"eval_samples_per_second": 355.883, |
|
"eval_steps_per_second": 44.485, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 1.5717390205690607e-05, |
|
"loss": 0.1405, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"eval_accuracy": 0.9254587155963303, |
|
"eval_loss": 0.2944609522819519, |
|
"eval_runtime": 2.4554, |
|
"eval_samples_per_second": 355.141, |
|
"eval_steps_per_second": 44.393, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 1.5464699044827415e-05, |
|
"loss": 0.1577, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"eval_accuracy": 0.9002293577981652, |
|
"eval_loss": 0.40612396597862244, |
|
"eval_runtime": 2.4539, |
|
"eval_samples_per_second": 355.36, |
|
"eval_steps_per_second": 44.42, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.521200788396422e-05, |
|
"loss": 0.1521, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"eval_accuracy": 0.9334862385321101, |
|
"eval_loss": 0.2724354565143585, |
|
"eval_runtime": 2.4461, |
|
"eval_samples_per_second": 356.483, |
|
"eval_steps_per_second": 44.56, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 1.4959316723101027e-05, |
|
"loss": 0.1426, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"eval_accuracy": 0.9426605504587156, |
|
"eval_loss": 0.27123740315437317, |
|
"eval_runtime": 2.4449, |
|
"eval_samples_per_second": 356.655, |
|
"eval_steps_per_second": 44.582, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 1.4706625562237835e-05, |
|
"loss": 0.1206, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"eval_accuracy": 0.9357798165137615, |
|
"eval_loss": 0.2954227328300476, |
|
"eval_runtime": 2.467, |
|
"eval_samples_per_second": 353.464, |
|
"eval_steps_per_second": 44.183, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 1.4453934401374641e-05, |
|
"loss": 0.1074, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"eval_accuracy": 0.9392201834862385, |
|
"eval_loss": 0.2653304934501648, |
|
"eval_runtime": 2.4486, |
|
"eval_samples_per_second": 356.118, |
|
"eval_steps_per_second": 44.515, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 1.420124324051145e-05, |
|
"loss": 0.112, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"eval_accuracy": 0.9346330275229358, |
|
"eval_loss": 0.2777578830718994, |
|
"eval_runtime": 2.4566, |
|
"eval_samples_per_second": 354.969, |
|
"eval_steps_per_second": 44.371, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 1.3948552079648254e-05, |
|
"loss": 0.1147, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"eval_accuracy": 0.9311926605504587, |
|
"eval_loss": 0.3704558312892914, |
|
"eval_runtime": 2.4454, |
|
"eval_samples_per_second": 356.589, |
|
"eval_steps_per_second": 44.574, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 1.3695860918785062e-05, |
|
"loss": 0.1196, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"eval_accuracy": 0.9346330275229358, |
|
"eval_loss": 0.2889645993709564, |
|
"eval_runtime": 2.4563, |
|
"eval_samples_per_second": 354.999, |
|
"eval_steps_per_second": 44.375, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 1.344316975792187e-05, |
|
"loss": 0.1159, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"eval_accuracy": 0.926605504587156, |
|
"eval_loss": 0.3448694944381714, |
|
"eval_runtime": 2.4429, |
|
"eval_samples_per_second": 356.949, |
|
"eval_steps_per_second": 44.619, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 1.3190478597058676e-05, |
|
"loss": 0.119, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"eval_accuracy": 0.9334862385321101, |
|
"eval_loss": 0.3207152187824249, |
|
"eval_runtime": 2.461, |
|
"eval_samples_per_second": 354.323, |
|
"eval_steps_per_second": 44.29, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 1.2937787436195484e-05, |
|
"loss": 0.1268, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"eval_accuracy": 0.9311926605504587, |
|
"eval_loss": 0.3234628736972809, |
|
"eval_runtime": 2.4504, |
|
"eval_samples_per_second": 355.858, |
|
"eval_steps_per_second": 44.482, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 1.2685096275332289e-05, |
|
"loss": 0.1074, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"eval_accuracy": 0.9334862385321101, |
|
"eval_loss": 0.3650290369987488, |
|
"eval_runtime": 2.456, |
|
"eval_samples_per_second": 355.052, |
|
"eval_steps_per_second": 44.382, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 1.2432405114469096e-05, |
|
"loss": 0.0805, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"eval_accuracy": 0.9380733944954128, |
|
"eval_loss": 0.33378419280052185, |
|
"eval_runtime": 2.4457, |
|
"eval_samples_per_second": 356.538, |
|
"eval_steps_per_second": 44.567, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 1.2179713953605903e-05, |
|
"loss": 0.0838, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"eval_accuracy": 0.9208715596330275, |
|
"eval_loss": 0.4302394688129425, |
|
"eval_runtime": 2.4587, |
|
"eval_samples_per_second": 354.661, |
|
"eval_steps_per_second": 44.333, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 1.192702279274271e-05, |
|
"loss": 0.0848, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"eval_accuracy": 0.9323394495412844, |
|
"eval_loss": 0.40956971049308777, |
|
"eval_runtime": 2.4483, |
|
"eval_samples_per_second": 356.162, |
|
"eval_steps_per_second": 44.52, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 1.1674331631879519e-05, |
|
"loss": 0.0922, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"eval_accuracy": 0.9369266055045872, |
|
"eval_loss": 0.3332035541534424, |
|
"eval_runtime": 2.4597, |
|
"eval_samples_per_second": 354.511, |
|
"eval_steps_per_second": 44.314, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 1.1421640471016325e-05, |
|
"loss": 0.091, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"eval_accuracy": 0.9438073394495413, |
|
"eval_loss": 0.3024330735206604, |
|
"eval_runtime": 2.4457, |
|
"eval_samples_per_second": 356.542, |
|
"eval_steps_per_second": 44.568, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 1.1168949310153133e-05, |
|
"loss": 0.0977, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"eval_accuracy": 0.9495412844036697, |
|
"eval_loss": 0.2673788070678711, |
|
"eval_runtime": 2.4587, |
|
"eval_samples_per_second": 354.654, |
|
"eval_steps_per_second": 44.332, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 1.0916258149289937e-05, |
|
"loss": 0.0897, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"eval_accuracy": 0.930045871559633, |
|
"eval_loss": 0.39930590987205505, |
|
"eval_runtime": 2.4473, |
|
"eval_samples_per_second": 356.313, |
|
"eval_steps_per_second": 44.539, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 1.0663566988426745e-05, |
|
"loss": 0.1013, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_accuracy": 0.9288990825688074, |
|
"eval_loss": 0.322666198015213, |
|
"eval_runtime": 2.4496, |
|
"eval_samples_per_second": 355.981, |
|
"eval_steps_per_second": 44.498, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"learning_rate": 1.0410875827563553e-05, |
|
"loss": 0.0671, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 5.11, |
|
"eval_accuracy": 0.9426605504587156, |
|
"eval_loss": 0.3374435603618622, |
|
"eval_runtime": 2.4457, |
|
"eval_samples_per_second": 356.54, |
|
"eval_steps_per_second": 44.567, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"learning_rate": 1.015818466670036e-05, |
|
"loss": 0.0671, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"eval_accuracy": 0.9277522935779816, |
|
"eval_loss": 0.4108366072177887, |
|
"eval_runtime": 2.4551, |
|
"eval_samples_per_second": 355.179, |
|
"eval_steps_per_second": 44.397, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 9.905493505837167e-06, |
|
"loss": 0.0652, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"eval_accuracy": 0.9380733944954128, |
|
"eval_loss": 0.3549734652042389, |
|
"eval_runtime": 2.4475, |
|
"eval_samples_per_second": 356.289, |
|
"eval_steps_per_second": 44.536, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 9.652802344973974e-06, |
|
"loss": 0.0664, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"eval_accuracy": 0.9357798165137615, |
|
"eval_loss": 0.339821994304657, |
|
"eval_runtime": 2.4559, |
|
"eval_samples_per_second": 355.062, |
|
"eval_steps_per_second": 44.383, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"learning_rate": 9.40011118411078e-06, |
|
"loss": 0.0742, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"eval_accuracy": 0.9380733944954128, |
|
"eval_loss": 0.3286002278327942, |
|
"eval_runtime": 2.4471, |
|
"eval_samples_per_second": 356.342, |
|
"eval_steps_per_second": 44.543, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 9.147420023247588e-06, |
|
"loss": 0.0758, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"eval_accuracy": 0.9311926605504587, |
|
"eval_loss": 0.32764118909835815, |
|
"eval_runtime": 2.4639, |
|
"eval_samples_per_second": 353.904, |
|
"eval_steps_per_second": 44.238, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"learning_rate": 8.894728862384394e-06, |
|
"loss": 0.075, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"eval_accuracy": 0.9369266055045872, |
|
"eval_loss": 0.32022935152053833, |
|
"eval_runtime": 2.4503, |
|
"eval_samples_per_second": 355.874, |
|
"eval_steps_per_second": 44.484, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 8.642037701521202e-06, |
|
"loss": 0.0686, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"eval_accuracy": 0.9415137614678899, |
|
"eval_loss": 0.3481292426586151, |
|
"eval_runtime": 2.4555, |
|
"eval_samples_per_second": 355.12, |
|
"eval_steps_per_second": 44.39, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 8.389346540658008e-06, |
|
"loss": 0.0729, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"eval_accuracy": 0.9334862385321101, |
|
"eval_loss": 0.38161903619766235, |
|
"eval_runtime": 2.4476, |
|
"eval_samples_per_second": 356.27, |
|
"eval_steps_per_second": 44.534, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"learning_rate": 8.136655379794816e-06, |
|
"loss": 0.0568, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"eval_accuracy": 0.9380733944954128, |
|
"eval_loss": 0.31324318051338196, |
|
"eval_runtime": 2.4707, |
|
"eval_samples_per_second": 352.935, |
|
"eval_steps_per_second": 44.117, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"learning_rate": 7.883964218931623e-06, |
|
"loss": 0.0529, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"eval_accuracy": 0.930045871559633, |
|
"eval_loss": 0.3756808340549469, |
|
"eval_runtime": 2.4544, |
|
"eval_samples_per_second": 355.287, |
|
"eval_steps_per_second": 44.411, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"learning_rate": 7.631273058068429e-06, |
|
"loss": 0.0506, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 6.41, |
|
"eval_accuracy": 0.9380733944954128, |
|
"eval_loss": 0.33958113193511963, |
|
"eval_runtime": 2.4531, |
|
"eval_samples_per_second": 355.471, |
|
"eval_steps_per_second": 44.434, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 7.378581897205236e-06, |
|
"loss": 0.0476, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"eval_accuracy": 0.9403669724770642, |
|
"eval_loss": 0.3641544580459595, |
|
"eval_runtime": 2.4417, |
|
"eval_samples_per_second": 357.132, |
|
"eval_steps_per_second": 44.641, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 7.125890736342044e-06, |
|
"loss": 0.0555, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"eval_accuracy": 0.9403669724770642, |
|
"eval_loss": 0.34298017621040344, |
|
"eval_runtime": 2.4463, |
|
"eval_samples_per_second": 356.452, |
|
"eval_steps_per_second": 44.556, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"learning_rate": 6.87319957547885e-06, |
|
"loss": 0.0574, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"eval_accuracy": 0.9392201834862385, |
|
"eval_loss": 0.3401435613632202, |
|
"eval_runtime": 2.4439, |
|
"eval_samples_per_second": 356.811, |
|
"eval_steps_per_second": 44.601, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 6.89, |
|
"learning_rate": 6.620508414615657e-06, |
|
"loss": 0.0524, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 6.89, |
|
"eval_accuracy": 0.9346330275229358, |
|
"eval_loss": 0.33783158659935, |
|
"eval_runtime": 2.4521, |
|
"eval_samples_per_second": 355.616, |
|
"eval_steps_per_second": 44.452, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 6.367817253752464e-06, |
|
"loss": 0.0492, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"eval_accuracy": 0.9380733944954128, |
|
"eval_loss": 0.3833492398262024, |
|
"eval_runtime": 2.4457, |
|
"eval_samples_per_second": 356.538, |
|
"eval_steps_per_second": 44.567, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 7.13, |
|
"learning_rate": 6.1151260928892706e-06, |
|
"loss": 0.039, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 7.13, |
|
"eval_accuracy": 0.9346330275229358, |
|
"eval_loss": 0.3346712589263916, |
|
"eval_runtime": 2.4434, |
|
"eval_samples_per_second": 356.873, |
|
"eval_steps_per_second": 44.609, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"learning_rate": 5.8624349320260785e-06, |
|
"loss": 0.0411, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"eval_accuracy": 0.9334862385321101, |
|
"eval_loss": 0.4404141902923584, |
|
"eval_runtime": 2.4419, |
|
"eval_samples_per_second": 357.102, |
|
"eval_steps_per_second": 44.638, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"learning_rate": 5.609743771162886e-06, |
|
"loss": 0.0412, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"eval_accuracy": 0.9380733944954128, |
|
"eval_loss": 0.36179476976394653, |
|
"eval_runtime": 2.4414, |
|
"eval_samples_per_second": 357.173, |
|
"eval_steps_per_second": 44.647, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 5.357052610299692e-06, |
|
"loss": 0.0477, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"eval_accuracy": 0.9380733944954128, |
|
"eval_loss": 0.3806387484073639, |
|
"eval_runtime": 2.4471, |
|
"eval_samples_per_second": 356.337, |
|
"eval_steps_per_second": 44.542, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 5.104361449436499e-06, |
|
"loss": 0.0435, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"eval_accuracy": 0.9334862385321101, |
|
"eval_loss": 0.39115917682647705, |
|
"eval_runtime": 2.4665, |
|
"eval_samples_per_second": 353.536, |
|
"eval_steps_per_second": 44.192, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"learning_rate": 4.851670288573306e-06, |
|
"loss": 0.0443, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 7.72, |
|
"eval_accuracy": 0.9392201834862385, |
|
"eval_loss": 0.39003145694732666, |
|
"eval_runtime": 2.4534, |
|
"eval_samples_per_second": 355.426, |
|
"eval_steps_per_second": 44.428, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"learning_rate": 4.598979127710113e-06, |
|
"loss": 0.0421, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"eval_accuracy": 0.9369266055045872, |
|
"eval_loss": 0.4152164161205292, |
|
"eval_runtime": 2.4525, |
|
"eval_samples_per_second": 355.556, |
|
"eval_steps_per_second": 44.445, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"learning_rate": 4.34628796684692e-06, |
|
"loss": 0.0495, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"eval_accuracy": 0.9288990825688074, |
|
"eval_loss": 0.3831779360771179, |
|
"eval_runtime": 2.447, |
|
"eval_samples_per_second": 356.361, |
|
"eval_steps_per_second": 44.545, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"learning_rate": 4.093596805983727e-06, |
|
"loss": 0.0293, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"eval_accuracy": 0.9346330275229358, |
|
"eval_loss": 0.44268128275871277, |
|
"eval_runtime": 2.4587, |
|
"eval_samples_per_second": 354.661, |
|
"eval_steps_per_second": 44.333, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 8.19, |
|
"learning_rate": 3.840905645120534e-06, |
|
"loss": 0.0253, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 8.19, |
|
"eval_accuracy": 0.9380733944954128, |
|
"eval_loss": 0.44246163964271545, |
|
"eval_runtime": 2.4427, |
|
"eval_samples_per_second": 356.983, |
|
"eval_steps_per_second": 44.623, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"learning_rate": 3.5882144842573407e-06, |
|
"loss": 0.0407, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"eval_accuracy": 0.9357798165137615, |
|
"eval_loss": 0.41019341349601746, |
|
"eval_runtime": 2.453, |
|
"eval_samples_per_second": 355.477, |
|
"eval_steps_per_second": 44.435, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 8.43, |
|
"learning_rate": 3.3355233233941482e-06, |
|
"loss": 0.0311, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 8.43, |
|
"eval_accuracy": 0.9369266055045872, |
|
"eval_loss": 0.44467687606811523, |
|
"eval_runtime": 2.4425, |
|
"eval_samples_per_second": 357.013, |
|
"eval_steps_per_second": 44.627, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"learning_rate": 3.082832162530955e-06, |
|
"loss": 0.0291, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"eval_accuracy": 0.9346330275229358, |
|
"eval_loss": 0.46120545268058777, |
|
"eval_runtime": 2.4514, |
|
"eval_samples_per_second": 355.714, |
|
"eval_steps_per_second": 44.464, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 8.67, |
|
"learning_rate": 2.8301410016677616e-06, |
|
"loss": 0.035, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 8.67, |
|
"eval_accuracy": 0.9346330275229358, |
|
"eval_loss": 0.4240852892398834, |
|
"eval_runtime": 2.4477, |
|
"eval_samples_per_second": 356.249, |
|
"eval_steps_per_second": 44.531, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"learning_rate": 2.577449840804569e-06, |
|
"loss": 0.0381, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"eval_accuracy": 0.9311926605504587, |
|
"eval_loss": 0.41976186633110046, |
|
"eval_runtime": 2.4523, |
|
"eval_samples_per_second": 355.586, |
|
"eval_steps_per_second": 44.448, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 8.91, |
|
"learning_rate": 2.3247586799413758e-06, |
|
"loss": 0.0234, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 8.91, |
|
"eval_accuracy": 0.9369266055045872, |
|
"eval_loss": 0.4344768822193146, |
|
"eval_runtime": 2.4469, |
|
"eval_samples_per_second": 356.366, |
|
"eval_steps_per_second": 44.546, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 2.072067519078183e-06, |
|
"loss": 0.0311, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"eval_accuracy": 0.9311926605504587, |
|
"eval_loss": 0.45580777525901794, |
|
"eval_runtime": 2.4545, |
|
"eval_samples_per_second": 355.27, |
|
"eval_steps_per_second": 44.409, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 9.14, |
|
"learning_rate": 1.8193763582149898e-06, |
|
"loss": 0.028, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 9.14, |
|
"eval_accuracy": 0.9380733944954128, |
|
"eval_loss": 0.42450448870658875, |
|
"eval_runtime": 2.4449, |
|
"eval_samples_per_second": 356.658, |
|
"eval_steps_per_second": 44.582, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"learning_rate": 1.5666851973517969e-06, |
|
"loss": 0.0213, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"eval_accuracy": 0.9380733944954128, |
|
"eval_loss": 0.446162611246109, |
|
"eval_runtime": 2.4606, |
|
"eval_samples_per_second": 354.384, |
|
"eval_steps_per_second": 44.298, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 9.38, |
|
"learning_rate": 1.3139940364886035e-06, |
|
"loss": 0.0276, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 9.38, |
|
"eval_accuracy": 0.9380733944954128, |
|
"eval_loss": 0.42100322246551514, |
|
"eval_runtime": 2.4512, |
|
"eval_samples_per_second": 355.743, |
|
"eval_steps_per_second": 44.468, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"learning_rate": 1.0613028756254106e-06, |
|
"loss": 0.0183, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"eval_accuracy": 0.9403669724770642, |
|
"eval_loss": 0.43098002672195435, |
|
"eval_runtime": 2.45, |
|
"eval_samples_per_second": 355.922, |
|
"eval_steps_per_second": 44.49, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"learning_rate": 8.086117147622177e-07, |
|
"loss": 0.0184, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 9.62, |
|
"eval_accuracy": 0.9403669724770642, |
|
"eval_loss": 0.4437469244003296, |
|
"eval_runtime": 2.4461, |
|
"eval_samples_per_second": 356.492, |
|
"eval_steps_per_second": 44.561, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 9.74, |
|
"learning_rate": 5.559205538990246e-07, |
|
"loss": 0.0296, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 9.74, |
|
"eval_accuracy": 0.9392201834862385, |
|
"eval_loss": 0.43114030361175537, |
|
"eval_runtime": 2.4504, |
|
"eval_samples_per_second": 355.859, |
|
"eval_steps_per_second": 44.482, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"learning_rate": 3.0322939303583163e-07, |
|
"loss": 0.019, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"eval_accuracy": 0.9415137614678899, |
|
"eval_loss": 0.42435380816459656, |
|
"eval_runtime": 2.4473, |
|
"eval_samples_per_second": 356.311, |
|
"eval_steps_per_second": 44.539, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 9.98, |
|
"learning_rate": 5.053823217263861e-08, |
|
"loss": 0.0245, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 9.98, |
|
"eval_accuracy": 0.9415137614678899, |
|
"eval_loss": 0.42697247862815857, |
|
"eval_runtime": 2.46, |
|
"eval_samples_per_second": 354.474, |
|
"eval_steps_per_second": 44.309, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 42100, |
|
"total_flos": 4.43006661686016e+16, |
|
"train_loss": 0.10745611605338416, |
|
"train_runtime": 8358.8854, |
|
"train_samples_per_second": 80.572, |
|
"train_steps_per_second": 5.037 |
|
} |
|
], |
|
"max_steps": 42100, |
|
"num_train_epochs": 10, |
|
"total_flos": 4.43006661686016e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|