{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.25033952297803896, |
|
"global_step": 40000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.984353779813873e-05, |
|
"loss": 2.5206, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_bleu": 38.3843, |
|
"eval_gen_len": 23.8603, |
|
"eval_loss": 1.8556830883026123, |
|
"eval_runtime": 1197.8495, |
|
"eval_samples_per_second": 2.515, |
|
"eval_steps_per_second": 0.629, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9687075596277455e-05, |
|
"loss": 1.9572, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_bleu": 22.3733, |
|
"eval_gen_len": 18.9091, |
|
"eval_loss": 2.9118094444274902, |
|
"eval_runtime": 1006.4293, |
|
"eval_samples_per_second": 2.994, |
|
"eval_steps_per_second": 0.749, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.953061339441618e-05, |
|
"loss": 1.7653, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_bleu": 38.119, |
|
"eval_gen_len": 23.6728, |
|
"eval_loss": 1.9158709049224854, |
|
"eval_runtime": 1104.4939, |
|
"eval_samples_per_second": 2.728, |
|
"eval_steps_per_second": 0.683, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.937415119255491e-05, |
|
"loss": 1.715, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_bleu": 37.8451, |
|
"eval_gen_len": 23.7046, |
|
"eval_loss": 1.9664654731750488, |
|
"eval_runtime": 1119.4046, |
|
"eval_samples_per_second": 2.692, |
|
"eval_steps_per_second": 0.674, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.921768899069363e-05, |
|
"loss": 1.7164, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_bleu": 36.315, |
|
"eval_gen_len": 23.1172, |
|
"eval_loss": 1.8593918085098267, |
|
"eval_runtime": 1061.9031, |
|
"eval_samples_per_second": 2.837, |
|
"eval_steps_per_second": 0.71, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.906122678883235e-05, |
|
"loss": 1.616, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_bleu": 37.6941, |
|
"eval_gen_len": 23.617, |
|
"eval_loss": 1.816091537475586, |
|
"eval_runtime": 1096.7235, |
|
"eval_samples_per_second": 2.747, |
|
"eval_steps_per_second": 0.688, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.890476458697108e-05, |
|
"loss": 1.6131, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_bleu": 34.7085, |
|
"eval_gen_len": 22.4826, |
|
"eval_loss": 1.9029065370559692, |
|
"eval_runtime": 1058.2776, |
|
"eval_samples_per_second": 2.847, |
|
"eval_steps_per_second": 0.712, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.8748302385109805e-05, |
|
"loss": 1.5981, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_bleu": 37.0045, |
|
"eval_gen_len": 24.0611, |
|
"eval_loss": 1.9932270050048828, |
|
"eval_runtime": 1166.939, |
|
"eval_samples_per_second": 2.582, |
|
"eval_steps_per_second": 0.646, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.859184018324853e-05, |
|
"loss": 1.5898, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_bleu": 38.2992, |
|
"eval_gen_len": 24.2121, |
|
"eval_loss": 1.874466896057129, |
|
"eval_runtime": 1133.0065, |
|
"eval_samples_per_second": 2.659, |
|
"eval_steps_per_second": 0.665, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.843537798138726e-05, |
|
"loss": 1.5582, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_bleu": 36.9851, |
|
"eval_gen_len": 23.4202, |
|
"eval_loss": 1.8227670192718506, |
|
"eval_runtime": 1093.0431, |
|
"eval_samples_per_second": 2.757, |
|
"eval_steps_per_second": 0.69, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.8278915779525984e-05, |
|
"loss": 1.7726, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_bleu": 32.3628, |
|
"eval_gen_len": 22.3083, |
|
"eval_loss": 2.096940755844116, |
|
"eval_runtime": 1059.9352, |
|
"eval_samples_per_second": 2.843, |
|
"eval_steps_per_second": 0.711, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.812245357766471e-05, |
|
"loss": 1.7102, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_bleu": 36.1748, |
|
"eval_gen_len": 24.1942, |
|
"eval_loss": 2.018200397491455, |
|
"eval_runtime": 1129.5162, |
|
"eval_samples_per_second": 2.668, |
|
"eval_steps_per_second": 0.668, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.7965991375803436e-05, |
|
"loss": 1.521, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_bleu": 32.2516, |
|
"eval_gen_len": 28.4076, |
|
"eval_loss": 2.057112693786621, |
|
"eval_runtime": 1610.6798, |
|
"eval_samples_per_second": 1.871, |
|
"eval_steps_per_second": 0.468, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.780952917394216e-05, |
|
"loss": 1.6184, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_bleu": 37.8637, |
|
"eval_gen_len": 23.6844, |
|
"eval_loss": 1.8763169050216675, |
|
"eval_runtime": 1099.6674, |
|
"eval_samples_per_second": 2.74, |
|
"eval_steps_per_second": 0.686, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.765306697208088e-05, |
|
"loss": 1.5384, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_bleu": 37.7753, |
|
"eval_gen_len": 23.8984, |
|
"eval_loss": 1.8554104566574097, |
|
"eval_runtime": 1111.1157, |
|
"eval_samples_per_second": 2.712, |
|
"eval_steps_per_second": 0.679, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.749660477021961e-05, |
|
"loss": 1.5485, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_bleu": 37.1929, |
|
"eval_gen_len": 24.0408, |
|
"eval_loss": 1.9456806182861328, |
|
"eval_runtime": 1136.0111, |
|
"eval_samples_per_second": 2.652, |
|
"eval_steps_per_second": 0.664, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.7340142568358334e-05, |
|
"loss": 1.4872, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_bleu": 35.4276, |
|
"eval_gen_len": 23.2008, |
|
"eval_loss": 1.8643290996551514, |
|
"eval_runtime": 1078.1924, |
|
"eval_samples_per_second": 2.794, |
|
"eval_steps_per_second": 0.699, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.718368036649706e-05, |
|
"loss": 1.4412, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_bleu": 35.7002, |
|
"eval_gen_len": 23.613, |
|
"eval_loss": 1.910874366760254, |
|
"eval_runtime": 1134.7549, |
|
"eval_samples_per_second": 2.655, |
|
"eval_steps_per_second": 0.664, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.702721816463579e-05, |
|
"loss": 1.4483, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_bleu": 37.6828, |
|
"eval_gen_len": 24.0441, |
|
"eval_loss": 1.8838558197021484, |
|
"eval_runtime": 1126.6767, |
|
"eval_samples_per_second": 2.674, |
|
"eval_steps_per_second": 0.669, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.687075596277451e-05, |
|
"loss": 1.4523, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_bleu": 36.2473, |
|
"eval_gen_len": 23.3561, |
|
"eval_loss": 1.940147042274475, |
|
"eval_runtime": 1087.7551, |
|
"eval_samples_per_second": 2.77, |
|
"eval_steps_per_second": 0.693, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.671429376091324e-05, |
|
"loss": 1.4405, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_bleu": 35.6564, |
|
"eval_gen_len": 22.9851, |
|
"eval_loss": 1.8357064723968506, |
|
"eval_runtime": 1082.157, |
|
"eval_samples_per_second": 2.784, |
|
"eval_steps_per_second": 0.697, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.6557831559051965e-05, |
|
"loss": 1.3966, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_bleu": 37.8601, |
|
"eval_gen_len": 24.1235, |
|
"eval_loss": 1.8839383125305176, |
|
"eval_runtime": 1137.6484, |
|
"eval_samples_per_second": 2.648, |
|
"eval_steps_per_second": 0.663, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.640136935719069e-05, |
|
"loss": 1.415, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_bleu": 35.9576, |
|
"eval_gen_len": 23.4434, |
|
"eval_loss": 1.868781566619873, |
|
"eval_runtime": 1187.6496, |
|
"eval_samples_per_second": 2.537, |
|
"eval_steps_per_second": 0.635, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.624490715532942e-05, |
|
"loss": 1.4306, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_bleu": 37.8482, |
|
"eval_gen_len": 24.2154, |
|
"eval_loss": 1.8083645105361938, |
|
"eval_runtime": 1247.7304, |
|
"eval_samples_per_second": 2.415, |
|
"eval_steps_per_second": 0.604, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.6088444953468144e-05, |
|
"loss": 1.3337, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_bleu": 37.4047, |
|
"eval_gen_len": 23.692, |
|
"eval_loss": 1.798228144645691, |
|
"eval_runtime": 1200.6209, |
|
"eval_samples_per_second": 2.51, |
|
"eval_steps_per_second": 0.628, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.593198275160687e-05, |
|
"loss": 1.4188, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_bleu": 36.76, |
|
"eval_gen_len": 23.5446, |
|
"eval_loss": 1.8160576820373535, |
|
"eval_runtime": 1178.8918, |
|
"eval_samples_per_second": 2.556, |
|
"eval_steps_per_second": 0.64, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.5775520549745596e-05, |
|
"loss": 1.3576, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_bleu": 37.1149, |
|
"eval_gen_len": 23.6963, |
|
"eval_loss": 1.8919538259506226, |
|
"eval_runtime": 1218.0527, |
|
"eval_samples_per_second": 2.474, |
|
"eval_steps_per_second": 0.619, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.561905834788432e-05, |
|
"loss": 1.4464, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_bleu": 38.2523, |
|
"eval_gen_len": 24.2449, |
|
"eval_loss": 1.8881698846817017, |
|
"eval_runtime": 1185.5541, |
|
"eval_samples_per_second": 2.541, |
|
"eval_steps_per_second": 0.636, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.546259614602305e-05, |
|
"loss": 1.3548, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_bleu": 37.0608, |
|
"eval_gen_len": 23.6077, |
|
"eval_loss": 1.8448089361190796, |
|
"eval_runtime": 1093.1892, |
|
"eval_samples_per_second": 2.756, |
|
"eval_steps_per_second": 0.69, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.5306133944161775e-05, |
|
"loss": 1.3779, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_bleu": 37.2558, |
|
"eval_gen_len": 23.5888, |
|
"eval_loss": 1.868304967880249, |
|
"eval_runtime": 1104.5613, |
|
"eval_samples_per_second": 2.728, |
|
"eval_steps_per_second": 0.683, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.51496717423005e-05, |
|
"loss": 1.3565, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_bleu": 38.3579, |
|
"eval_gen_len": 24.3107, |
|
"eval_loss": 1.9323915243148804, |
|
"eval_runtime": 1143.8823, |
|
"eval_samples_per_second": 2.634, |
|
"eval_steps_per_second": 0.659, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.499320954043923e-05, |
|
"loss": 1.3474, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_bleu": 37.9665, |
|
"eval_gen_len": 23.8453, |
|
"eval_loss": 1.8365575075149536, |
|
"eval_runtime": 1107.7008, |
|
"eval_samples_per_second": 2.72, |
|
"eval_steps_per_second": 0.681, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.4836747338577954e-05, |
|
"loss": 1.3492, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_bleu": 37.6124, |
|
"eval_gen_len": 23.7939, |
|
"eval_loss": 1.8358746767044067, |
|
"eval_runtime": 1110.7352, |
|
"eval_samples_per_second": 2.713, |
|
"eval_steps_per_second": 0.679, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.468028513671668e-05, |
|
"loss": 1.3347, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_bleu": 37.7852, |
|
"eval_gen_len": 23.8994, |
|
"eval_loss": 1.7998509407043457, |
|
"eval_runtime": 1113.2105, |
|
"eval_samples_per_second": 2.707, |
|
"eval_steps_per_second": 0.677, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.45238229348554e-05, |
|
"loss": 1.3361, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_bleu": 37.3385, |
|
"eval_gen_len": 23.6077, |
|
"eval_loss": 1.849636197090149, |
|
"eval_runtime": 1105.7834, |
|
"eval_samples_per_second": 2.725, |
|
"eval_steps_per_second": 0.682, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.4367360732994125e-05, |
|
"loss": 1.3204, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_bleu": 36.8887, |
|
"eval_gen_len": 23.385, |
|
"eval_loss": 1.8504753112792969, |
|
"eval_runtime": 1074.3508, |
|
"eval_samples_per_second": 2.804, |
|
"eval_steps_per_second": 0.702, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.421089853113285e-05, |
|
"loss": 1.3234, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_bleu": 36.6707, |
|
"eval_gen_len": 23.2629, |
|
"eval_loss": 1.8434782028198242, |
|
"eval_runtime": 1075.4498, |
|
"eval_samples_per_second": 2.802, |
|
"eval_steps_per_second": 0.701, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.405443632927158e-05, |
|
"loss": 1.3189, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_bleu": 36.6831, |
|
"eval_gen_len": 23.6163, |
|
"eval_loss": 1.8203562498092651, |
|
"eval_runtime": 1116.9517, |
|
"eval_samples_per_second": 2.698, |
|
"eval_steps_per_second": 0.675, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.3897974127410304e-05, |
|
"loss": 1.2984, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_bleu": 36.9382, |
|
"eval_gen_len": 23.699, |
|
"eval_loss": 1.7791001796722412, |
|
"eval_runtime": 1129.6793, |
|
"eval_samples_per_second": 2.667, |
|
"eval_steps_per_second": 0.667, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.374151192554903e-05, |
|
"loss": 1.2913, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_bleu": 38.155, |
|
"eval_gen_len": 24.1394, |
|
"eval_loss": 1.8548424243927002, |
|
"eval_runtime": 1126.2716, |
|
"eval_samples_per_second": 2.675, |
|
"eval_steps_per_second": 0.669, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.3585049723687756e-05, |
|
"loss": 1.3222, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_bleu": 37.9634, |
|
"eval_gen_len": 23.9492, |
|
"eval_loss": 1.8165639638900757, |
|
"eval_runtime": 1117.1297, |
|
"eval_samples_per_second": 2.697, |
|
"eval_steps_per_second": 0.675, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.342858752182648e-05, |
|
"loss": 1.3223, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_bleu": 37.6942, |
|
"eval_gen_len": 24.0, |
|
"eval_loss": 1.8118115663528442, |
|
"eval_runtime": 1118.3371, |
|
"eval_samples_per_second": 2.694, |
|
"eval_steps_per_second": 0.674, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.327212531996521e-05, |
|
"loss": 1.3134, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_bleu": 37.7975, |
|
"eval_gen_len": 23.9004, |
|
"eval_loss": 1.8295111656188965, |
|
"eval_runtime": 1106.5041, |
|
"eval_samples_per_second": 2.723, |
|
"eval_steps_per_second": 0.681, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.3115663118103935e-05, |
|
"loss": 1.3045, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_bleu": 37.7737, |
|
"eval_gen_len": 23.9373, |
|
"eval_loss": 1.8213391304016113, |
|
"eval_runtime": 1127.6192, |
|
"eval_samples_per_second": 2.672, |
|
"eval_steps_per_second": 0.669, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.2959200916242654e-05, |
|
"loss": 1.3111, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_bleu": 37.8615, |
|
"eval_gen_len": 23.9519, |
|
"eval_loss": 1.8346147537231445, |
|
"eval_runtime": 1143.7572, |
|
"eval_samples_per_second": 2.634, |
|
"eval_steps_per_second": 0.659, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.280273871438138e-05, |
|
"loss": 1.3346, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_bleu": 38.1234, |
|
"eval_gen_len": 24.1049, |
|
"eval_loss": 1.7679733037948608, |
|
"eval_runtime": 1113.7045, |
|
"eval_samples_per_second": 2.705, |
|
"eval_steps_per_second": 0.677, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.264627651252011e-05, |
|
"loss": 1.2851, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_bleu": 38.1328, |
|
"eval_gen_len": 24.1069, |
|
"eval_loss": 1.801455020904541, |
|
"eval_runtime": 1136.226, |
|
"eval_samples_per_second": 2.652, |
|
"eval_steps_per_second": 0.664, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.248981431065883e-05, |
|
"loss": 1.2732, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_bleu": 37.8569, |
|
"eval_gen_len": 23.8579, |
|
"eval_loss": 1.8218289613723755, |
|
"eval_runtime": 1124.2084, |
|
"eval_samples_per_second": 2.68, |
|
"eval_steps_per_second": 0.671, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.233335210879756e-05, |
|
"loss": 1.2666, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_bleu": 37.3143, |
|
"eval_gen_len": 24.2327, |
|
"eval_loss": 1.8298362493515015, |
|
"eval_runtime": 1169.4328, |
|
"eval_samples_per_second": 2.576, |
|
"eval_steps_per_second": 0.645, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.2176889906936285e-05, |
|
"loss": 1.248, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_bleu": 37.4488, |
|
"eval_gen_len": 24.0846, |
|
"eval_loss": 1.7926667928695679, |
|
"eval_runtime": 1136.492, |
|
"eval_samples_per_second": 2.651, |
|
"eval_steps_per_second": 0.663, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.202042770507501e-05, |
|
"loss": 1.2586, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_bleu": 36.9098, |
|
"eval_gen_len": 23.5061, |
|
"eval_loss": 1.8259001970291138, |
|
"eval_runtime": 1094.3744, |
|
"eval_samples_per_second": 2.753, |
|
"eval_steps_per_second": 0.689, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.186396550321374e-05, |
|
"loss": 1.2305, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_bleu": 37.6963, |
|
"eval_gen_len": 23.9618, |
|
"eval_loss": 1.7810018062591553, |
|
"eval_runtime": 1112.8793, |
|
"eval_samples_per_second": 2.707, |
|
"eval_steps_per_second": 0.678, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.1707503301352464e-05, |
|
"loss": 1.2435, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_bleu": 38.219, |
|
"eval_gen_len": 24.2695, |
|
"eval_loss": 1.8444660902023315, |
|
"eval_runtime": 1136.4697, |
|
"eval_samples_per_second": 2.651, |
|
"eval_steps_per_second": 0.663, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.155104109949119e-05, |
|
"loss": 1.2681, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_bleu": 38.0339, |
|
"eval_gen_len": 23.8065, |
|
"eval_loss": 1.8042678833007812, |
|
"eval_runtime": 1110.1648, |
|
"eval_samples_per_second": 2.714, |
|
"eval_steps_per_second": 0.679, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.139457889762991e-05, |
|
"loss": 1.2581, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_bleu": 36.7337, |
|
"eval_gen_len": 23.6279, |
|
"eval_loss": 1.7898603677749634, |
|
"eval_runtime": 1101.1993, |
|
"eval_samples_per_second": 2.736, |
|
"eval_steps_per_second": 0.685, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.1238116695768636e-05, |
|
"loss": 1.2476, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_bleu": 37.5418, |
|
"eval_gen_len": 23.7527, |
|
"eval_loss": 1.851403832435608, |
|
"eval_runtime": 1100.3622, |
|
"eval_samples_per_second": 2.738, |
|
"eval_steps_per_second": 0.685, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.108165449390736e-05, |
|
"loss": 1.2778, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_bleu": 36.9893, |
|
"eval_gen_len": 23.8487, |
|
"eval_loss": 1.7635945081710815, |
|
"eval_runtime": 1113.9066, |
|
"eval_samples_per_second": 2.705, |
|
"eval_steps_per_second": 0.677, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.092519229204609e-05, |
|
"loss": 1.2335, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_bleu": 38.4148, |
|
"eval_gen_len": 24.0743, |
|
"eval_loss": 1.7612364292144775, |
|
"eval_runtime": 1118.162, |
|
"eval_samples_per_second": 2.695, |
|
"eval_steps_per_second": 0.674, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.0768730090184814e-05, |
|
"loss": 1.2229, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_bleu": 38.387, |
|
"eval_gen_len": 24.1324, |
|
"eval_loss": 1.724638819694519, |
|
"eval_runtime": 1133.5566, |
|
"eval_samples_per_second": 2.658, |
|
"eval_steps_per_second": 0.665, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.061226788832354e-05, |
|
"loss": 1.198, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_bleu": 38.2916, |
|
"eval_gen_len": 23.918, |
|
"eval_loss": 1.7657166719436646, |
|
"eval_runtime": 1258.8163, |
|
"eval_samples_per_second": 2.394, |
|
"eval_steps_per_second": 0.599, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.045580568646227e-05, |
|
"loss": 1.281, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_bleu": 36.5976, |
|
"eval_gen_len": 23.24, |
|
"eval_loss": 1.758962631225586, |
|
"eval_runtime": 1321.212, |
|
"eval_samples_per_second": 2.28, |
|
"eval_steps_per_second": 0.571, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.029934348460099e-05, |
|
"loss": 1.231, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_bleu": 37.8396, |
|
"eval_gen_len": 23.7385, |
|
"eval_loss": 1.733566164970398, |
|
"eval_runtime": 1419.0837, |
|
"eval_samples_per_second": 2.123, |
|
"eval_steps_per_second": 0.531, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.014288128273972e-05, |
|
"loss": 1.2014, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_bleu": 37.8945, |
|
"eval_gen_len": 23.8025, |
|
"eval_loss": 1.7839374542236328, |
|
"eval_runtime": 1450.5502, |
|
"eval_samples_per_second": 2.077, |
|
"eval_steps_per_second": 0.52, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.9986419080878445e-05, |
|
"loss": 1.2295, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_bleu": 37.9001, |
|
"eval_gen_len": 23.92, |
|
"eval_loss": 1.757741928100586, |
|
"eval_runtime": 1476.1175, |
|
"eval_samples_per_second": 2.041, |
|
"eval_steps_per_second": 0.511, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3.9829956879017165e-05, |
|
"loss": 1.2154, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_bleu": 37.5283, |
|
"eval_gen_len": 23.537, |
|
"eval_loss": 1.80224609375, |
|
"eval_runtime": 1473.4024, |
|
"eval_samples_per_second": 2.045, |
|
"eval_steps_per_second": 0.512, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.967349467715589e-05, |
|
"loss": 1.1956, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_bleu": 38.5709, |
|
"eval_gen_len": 24.0189, |
|
"eval_loss": 1.7815015316009521, |
|
"eval_runtime": 1398.0131, |
|
"eval_samples_per_second": 2.155, |
|
"eval_steps_per_second": 0.539, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.951703247529462e-05, |
|
"loss": 1.2045, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_bleu": 37.2706, |
|
"eval_gen_len": 23.2522, |
|
"eval_loss": 1.7592459917068481, |
|
"eval_runtime": 1445.8551, |
|
"eval_samples_per_second": 2.084, |
|
"eval_steps_per_second": 0.521, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 3.9360570273433344e-05, |
|
"loss": 1.1796, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_bleu": 38.5198, |
|
"eval_gen_len": 24.078, |
|
"eval_loss": 1.8075897693634033, |
|
"eval_runtime": 1528.5032, |
|
"eval_samples_per_second": 1.971, |
|
"eval_steps_per_second": 0.493, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.920410807157207e-05, |
|
"loss": 1.226, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_bleu": 38.1729, |
|
"eval_gen_len": 23.9038, |
|
"eval_loss": 1.731508493423462, |
|
"eval_runtime": 1501.9126, |
|
"eval_samples_per_second": 2.006, |
|
"eval_steps_per_second": 0.502, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.9047645869710796e-05, |
|
"loss": 1.2304, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_bleu": 37.8875, |
|
"eval_gen_len": 23.8493, |
|
"eval_loss": 1.7690067291259766, |
|
"eval_runtime": 1584.8349, |
|
"eval_samples_per_second": 1.901, |
|
"eval_steps_per_second": 0.476, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 3.889118366784952e-05, |
|
"loss": 1.197, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_bleu": 37.7553, |
|
"eval_gen_len": 23.9353, |
|
"eval_loss": 1.7940903902053833, |
|
"eval_runtime": 1636.4218, |
|
"eval_samples_per_second": 1.841, |
|
"eval_steps_per_second": 0.461, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.873472146598825e-05, |
|
"loss": 1.1844, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_bleu": 38.3847, |
|
"eval_gen_len": 23.9668, |
|
"eval_loss": 1.7370097637176514, |
|
"eval_runtime": 1586.621, |
|
"eval_samples_per_second": 1.899, |
|
"eval_steps_per_second": 0.475, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.8578259264126975e-05, |
|
"loss": 1.2099, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_bleu": 38.4795, |
|
"eval_gen_len": 23.932, |
|
"eval_loss": 1.7586994171142578, |
|
"eval_runtime": 1530.6865, |
|
"eval_samples_per_second": 1.968, |
|
"eval_steps_per_second": 0.493, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.84217970622657e-05, |
|
"loss": 1.1798, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_bleu": 37.463, |
|
"eval_gen_len": 23.6664, |
|
"eval_loss": 1.752753496170044, |
|
"eval_runtime": 1408.4249, |
|
"eval_samples_per_second": 2.139, |
|
"eval_steps_per_second": 0.535, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 3.826533486040442e-05, |
|
"loss": 1.1959, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_bleu": 38.1287, |
|
"eval_gen_len": 23.7693, |
|
"eval_loss": 1.7170414924621582, |
|
"eval_runtime": 1482.0293, |
|
"eval_samples_per_second": 2.033, |
|
"eval_steps_per_second": 0.509, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.8108872658543146e-05, |
|
"loss": 1.2061, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_bleu": 38.8039, |
|
"eval_gen_len": 23.9973, |
|
"eval_loss": 1.731540322303772, |
|
"eval_runtime": 1484.0478, |
|
"eval_samples_per_second": 2.03, |
|
"eval_steps_per_second": 0.508, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.795241045668187e-05, |
|
"loss": 1.1606, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_bleu": 37.93, |
|
"eval_gen_len": 23.8347, |
|
"eval_loss": 1.7204748392105103, |
|
"eval_runtime": 1527.1349, |
|
"eval_samples_per_second": 1.973, |
|
"eval_steps_per_second": 0.494, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 3.77959482548206e-05, |
|
"loss": 1.1605, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_bleu": 38.2637, |
|
"eval_gen_len": 24.2688, |
|
"eval_loss": 1.7610867023468018, |
|
"eval_runtime": 1550.9436, |
|
"eval_samples_per_second": 1.943, |
|
"eval_steps_per_second": 0.486, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.7639486052959325e-05, |
|
"loss": 1.1792, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_bleu": 38.3329, |
|
"eval_gen_len": 24.088, |
|
"eval_loss": 1.728115200996399, |
|
"eval_runtime": 1598.5717, |
|
"eval_samples_per_second": 1.885, |
|
"eval_steps_per_second": 0.472, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 3.748302385109805e-05, |
|
"loss": 1.1613, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_bleu": 38.7357, |
|
"eval_gen_len": 24.1019, |
|
"eval_loss": 1.7274950742721558, |
|
"eval_runtime": 1552.6289, |
|
"eval_samples_per_second": 1.941, |
|
"eval_steps_per_second": 0.486, |
|
"step": 40000 |
|
} |
|
], |
|
"max_steps": 159783, |
|
"num_train_epochs": 1, |
|
"total_flos": 6683673699483648.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |