{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 7.0,
  "global_step": 43890,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08,
      "learning_rate": 0.0002965823650034176,
      "loss": 3.7122,
      "step": 500
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00029316473000683526,
      "loss": 1.7581,
      "step": 1000
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.0002897470950102529,
      "loss": 1.5341,
      "step": 1500
    },
    {
      "epoch": 0.32,
      "learning_rate": 0.00028632946001367054,
      "loss": 1.4748,
      "step": 2000
    },
    {
      "epoch": 0.4,
      "learning_rate": 0.0002829118250170881,
      "loss": 1.3464,
      "step": 2500
    },
    {
      "epoch": 0.48,
      "learning_rate": 0.0002794941900205058,
      "loss": 1.2892,
      "step": 3000
    },
    {
      "epoch": 0.56,
      "learning_rate": 0.0002760765550239234,
      "loss": 1.2375,
      "step": 3500
    },
    {
      "epoch": 0.64,
      "learning_rate": 0.00027265892002734105,
      "loss": 1.2408,
      "step": 4000
    },
    {
      "epoch": 0.72,
      "learning_rate": 0.0002692412850307587,
      "loss": 1.1577,
      "step": 4500
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.0002658236500341763,
      "loss": 1.1164,
      "step": 5000
    },
    {
      "epoch": 0.8,
      "eval_gen_len": 4.541,
      "eval_loss": 0.8244166970252991,
      "eval_rouge1": 66.4678,
      "eval_rouge2": 35.3554,
      "eval_rougeL": 66.4543,
      "eval_rougeLsum": 66.4522,
      "eval_runtime": 109.1952,
      "eval_samples_per_second": 36.632,
      "eval_steps_per_second": 4.579,
      "step": 5000
    },
    {
      "epoch": 0.88,
      "learning_rate": 0.00026240601503759397,
      "loss": 1.1345,
      "step": 5500
    },
    {
      "epoch": 0.96,
      "learning_rate": 0.0002589883800410116,
      "loss": 1.1116,
      "step": 6000
    },
    {
      "epoch": 1.04,
      "learning_rate": 0.00025557074504442925,
      "loss": 1.0336,
      "step": 6500
    },
    {
      "epoch": 1.12,
      "learning_rate": 0.0002521531100478469,
      "loss": 0.9108,
      "step": 7000
    },
    {
      "epoch": 1.2,
      "learning_rate": 0.0002487354750512645,
      "loss": 0.92,
      "step": 7500
    },
    {
      "epoch": 1.28,
      "learning_rate": 0.0002453178400546821,
      "loss": 0.9071,
      "step": 8000
    },
    {
      "epoch": 1.36,
      "learning_rate": 0.00024190020505809978,
      "loss": 0.8936,
      "step": 8500
    },
    {
      "epoch": 1.44,
      "learning_rate": 0.0002384825700615174,
      "loss": 0.888,
      "step": 9000
    },
    {
      "epoch": 1.52,
      "learning_rate": 0.00023506493506493504,
      "loss": 0.8847,
      "step": 9500
    },
    {
      "epoch": 1.59,
      "learning_rate": 0.00023164730006835268,
      "loss": 0.9097,
      "step": 10000
    },
    {
      "epoch": 1.59,
      "eval_gen_len": 4.5548,
      "eval_loss": 0.7299422025680542,
      "eval_rouge1": 70.0574,
      "eval_rouge2": 37.5535,
      "eval_rougeL": 69.9512,
      "eval_rougeLsum": 70.0084,
      "eval_runtime": 106.5141,
      "eval_samples_per_second": 37.554,
      "eval_steps_per_second": 4.694,
      "step": 10000
    },
    {
      "epoch": 1.67,
      "learning_rate": 0.00022822966507177032,
      "loss": 0.8895,
      "step": 10500
    },
    {
      "epoch": 1.75,
      "learning_rate": 0.00022481203007518796,
      "loss": 0.8544,
      "step": 11000
    },
    {
      "epoch": 1.83,
      "learning_rate": 0.0002213943950786056,
      "loss": 0.8807,
      "step": 11500
    },
    {
      "epoch": 1.91,
      "learning_rate": 0.00021797676008202322,
      "loss": 0.8451,
      "step": 12000
    },
    {
      "epoch": 1.99,
      "learning_rate": 0.00021455912508544086,
      "loss": 0.82,
      "step": 12500
    },
    {
      "epoch": 2.07,
      "learning_rate": 0.00021114149008885847,
      "loss": 0.6878,
      "step": 13000
    },
    {
      "epoch": 2.15,
      "learning_rate": 0.00020772385509227614,
      "loss": 0.6759,
      "step": 13500
    },
    {
      "epoch": 2.23,
      "learning_rate": 0.00020430622009569378,
      "loss": 0.6998,
      "step": 14000
    },
    {
      "epoch": 2.31,
      "learning_rate": 0.0002008885850991114,
      "loss": 0.6751,
      "step": 14500
    },
    {
      "epoch": 2.39,
      "learning_rate": 0.00019747095010252903,
      "loss": 0.6637,
      "step": 15000
    },
    {
      "epoch": 2.39,
      "eval_gen_len": 4.703,
      "eval_loss": 0.7314157485961914,
      "eval_rouge1": 72.0767,
      "eval_rouge2": 39.2263,
      "eval_rougeL": 72.0257,
      "eval_rougeLsum": 72.0473,
      "eval_runtime": 110.4087,
      "eval_samples_per_second": 36.229,
      "eval_steps_per_second": 4.529,
      "step": 15000
    },
    {
      "epoch": 2.47,
      "learning_rate": 0.00019405331510594667,
      "loss": 0.6698,
      "step": 15500
    },
    {
      "epoch": 2.55,
      "learning_rate": 0.0001906356801093643,
      "loss": 0.672,
      "step": 16000
    },
    {
      "epoch": 2.63,
      "learning_rate": 0.00018721804511278195,
      "loss": 0.6431,
      "step": 16500
    },
    {
      "epoch": 2.71,
      "learning_rate": 0.00018380041011619957,
      "loss": 0.6653,
      "step": 17000
    },
    {
      "epoch": 2.79,
      "learning_rate": 0.0001803827751196172,
      "loss": 0.6824,
      "step": 17500
    },
    {
      "epoch": 2.87,
      "learning_rate": 0.00017696514012303485,
      "loss": 0.6668,
      "step": 18000
    },
    {
      "epoch": 2.95,
      "learning_rate": 0.00017354750512645246,
      "loss": 0.6318,
      "step": 18500
    },
    {
      "epoch": 3.03,
      "learning_rate": 0.0001701298701298701,
      "loss": 0.5934,
      "step": 19000
    },
    {
      "epoch": 3.11,
      "learning_rate": 0.00016671223513328777,
      "loss": 0.5086,
      "step": 19500
    },
    {
      "epoch": 3.19,
      "learning_rate": 0.00016329460013670539,
      "loss": 0.5015,
      "step": 20000
    },
    {
      "epoch": 3.19,
      "eval_gen_len": 4.75,
      "eval_loss": 0.7147404551506042,
      "eval_rouge1": 73.0185,
      "eval_rouge2": 39.9998,
      "eval_rougeL": 72.9347,
      "eval_rougeLsum": 72.9576,
      "eval_runtime": 106.9579,
      "eval_samples_per_second": 37.398,
      "eval_steps_per_second": 4.675,
      "step": 20000
    },
    {
      "epoch": 3.27,
      "learning_rate": 0.00015987696514012303,
      "loss": 0.4909,
      "step": 20500
    },
    {
      "epoch": 3.35,
      "learning_rate": 0.00015645933014354064,
      "loss": 0.5114,
      "step": 21000
    },
    {
      "epoch": 3.43,
      "learning_rate": 0.00015304169514695828,
      "loss": 0.5314,
      "step": 21500
    },
    {
      "epoch": 3.51,
      "learning_rate": 0.00014962406015037592,
      "loss": 0.5089,
      "step": 22000
    },
    {
      "epoch": 3.59,
      "learning_rate": 0.00014620642515379356,
      "loss": 0.5133,
      "step": 22500
    },
    {
      "epoch": 3.67,
      "learning_rate": 0.0001427887901572112,
      "loss": 0.5057,
      "step": 23000
    },
    {
      "epoch": 3.75,
      "learning_rate": 0.00013937115516062882,
      "loss": 0.5181,
      "step": 23500
    },
    {
      "epoch": 3.83,
      "learning_rate": 0.00013595352016404648,
      "loss": 0.4826,
      "step": 24000
    },
    {
      "epoch": 3.91,
      "learning_rate": 0.0001325358851674641,
      "loss": 0.497,
      "step": 24500
    },
    {
      "epoch": 3.99,
      "learning_rate": 0.00012911825017088174,
      "loss": 0.5101,
      "step": 25000
    },
    {
      "epoch": 3.99,
      "eval_gen_len": 4.8728,
      "eval_loss": 0.7054756283760071,
      "eval_rouge1": 73.7898,
      "eval_rouge2": 40.5481,
      "eval_rougeL": 73.7235,
      "eval_rougeLsum": 73.7901,
      "eval_runtime": 110.0456,
      "eval_samples_per_second": 36.349,
      "eval_steps_per_second": 4.544,
      "step": 25000
    },
    {
      "epoch": 4.07,
      "learning_rate": 0.00012570061517429938,
      "loss": 0.3961,
      "step": 25500
    },
    {
      "epoch": 4.15,
      "learning_rate": 0.00012228298017771702,
      "loss": 0.3725,
      "step": 26000
    },
    {
      "epoch": 4.23,
      "learning_rate": 0.00011886534518113465,
      "loss": 0.3698,
      "step": 26500
    },
    {
      "epoch": 4.31,
      "learning_rate": 0.00011544771018455227,
      "loss": 0.3946,
      "step": 27000
    },
    {
      "epoch": 4.39,
      "learning_rate": 0.00011203007518796991,
      "loss": 0.4009,
      "step": 27500
    },
    {
      "epoch": 4.47,
      "learning_rate": 0.00010861244019138756,
      "loss": 0.391,
      "step": 28000
    },
    {
      "epoch": 4.55,
      "learning_rate": 0.00010519480519480518,
      "loss": 0.3787,
      "step": 28500
    },
    {
      "epoch": 4.63,
      "learning_rate": 0.00010177717019822282,
      "loss": 0.3736,
      "step": 29000
    },
    {
      "epoch": 4.7,
      "learning_rate": 9.835953520164045e-05,
      "loss": 0.3842,
      "step": 29500
    },
    {
      "epoch": 4.78,
      "learning_rate": 9.494190020505809e-05,
      "loss": 0.3903,
      "step": 30000
    },
    {
      "epoch": 4.78,
      "eval_gen_len": 4.5938,
      "eval_loss": 0.7442232370376587,
      "eval_rouge1": 74.0845,
      "eval_rouge2": 39.9841,
      "eval_rougeL": 74.0172,
      "eval_rougeLsum": 74.0635,
      "eval_runtime": 110.762,
      "eval_samples_per_second": 36.113,
      "eval_steps_per_second": 4.514,
      "step": 30000
    },
    {
      "epoch": 4.86,
      "learning_rate": 9.152426520847573e-05,
      "loss": 0.3945,
      "step": 30500
    },
    {
      "epoch": 4.94,
      "learning_rate": 8.810663021189336e-05,
      "loss": 0.4128,
      "step": 31000
    },
    {
      "epoch": 5.02,
      "learning_rate": 8.468899521531099e-05,
      "loss": 0.363,
      "step": 31500
    },
    {
      "epoch": 5.1,
      "learning_rate": 8.127136021872864e-05,
      "loss": 0.3144,
      "step": 32000
    },
    {
      "epoch": 5.18,
      "learning_rate": 7.785372522214627e-05,
      "loss": 0.3106,
      "step": 32500
    },
    {
      "epoch": 5.26,
      "learning_rate": 7.44360902255639e-05,
      "loss": 0.2982,
      "step": 33000
    },
    {
      "epoch": 5.34,
      "learning_rate": 7.101845522898154e-05,
      "loss": 0.3016,
      "step": 33500
    },
    {
      "epoch": 5.42,
      "learning_rate": 6.760082023239918e-05,
      "loss": 0.3095,
      "step": 34000
    },
    {
      "epoch": 5.5,
      "learning_rate": 6.41831852358168e-05,
      "loss": 0.2863,
      "step": 34500
    },
    {
      "epoch": 5.58,
      "learning_rate": 6.076555023923445e-05,
      "loss": 0.2993,
      "step": 35000
    },
    {
      "epoch": 5.58,
      "eval_gen_len": 4.7412,
      "eval_loss": 0.8183711171150208,
      "eval_rouge1": 73.8405,
      "eval_rouge2": 40.2569,
      "eval_rougeL": 73.7756,
      "eval_rougeLsum": 73.7972,
      "eval_runtime": 109.7934,
      "eval_samples_per_second": 36.432,
      "eval_steps_per_second": 4.554,
      "step": 35000
    },
    {
      "epoch": 5.66,
      "learning_rate": 5.734791524265208e-05,
      "loss": 0.2975,
      "step": 35500
    },
    {
      "epoch": 5.74,
      "learning_rate": 5.393028024606972e-05,
      "loss": 0.3014,
      "step": 36000
    },
    {
      "epoch": 5.82,
      "learning_rate": 5.051264524948735e-05,
      "loss": 0.2996,
      "step": 36500
    },
    {
      "epoch": 5.9,
      "learning_rate": 4.7095010252904986e-05,
      "loss": 0.3068,
      "step": 37000
    },
    {
      "epoch": 5.98,
      "learning_rate": 4.367737525632262e-05,
      "loss": 0.2993,
      "step": 37500
    },
    {
      "epoch": 6.06,
      "learning_rate": 4.025974025974026e-05,
      "loss": 0.2447,
      "step": 38000
    },
    {
      "epoch": 6.14,
      "learning_rate": 3.684210526315789e-05,
      "loss": 0.2379,
      "step": 38500
    },
    {
      "epoch": 6.22,
      "learning_rate": 3.342447026657553e-05,
      "loss": 0.2447,
      "step": 39000
    },
    {
      "epoch": 6.3,
      "learning_rate": 3.0006835269993163e-05,
      "loss": 0.2452,
      "step": 39500
    },
    {
      "epoch": 6.38,
      "learning_rate": 2.65892002734108e-05,
      "loss": 0.2227,
      "step": 40000
    },
    {
      "epoch": 6.38,
      "eval_gen_len": 4.742,
      "eval_loss": 0.8277584910392761,
      "eval_rouge1": 74.0159,
      "eval_rouge2": 40.6403,
      "eval_rougeL": 73.9412,
      "eval_rougeLsum": 73.9722,
      "eval_runtime": 108.0867,
      "eval_samples_per_second": 37.007,
      "eval_steps_per_second": 4.626,
      "step": 40000
    },
    {
      "epoch": 6.46,
      "learning_rate": 2.3171565276828434e-05,
      "loss": 0.2331,
      "step": 40500
    },
    {
      "epoch": 6.54,
      "learning_rate": 1.9753930280246068e-05,
      "loss": 0.2374,
      "step": 41000
    },
    {
      "epoch": 6.62,
      "learning_rate": 1.6336295283663705e-05,
      "loss": 0.2462,
      "step": 41500
    },
    {
      "epoch": 6.7,
      "learning_rate": 1.2918660287081339e-05,
      "loss": 0.24,
      "step": 42000
    },
    {
      "epoch": 6.78,
      "learning_rate": 9.501025290498975e-06,
      "loss": 0.2217,
      "step": 42500
    },
    {
      "epoch": 6.86,
      "learning_rate": 6.083390293916609e-06,
      "loss": 0.2283,
      "step": 43000
    },
    {
      "epoch": 6.94,
      "learning_rate": 2.6657552973342446e-06,
      "loss": 0.2345,
      "step": 43500
    },
    {
      "epoch": 7.0,
      "step": 43890,
      "total_flos": 8.113234147780608e+16,
      "train_loss": 0.639304427944139,
      "train_runtime": 13849.1505,
      "train_samples_per_second": 25.353,
      "train_steps_per_second": 3.169
    }
  ],
  "max_steps": 43890,
  "num_train_epochs": 7,
  "total_flos": 8.113234147780608e+16,
  "trial_name": null,
  "trial_params": null
}