{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "global_step": 24489,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.06,
      "learning_rate": 1.959165339540202e-05,
      "loss": 3.1241,
      "step": 500
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.9183306790804037e-05,
      "loss": 2.5382,
      "step": 1000
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.8774960186206054e-05,
      "loss": 2.2802,
      "step": 1500
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.8366613581608072e-05,
      "loss": 2.0981,
      "step": 2000
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.795826697701009e-05,
      "loss": 1.9806,
      "step": 2500
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.7549920372412107e-05,
      "loss": 1.8749,
      "step": 3000
    },
    {
      "epoch": 0.43,
      "learning_rate": 1.714157376781412e-05,
      "loss": 1.7845,
      "step": 3500
    },
    {
      "epoch": 0.49,
      "learning_rate": 1.673322716321614e-05,
      "loss": 1.7693,
      "step": 4000
    },
    {
      "epoch": 0.55,
      "learning_rate": 1.6324880558618156e-05,
      "loss": 1.6563,
      "step": 4500
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.5916533954020174e-05,
      "loss": 1.6036,
      "step": 5000
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.550818734942219e-05,
      "loss": 1.572,
      "step": 5500
    },
    {
      "epoch": 0.74,
      "learning_rate": 1.5099840744824207e-05,
      "loss": 1.547,
      "step": 6000
    },
    {
      "epoch": 0.8,
      "learning_rate": 1.4691494140226225e-05,
      "loss": 1.5027,
      "step": 6500
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.4283147535628244e-05,
      "loss": 1.4847,
      "step": 7000
    },
    {
      "epoch": 0.92,
      "learning_rate": 1.3874800931030258e-05,
      "loss": 1.4495,
      "step": 7500
    },
    {
      "epoch": 0.98,
      "learning_rate": 1.3466454326432276e-05,
      "loss": 1.4558,
      "step": 8000
    },
    {
      "epoch": 1.04,
      "learning_rate": 1.3058107721834295e-05,
      "loss": 1.3089,
      "step": 8500
    },
    {
      "epoch": 1.1,
      "learning_rate": 1.2649761117236313e-05,
      "loss": 1.2217,
      "step": 9000
    },
    {
      "epoch": 1.16,
      "learning_rate": 1.2241414512638327e-05,
      "loss": 1.255,
      "step": 9500
    },
    {
      "epoch": 1.23,
      "learning_rate": 1.1833067908040346e-05,
      "loss": 1.1977,
      "step": 10000
    },
    {
      "epoch": 1.29,
      "learning_rate": 1.1424721303442364e-05,
      "loss": 1.209,
      "step": 10500
    },
    {
      "epoch": 1.35,
      "learning_rate": 1.1016374698844381e-05,
      "loss": 1.1799,
      "step": 11000
    },
    {
      "epoch": 1.41,
      "learning_rate": 1.0608028094246397e-05,
      "loss": 1.1807,
      "step": 11500
    },
    {
      "epoch": 1.47,
      "learning_rate": 1.0199681489648415e-05,
      "loss": 1.1597,
      "step": 12000
    },
    {
      "epoch": 1.53,
      "learning_rate": 9.791334885050432e-06,
      "loss": 1.1603,
      "step": 12500
    },
    {
      "epoch": 1.59,
      "learning_rate": 9.382988280452448e-06,
      "loss": 1.1847,
      "step": 13000
    },
    {
      "epoch": 1.65,
      "learning_rate": 8.974641675854466e-06,
      "loss": 1.1543,
      "step": 13500
    },
    {
      "epoch": 1.72,
      "learning_rate": 8.566295071256483e-06,
      "loss": 1.1336,
      "step": 14000
    },
    {
      "epoch": 1.78,
      "learning_rate": 8.157948466658501e-06,
      "loss": 1.1141,
      "step": 14500
    },
    {
      "epoch": 1.84,
      "learning_rate": 7.749601862060517e-06,
      "loss": 1.1361,
      "step": 15000
    },
    {
      "epoch": 1.9,
      "learning_rate": 7.341255257462535e-06,
      "loss": 1.142,
      "step": 15500
    },
    {
      "epoch": 1.96,
      "learning_rate": 6.932908652864552e-06,
      "loss": 1.1296,
      "step": 16000
    },
    {
      "epoch": 2.02,
      "learning_rate": 6.5245620482665695e-06,
      "loss": 1.0464,
      "step": 16500
    },
    {
      "epoch": 2.08,
      "learning_rate": 6.116215443668586e-06,
      "loss": 0.9281,
      "step": 17000
    },
    {
      "epoch": 2.14,
      "learning_rate": 5.707868839070604e-06,
      "loss": 0.9222,
      "step": 17500
    },
    {
      "epoch": 2.21,
      "learning_rate": 5.2995222344726205e-06,
      "loss": 0.9243,
      "step": 18000
    },
    {
      "epoch": 2.27,
      "learning_rate": 4.891175629874638e-06,
      "loss": 0.93,
      "step": 18500
    },
    {
      "epoch": 2.33,
      "learning_rate": 4.482829025276656e-06,
      "loss": 0.9329,
      "step": 19000
    },
    {
      "epoch": 2.39,
      "learning_rate": 4.074482420678672e-06,
      "loss": 0.9151,
      "step": 19500
    },
    {
      "epoch": 2.45,
      "learning_rate": 3.66613581608069e-06,
      "loss": 0.8925,
      "step": 20000
    },
    {
      "epoch": 2.51,
      "learning_rate": 3.257789211482707e-06,
      "loss": 0.9195,
      "step": 20500
    },
    {
      "epoch": 2.57,
      "learning_rate": 2.849442606884724e-06,
      "loss": 0.9131,
      "step": 21000
    },
    {
      "epoch": 2.63,
      "learning_rate": 2.441096002286741e-06,
      "loss": 0.8965,
      "step": 21500
    },
    {
      "epoch": 2.7,
      "learning_rate": 2.0327493976887585e-06,
      "loss": 0.9158,
      "step": 22000
    },
    {
      "epoch": 2.76,
      "learning_rate": 1.6244027930907754e-06,
      "loss": 0.9219,
      "step": 22500
    },
    {
      "epoch": 2.82,
      "learning_rate": 1.2160561884927927e-06,
      "loss": 0.9128,
      "step": 23000
    },
    {
      "epoch": 2.88,
      "learning_rate": 8.0770958389481e-07,
      "loss": 0.9106,
      "step": 23500
    },
    {
      "epoch": 2.94,
      "learning_rate": 3.993629792968272e-07,
      "loss": 0.9122,
      "step": 24000
    },
    {
      "epoch": 3.0,
      "step": 24489,
      "total_flos": 9.597903475284864e+16,
      "train_loss": 1.3127011176813441,
      "train_runtime": 19384.6994,
      "train_samples_per_second": 20.212,
      "train_steps_per_second": 1.263
    }
  ],
  "max_steps": 24489,
  "num_train_epochs": 3,
  "total_flos": 9.597903475284864e+16,
  "trial_name": null,
  "trial_params": null
}