|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"global_step": 184555, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 4.9865351792148686e-05, |
|
"loss": 8.822, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.9729890818455207e-05, |
|
"loss": 8.316, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.959442984476173e-05, |
|
"loss": 8.2016, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.945896887106825e-05, |
|
"loss": 8.095, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 4.9323507897374774e-05, |
|
"loss": 7.977, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.9188046923681294e-05, |
|
"loss": 7.9151, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.9052585949987814e-05, |
|
"loss": 7.8304, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.8917124976294334e-05, |
|
"loss": 7.7661, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.8781664002600854e-05, |
|
"loss": 7.6552, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.8646203028907374e-05, |
|
"loss": 7.5958, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.8510742055213895e-05, |
|
"loss": 7.5298, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.837528108152042e-05, |
|
"loss": 7.4355, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.824036195172171e-05, |
|
"loss": 7.3976, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.810490097802823e-05, |
|
"loss": 7.3092, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.796971092628214e-05, |
|
"loss": 7.2767, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.783424995258866e-05, |
|
"loss": 7.2137, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.769878897889518e-05, |
|
"loss": 7.1265, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.75633280052017e-05, |
|
"loss": 7.0906, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.7427867031508226e-05, |
|
"loss": 7.0238, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.7292406057814747e-05, |
|
"loss": 6.9584, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.715694508412127e-05, |
|
"loss": 6.9311, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.702148411042779e-05, |
|
"loss": 6.8991, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.688602313673431e-05, |
|
"loss": 6.8189, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.675083308498821e-05, |
|
"loss": 6.7935, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.661537211129473e-05, |
|
"loss": 6.7746, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.647991113760126e-05, |
|
"loss": 6.6974, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.634445016390778e-05, |
|
"loss": 6.7058, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.62089891902143e-05, |
|
"loss": 6.658, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.607352821652082e-05, |
|
"loss": 6.6636, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.593806724282734e-05, |
|
"loss": 6.6163, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.580260626913386e-05, |
|
"loss": 6.5785, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.566714529544038e-05, |
|
"loss": 6.5564, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.5531955243694293e-05, |
|
"loss": 6.5223, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.5396494270000814e-05, |
|
"loss": 6.5068, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.5261033296307334e-05, |
|
"loss": 6.4513, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.5125572322613854e-05, |
|
"loss": 6.4563, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.499011134892038e-05, |
|
"loss": 6.4462, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.485519221912167e-05, |
|
"loss": 6.4118, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.471973124542819e-05, |
|
"loss": 6.3876, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.458427027173471e-05, |
|
"loss": 6.3519, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.444880929804124e-05, |
|
"loss": 6.3444, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.431334832434776e-05, |
|
"loss": 6.323, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 4.417788735065428e-05, |
|
"loss": 6.3065, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.40424263769608e-05, |
|
"loss": 6.2821, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.390696540326732e-05, |
|
"loss": 6.2683, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 4.377150442957384e-05, |
|
"loss": 6.2759, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 4.3636314377827746e-05, |
|
"loss": 6.2454, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.350085340413427e-05, |
|
"loss": 6.195, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.336539243044079e-05, |
|
"loss": 6.1899, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.322993145674731e-05, |
|
"loss": 6.1767, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.3094470483053833e-05, |
|
"loss": 6.1696, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.295928043130774e-05, |
|
"loss": 6.1583, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 4.282381945761426e-05, |
|
"loss": 6.1395, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 4.268835848392078e-05, |
|
"loss": 6.0983, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 4.2552897510227306e-05, |
|
"loss": 6.0714, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.241743653653383e-05, |
|
"loss": 6.0803, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.228197556284035e-05, |
|
"loss": 6.0694, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.214678551109425e-05, |
|
"loss": 6.0678, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.201132453740077e-05, |
|
"loss": 6.0277, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.187586356370729e-05, |
|
"loss": 6.0379, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.174040259001381e-05, |
|
"loss": 5.9916, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4.160521253826773e-05, |
|
"loss": 6.0088, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 4.146975156457425e-05, |
|
"loss": 5.9999, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.133429059088077e-05, |
|
"loss": 6.0088, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.119882961718729e-05, |
|
"loss": 5.962, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 4.106336864349381e-05, |
|
"loss": 5.9289, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.092790766980033e-05, |
|
"loss": 5.9553, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 4.079271761805424e-05, |
|
"loss": 5.9288, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.065752756630815e-05, |
|
"loss": 5.9114, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.052206659261467e-05, |
|
"loss": 5.9116, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.038687654086858e-05, |
|
"loss": 5.9009, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.02514155671751e-05, |
|
"loss": 5.8766, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.011595459348162e-05, |
|
"loss": 5.8671, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.998049361978814e-05, |
|
"loss": 5.8468, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.984503264609466e-05, |
|
"loss": 5.8158, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.9709571672401185e-05, |
|
"loss": 5.8173, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 3.9574110698707705e-05, |
|
"loss": 5.8186, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 3.9438649725014225e-05, |
|
"loss": 5.7901, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.9303188751320746e-05, |
|
"loss": 5.7937, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.9167727777627266e-05, |
|
"loss": 5.7851, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.9032266803933786e-05, |
|
"loss": 5.7665, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.8896805830240306e-05, |
|
"loss": 5.7412, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.876134485654683e-05, |
|
"loss": 5.7565, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 3.862615480480074e-05, |
|
"loss": 5.7464, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.849069383110726e-05, |
|
"loss": 5.754, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.835523285741378e-05, |
|
"loss": 5.7239, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.82197718837203e-05, |
|
"loss": 5.7001, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.808431091002682e-05, |
|
"loss": 5.7204, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 3.794912085828073e-05, |
|
"loss": 5.7096, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 3.781365988458725e-05, |
|
"loss": 5.7, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 3.767819891089377e-05, |
|
"loss": 5.6793, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 3.754273793720029e-05, |
|
"loss": 5.6423, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 3.740727696350681e-05, |
|
"loss": 5.6658, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 3.7272086911760725e-05, |
|
"loss": 5.6413, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 3.7136625938067245e-05, |
|
"loss": 5.6256, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 3.7001164964373765e-05, |
|
"loss": 5.6421, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 3.686570399068029e-05, |
|
"loss": 5.6292, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 3.67305139389342e-05, |
|
"loss": 5.6188, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 3.659505296524072e-05, |
|
"loss": 5.6004, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 3.645959199154724e-05, |
|
"loss": 5.6285, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 3.632413101785376e-05, |
|
"loss": 5.5817, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 3.618867004416028e-05, |
|
"loss": 5.6021, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 3.60532090704668e-05, |
|
"loss": 5.5992, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 3.5917748096773326e-05, |
|
"loss": 5.5771, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 3.578255804502723e-05, |
|
"loss": 5.5678, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 3.564709707133375e-05, |
|
"loss": 5.5662, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 3.551163609764027e-05, |
|
"loss": 5.5427, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 3.537617512394679e-05, |
|
"loss": 5.5281, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.52409850722007e-05, |
|
"loss": 5.5256, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 3.510552409850722e-05, |
|
"loss": 5.52, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 3.4970063124813745e-05, |
|
"loss": 5.5423, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 3.4834602151120265e-05, |
|
"loss": 5.5106, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 3.4699141177426785e-05, |
|
"loss": 5.5255, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 3.4563680203733305e-05, |
|
"loss": 5.5081, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 3.4428219230039826e-05, |
|
"loss": 5.4839, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 3.4292758256346346e-05, |
|
"loss": 5.4764, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 3.4157839126547644e-05, |
|
"loss": 5.478, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 3.4022378152854164e-05, |
|
"loss": 5.4977, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 3.3886917179160684e-05, |
|
"loss": 5.4547, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 3.3751456205467204e-05, |
|
"loss": 5.4656, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 3.361599523177373e-05, |
|
"loss": 5.4723, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 3.348080518002764e-05, |
|
"loss": 5.4581, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 3.334534420633416e-05, |
|
"loss": 5.4571, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 3.320988323264068e-05, |
|
"loss": 5.4361, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 3.3074693180894583e-05, |
|
"loss": 5.4347, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 3.293923220720111e-05, |
|
"loss": 5.4383, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 3.280377123350763e-05, |
|
"loss": 5.413, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 3.266831025981415e-05, |
|
"loss": 5.4095, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 3.253284928612067e-05, |
|
"loss": 5.408, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 3.2397659234374577e-05, |
|
"loss": 5.3968, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 3.22621982606811e-05, |
|
"loss": 5.3889, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 3.212673728698762e-05, |
|
"loss": 5.3855, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 3.1991276313294144e-05, |
|
"loss": 5.3925, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.1855815339600664e-05, |
|
"loss": 5.3731, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 3.1720354365907184e-05, |
|
"loss": 5.3555, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.1584893392213704e-05, |
|
"loss": 5.3598, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.1449432418520224e-05, |
|
"loss": 5.3567, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.1313971444826745e-05, |
|
"loss": 5.3744, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 3.117878139308065e-05, |
|
"loss": 5.3452, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.104332041938718e-05, |
|
"loss": 5.35, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 3.09078594456937e-05, |
|
"loss": 5.3457, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 3.077239847200022e-05, |
|
"loss": 5.3314, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 3.063693749830674e-05, |
|
"loss": 5.3572, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.0501747446560647e-05, |
|
"loss": 5.3013, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 3.0366286472867167e-05, |
|
"loss": 5.3205, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 3.0230825499173687e-05, |
|
"loss": 5.3093, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 3.0095364525480214e-05, |
|
"loss": 5.3443, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 2.9959903551786734e-05, |
|
"loss": 5.3265, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 2.982471350004064e-05, |
|
"loss": 5.2934, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 2.968952344829455e-05, |
|
"loss": 5.2899, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 2.955406247460107e-05, |
|
"loss": 5.2758, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 2.941860150090759e-05, |
|
"loss": 5.2977, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 2.928314052721411e-05, |
|
"loss": 5.2775, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 2.9147679553520633e-05, |
|
"loss": 5.2554, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 2.9012218579827153e-05, |
|
"loss": 5.2733, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 2.8876757606133674e-05, |
|
"loss": 5.2688, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 2.8741296632440197e-05, |
|
"loss": 5.2627, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 2.8605835658746717e-05, |
|
"loss": 5.2376, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 2.8470645607000623e-05, |
|
"loss": 5.2662, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 2.8335184633307143e-05, |
|
"loss": 5.2381, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 2.8199994581561053e-05, |
|
"loss": 5.2358, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 2.8064533607867576e-05, |
|
"loss": 5.2339, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 2.7929072634174096e-05, |
|
"loss": 5.2222, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 2.7793611660480616e-05, |
|
"loss": 5.2319, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 2.7658150686787136e-05, |
|
"loss": 5.2433, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 2.7522689713093657e-05, |
|
"loss": 5.2092, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 2.7387228739400177e-05, |
|
"loss": 5.2139, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 2.7251767765706704e-05, |
|
"loss": 5.1814, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.7116306792013224e-05, |
|
"loss": 5.211, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 2.6981116740267133e-05, |
|
"loss": 5.1822, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 2.6845655766573653e-05, |
|
"loss": 5.1847, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 2.6710194792880173e-05, |
|
"loss": 5.1873, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.6574733819186694e-05, |
|
"loss": 5.2079, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 2.6439272845493214e-05, |
|
"loss": 5.1783, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 2.630408279374712e-05, |
|
"loss": 5.1551, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 2.6168621820053646e-05, |
|
"loss": 5.1518, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 2.6033160846360167e-05, |
|
"loss": 5.1565, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 2.5897699872666687e-05, |
|
"loss": 5.17, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 2.5762509820920593e-05, |
|
"loss": 5.153, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 2.5627048847227113e-05, |
|
"loss": 5.1538, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 2.5491587873533633e-05, |
|
"loss": 5.1384, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 2.5356126899840153e-05, |
|
"loss": 5.1408, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 2.522066592614668e-05, |
|
"loss": 5.1577, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 2.50852049524532e-05, |
|
"loss": 5.1285, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 2.494974397875972e-05, |
|
"loss": 5.1188, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 2.481428300506624e-05, |
|
"loss": 5.1353, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 2.467882203137276e-05, |
|
"loss": 5.1382, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 2.4543631979626673e-05, |
|
"loss": 5.1367, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.4408171005933193e-05, |
|
"loss": 5.1425, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 2.4272710032239713e-05, |
|
"loss": 5.1325, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 2.4137249058546234e-05, |
|
"loss": 5.1209, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 2.4001788084852757e-05, |
|
"loss": 5.1236, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 2.3866327111159277e-05, |
|
"loss": 5.0968, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 2.3731137059413183e-05, |
|
"loss": 5.1113, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 2.3595676085719707e-05, |
|
"loss": 5.0963, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 2.3460215112026227e-05, |
|
"loss": 5.1008, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 2.3324754138332747e-05, |
|
"loss": 5.0947, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 2.318929316463927e-05, |
|
"loss": 5.0981, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 2.3054103112893176e-05, |
|
"loss": 5.0859, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 2.2918913061147085e-05, |
|
"loss": 5.0686, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 2.2783452087453606e-05, |
|
"loss": 5.0733, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 2.264799111376013e-05, |
|
"loss": 5.1095, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.251253014006665e-05, |
|
"loss": 5.0707, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 2.237706916637317e-05, |
|
"loss": 5.0737, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.224160819267969e-05, |
|
"loss": 5.0509, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 2.2106147218986213e-05, |
|
"loss": 5.0901, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 2.1970686245292733e-05, |
|
"loss": 5.0635, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 2.1835225271599253e-05, |
|
"loss": 5.0665, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.1700035219853163e-05, |
|
"loss": 5.0423, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 2.1564574246159683e-05, |
|
"loss": 5.0465, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.1429113272466203e-05, |
|
"loss": 5.0422, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 2.1293923220720112e-05, |
|
"loss": 5.0851, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 2.1158462247026632e-05, |
|
"loss": 5.0242, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 2.1023001273333152e-05, |
|
"loss": 5.0386, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 2.0887540299639673e-05, |
|
"loss": 5.0455, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 2.0752079325946196e-05, |
|
"loss": 5.0002, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 2.0616618352252716e-05, |
|
"loss": 5.043, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 2.0481157378559236e-05, |
|
"loss": 5.012, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 2.0345696404865757e-05, |
|
"loss": 5.0406, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 2.021050635311967e-05, |
|
"loss": 5.0514, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 2.007504537942619e-05, |
|
"loss": 5.015, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 1.993958440573271e-05, |
|
"loss": 5.0416, |
|
"step": 111000 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 1.9804123432039233e-05, |
|
"loss": 4.9965, |
|
"step": 111500 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 1.9668662458345753e-05, |
|
"loss": 4.998, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.9533201484652273e-05, |
|
"loss": 4.9926, |
|
"step": 112500 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 1.9397740510958793e-05, |
|
"loss": 4.9909, |
|
"step": 113000 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 1.9262550459212703e-05, |
|
"loss": 5.006, |
|
"step": 113500 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 1.9127089485519223e-05, |
|
"loss": 4.9905, |
|
"step": 114000 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 1.8991628511825743e-05, |
|
"loss": 4.9792, |
|
"step": 114500 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 1.8856167538132266e-05, |
|
"loss": 5.0, |
|
"step": 115000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"learning_rate": 1.8720706564438787e-05, |
|
"loss": 4.9839, |
|
"step": 115500 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 1.8585516512692692e-05, |
|
"loss": 4.9721, |
|
"step": 116000 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 1.8450055538999213e-05, |
|
"loss": 4.9973, |
|
"step": 116500 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 1.8314594565305736e-05, |
|
"loss": 4.9879, |
|
"step": 117000 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 1.8179133591612256e-05, |
|
"loss": 4.9664, |
|
"step": 117500 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 1.8043672617918776e-05, |
|
"loss": 4.994, |
|
"step": 118000 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 1.7908482566172686e-05, |
|
"loss": 4.9679, |
|
"step": 118500 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 1.777302159247921e-05, |
|
"loss": 4.9841, |
|
"step": 119000 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"learning_rate": 1.763756061878573e-05, |
|
"loss": 4.9488, |
|
"step": 119500 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 1.750209964509225e-05, |
|
"loss": 4.9523, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 1.7366638671398773e-05, |
|
"loss": 4.9748, |
|
"step": 120500 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 1.7231177697705293e-05, |
|
"loss": 4.9513, |
|
"step": 121000 |
|
}, |
|
{ |
|
"epoch": 3.29, |
|
"learning_rate": 1.7095716724011813e-05, |
|
"loss": 4.9561, |
|
"step": 121500 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 1.696052667226572e-05, |
|
"loss": 4.9517, |
|
"step": 122000 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 1.6825065698572243e-05, |
|
"loss": 4.9385, |
|
"step": 122500 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 1.6689604724878763e-05, |
|
"loss": 4.9755, |
|
"step": 123000 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 1.6554143751185283e-05, |
|
"loss": 4.9507, |
|
"step": 123500 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 1.6418682777491807e-05, |
|
"loss": 4.9272, |
|
"step": 124000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 1.6283492725745712e-05, |
|
"loss": 4.936, |
|
"step": 124500 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 1.6148031752052233e-05, |
|
"loss": 4.9498, |
|
"step": 125000 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 1.6012570778358756e-05, |
|
"loss": 4.9539, |
|
"step": 125500 |
|
}, |
|
{ |
|
"epoch": 3.41, |
|
"learning_rate": 1.5877109804665276e-05, |
|
"loss": 4.962, |
|
"step": 126000 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 1.5741919752919185e-05, |
|
"loss": 4.9617, |
|
"step": 126500 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 1.560672970117309e-05, |
|
"loss": 4.9512, |
|
"step": 127000 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 1.547126872747961e-05, |
|
"loss": 4.9527, |
|
"step": 127500 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 1.5335807753786135e-05, |
|
"loss": 4.9305, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 1.5200346780092655e-05, |
|
"loss": 4.9259, |
|
"step": 128500 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 1.5064885806399175e-05, |
|
"loss": 4.9345, |
|
"step": 129000 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 1.4929424832705699e-05, |
|
"loss": 4.9137, |
|
"step": 129500 |
|
}, |
|
{ |
|
"epoch": 3.52, |
|
"learning_rate": 1.4793963859012219e-05, |
|
"loss": 4.9374, |
|
"step": 130000 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 1.4658502885318739e-05, |
|
"loss": 4.9258, |
|
"step": 130500 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"learning_rate": 1.4523041911625263e-05, |
|
"loss": 4.9272, |
|
"step": 131000 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 1.4387580937931783e-05, |
|
"loss": 4.92, |
|
"step": 131500 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 1.425239088618569e-05, |
|
"loss": 4.9077, |
|
"step": 132000 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 1.411692991249221e-05, |
|
"loss": 4.9334, |
|
"step": 132500 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 1.3981468938798734e-05, |
|
"loss": 4.9178, |
|
"step": 133000 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 1.3846007965105254e-05, |
|
"loss": 4.8956, |
|
"step": 133500 |
|
}, |
|
{ |
|
"epoch": 3.63, |
|
"learning_rate": 1.3710546991411774e-05, |
|
"loss": 4.9255, |
|
"step": 134000 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 1.3575086017718298e-05, |
|
"loss": 4.9, |
|
"step": 134500 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 1.3439625044024818e-05, |
|
"loss": 4.9317, |
|
"step": 135000 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"learning_rate": 1.3304164070331338e-05, |
|
"loss": 4.9136, |
|
"step": 135500 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 1.3168974018585246e-05, |
|
"loss": 4.8968, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 1.3033783966839155e-05, |
|
"loss": 4.8791, |
|
"step": 136500 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 1.2898322993145675e-05, |
|
"loss": 4.8937, |
|
"step": 137000 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 1.2762862019452195e-05, |
|
"loss": 4.8803, |
|
"step": 137500 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 1.2627401045758719e-05, |
|
"loss": 4.8928, |
|
"step": 138000 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 1.2491940072065239e-05, |
|
"loss": 4.8757, |
|
"step": 138500 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 1.2356750020319146e-05, |
|
"loss": 4.8957, |
|
"step": 139000 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 1.2221289046625668e-05, |
|
"loss": 4.8828, |
|
"step": 139500 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 1.2085828072932188e-05, |
|
"loss": 4.8819, |
|
"step": 140000 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 1.195036709923871e-05, |
|
"loss": 4.8843, |
|
"step": 140500 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 1.181490612554523e-05, |
|
"loss": 4.8891, |
|
"step": 141000 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 1.1679445151851752e-05, |
|
"loss": 4.8904, |
|
"step": 141500 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 1.1543984178158274e-05, |
|
"loss": 4.8785, |
|
"step": 142000 |
|
}, |
|
{ |
|
"epoch": 3.86, |
|
"learning_rate": 1.1408523204464794e-05, |
|
"loss": 4.8821, |
|
"step": 142500 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 1.1273604074666089e-05, |
|
"loss": 4.8758, |
|
"step": 143000 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 1.1138143100972611e-05, |
|
"loss": 4.876, |
|
"step": 143500 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 1.1002682127279133e-05, |
|
"loss": 4.8651, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 1.0867221153585653e-05, |
|
"loss": 4.8934, |
|
"step": 144500 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 1.0731760179892173e-05, |
|
"loss": 4.8698, |
|
"step": 145000 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 1.0596299206198695e-05, |
|
"loss": 4.8707, |
|
"step": 145500 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 1.0461109154452602e-05, |
|
"loss": 4.8499, |
|
"step": 146000 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 1.0325648180759123e-05, |
|
"loss": 4.8797, |
|
"step": 146500 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 1.0190187207065644e-05, |
|
"loss": 4.8525, |
|
"step": 147000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 1.0054726233372166e-05, |
|
"loss": 4.8662, |
|
"step": 147500 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"learning_rate": 9.919265259678686e-06, |
|
"loss": 4.8338, |
|
"step": 148000 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 9.784075207932596e-06, |
|
"loss": 4.8457, |
|
"step": 148500 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 9.648614234239116e-06, |
|
"loss": 4.8422, |
|
"step": 149000 |
|
}, |
|
{ |
|
"epoch": 4.05, |
|
"learning_rate": 9.513153260545638e-06, |
|
"loss": 4.8263, |
|
"step": 149500 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 9.377692286852158e-06, |
|
"loss": 4.851, |
|
"step": 150000 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 9.24223131315868e-06, |
|
"loss": 4.8555, |
|
"step": 150500 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 9.107041261412587e-06, |
|
"loss": 4.8442, |
|
"step": 151000 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 8.971580287719109e-06, |
|
"loss": 4.8482, |
|
"step": 151500 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 8.83611931402563e-06, |
|
"loss": 4.8402, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 8.700929262279538e-06, |
|
"loss": 4.8554, |
|
"step": 152500 |
|
}, |
|
{ |
|
"epoch": 4.15, |
|
"learning_rate": 8.56546828858606e-06, |
|
"loss": 4.8254, |
|
"step": 153000 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 8.43000731489258e-06, |
|
"loss": 4.8518, |
|
"step": 153500 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 8.2945463411991e-06, |
|
"loss": 4.8341, |
|
"step": 154000 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 8.159085367505622e-06, |
|
"loss": 4.8232, |
|
"step": 154500 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 8.023624393812142e-06, |
|
"loss": 4.8318, |
|
"step": 155000 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 7.888163420118664e-06, |
|
"loss": 4.8271, |
|
"step": 155500 |
|
}, |
|
{ |
|
"epoch": 4.23, |
|
"learning_rate": 7.752702446425184e-06, |
|
"loss": 4.8277, |
|
"step": 156000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 7.617241472731706e-06, |
|
"loss": 4.8374, |
|
"step": 156500 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 7.481780499038226e-06, |
|
"loss": 4.8639, |
|
"step": 157000 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 7.346319525344748e-06, |
|
"loss": 4.867, |
|
"step": 157500 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"learning_rate": 7.21085855165127e-06, |
|
"loss": 4.8409, |
|
"step": 158000 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 7.0756684999051776e-06, |
|
"loss": 4.8484, |
|
"step": 158500 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 6.940207526211699e-06, |
|
"loss": 4.8246, |
|
"step": 159000 |
|
}, |
|
{ |
|
"epoch": 4.32, |
|
"learning_rate": 6.8047465525182196e-06, |
|
"loss": 4.838, |
|
"step": 159500 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"learning_rate": 6.669285578824741e-06, |
|
"loss": 4.8192, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 6.533824605131262e-06, |
|
"loss": 4.8319, |
|
"step": 160500 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 6.39863455338517e-06, |
|
"loss": 4.8169, |
|
"step": 161000 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 6.26317357969169e-06, |
|
"loss": 4.8317, |
|
"step": 161500 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 6.127712605998212e-06, |
|
"loss": 4.836, |
|
"step": 162000 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 5.992251632304733e-06, |
|
"loss": 4.8141, |
|
"step": 162500 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 5.857061580558641e-06, |
|
"loss": 4.8147, |
|
"step": 163000 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"learning_rate": 5.721600606865162e-06, |
|
"loss": 4.8071, |
|
"step": 163500 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 5.586139633171684e-06, |
|
"loss": 4.8112, |
|
"step": 164000 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"learning_rate": 5.450678659478205e-06, |
|
"loss": 4.8153, |
|
"step": 164500 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 5.315488607732113e-06, |
|
"loss": 4.8249, |
|
"step": 165000 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 5.180027634038634e-06, |
|
"loss": 4.8046, |
|
"step": 165500 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 5.044566660345155e-06, |
|
"loss": 4.8319, |
|
"step": 166000 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 4.909105686651676e-06, |
|
"loss": 4.8174, |
|
"step": 166500 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 4.773644712958197e-06, |
|
"loss": 4.8236, |
|
"step": 167000 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 4.6381837392647184e-06, |
|
"loss": 4.8157, |
|
"step": 167500 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 4.502993687518626e-06, |
|
"loss": 4.8317, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 4.367532713825148e-06, |
|
"loss": 4.7945, |
|
"step": 168500 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 4.232071740131669e-06, |
|
"loss": 4.8353, |
|
"step": 169000 |
|
}, |
|
{ |
|
"epoch": 4.59, |
|
"learning_rate": 4.096610766438189e-06, |
|
"loss": 4.8057, |
|
"step": 169500 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 3.96114979274471e-06, |
|
"loss": 4.8076, |
|
"step": 170000 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 3.825688819051231e-06, |
|
"loss": 4.7903, |
|
"step": 170500 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 3.6902278453577528e-06, |
|
"loss": 4.8109, |
|
"step": 171000 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 3.5550377936116603e-06, |
|
"loss": 4.8055, |
|
"step": 171500 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 3.419576819918182e-06, |
|
"loss": 4.8029, |
|
"step": 172000 |
|
}, |
|
{ |
|
"epoch": 4.67, |
|
"learning_rate": 3.2841158462247027e-06, |
|
"loss": 4.8136, |
|
"step": 172500 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 3.1486548725312237e-06, |
|
"loss": 4.8165, |
|
"step": 173000 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 3.013193898837745e-06, |
|
"loss": 4.8255, |
|
"step": 173500 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 2.877732925144266e-06, |
|
"loss": 4.7768, |
|
"step": 174000 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 2.7422719514507875e-06, |
|
"loss": 4.7727, |
|
"step": 174500 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 2.607081899704695e-06, |
|
"loss": 4.7951, |
|
"step": 175000 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 2.471620926011216e-06, |
|
"loss": 4.7991, |
|
"step": 175500 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 2.3361599523177375e-06, |
|
"loss": 4.8037, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 2.2006989786242585e-06, |
|
"loss": 4.7915, |
|
"step": 176500 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 2.0652380049307795e-06, |
|
"loss": 4.7938, |
|
"step": 177000 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 1.9297770312373005e-06, |
|
"loss": 4.7778, |
|
"step": 177500 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 1.7943160575438219e-06, |
|
"loss": 4.7994, |
|
"step": 178000 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 1.6588550838503429e-06, |
|
"loss": 4.7939, |
|
"step": 178500 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 1.5236650321042508e-06, |
|
"loss": 4.7956, |
|
"step": 179000 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 1.388474980358159e-06, |
|
"loss": 4.8032, |
|
"step": 179500 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 1.25301400666468e-06, |
|
"loss": 4.7941, |
|
"step": 180000 |
|
}, |
|
{ |
|
"epoch": 4.89, |
|
"learning_rate": 1.1175530329712012e-06, |
|
"loss": 4.7933, |
|
"step": 180500 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 9.820920592777222e-07, |
|
"loss": 4.7929, |
|
"step": 181000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 8.466310855842432e-07, |
|
"loss": 4.8078, |
|
"step": 181500 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 7.111701118907644e-07, |
|
"loss": 4.7951, |
|
"step": 182000 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 5.757091381972854e-07, |
|
"loss": 4.8039, |
|
"step": 182500 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 4.4024816450380646e-07, |
|
"loss": 4.7995, |
|
"step": 183000 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 3.0505811275771453e-07, |
|
"loss": 4.8061, |
|
"step": 183500 |
|
}, |
|
{ |
|
"epoch": 4.98, |
|
"learning_rate": 1.695971390642356e-07, |
|
"loss": 4.7984, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 3.413616537075669e-08, |
|
"loss": 4.7822, |
|
"step": 184500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 184555, |
|
"total_flos": 4.869967130385408e+17, |
|
"train_loss": 5.414583057241204, |
|
"train_runtime": 31930.2738, |
|
"train_samples_per_second": 57.799, |
|
"train_steps_per_second": 5.78 |
|
} |
|
], |
|
"max_steps": 184555, |
|
"num_train_epochs": 5, |
|
"total_flos": 4.869967130385408e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|