|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 8.0, |
|
"global_step": 76416, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.9803706030150755e-05, |
|
"loss": 1.2294, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.960741206030151e-05, |
|
"loss": 0.9625, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.9411118090452263e-05, |
|
"loss": 0.9145, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.9214824120603013e-05, |
|
"loss": 0.8595, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.901853015075377e-05, |
|
"loss": 0.8622, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.8822236180904525e-05, |
|
"loss": 0.8504, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.8625942211055275e-05, |
|
"loss": 0.8308, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.842964824120603e-05, |
|
"loss": 0.8292, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.8233354271356787e-05, |
|
"loss": 0.8085, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.8037060301507537e-05, |
|
"loss": 0.7914, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.784076633165829e-05, |
|
"loss": 0.8014, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.7644472361809045e-05, |
|
"loss": 0.7876, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.74481783919598e-05, |
|
"loss": 0.7936, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.7251884422110553e-05, |
|
"loss": 0.7975, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.7055590452261307e-05, |
|
"loss": 0.7748, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 2.685929648241206e-05, |
|
"loss": 0.7804, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.6663002512562815e-05, |
|
"loss": 0.773, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.646670854271357e-05, |
|
"loss": 0.7652, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.6270414572864323e-05, |
|
"loss": 0.7727, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.8350390195846558, |
|
"eval_runtime": 8.4552, |
|
"eval_samples_per_second": 1312.323, |
|
"eval_steps_per_second": 41.04, |
|
"step": 9552 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.6074120603015074e-05, |
|
"loss": 0.7397, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.587782663316583e-05, |
|
"loss": 0.7335, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.5681532663316585e-05, |
|
"loss": 0.7095, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.5485238693467336e-05, |
|
"loss": 0.7407, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.528894472361809e-05, |
|
"loss": 0.7235, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.5092650753768844e-05, |
|
"loss": 0.709, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 2.4896356783919598e-05, |
|
"loss": 0.7161, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 2.4700062814070352e-05, |
|
"loss": 0.7278, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 2.4503768844221106e-05, |
|
"loss": 0.74, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 2.430747487437186e-05, |
|
"loss": 0.7323, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.4111180904522614e-05, |
|
"loss": 0.7298, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.3914886934673368e-05, |
|
"loss": 0.7394, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 2.3718592964824122e-05, |
|
"loss": 0.7393, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.3522298994974873e-05, |
|
"loss": 0.715, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.332600502512563e-05, |
|
"loss": 0.7139, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 2.3129711055276384e-05, |
|
"loss": 0.7215, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 2.2933417085427135e-05, |
|
"loss": 0.7122, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 2.273712311557789e-05, |
|
"loss": 0.7119, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 2.2540829145728646e-05, |
|
"loss": 0.7113, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.8123675584793091, |
|
"eval_runtime": 8.2517, |
|
"eval_samples_per_second": 1344.695, |
|
"eval_steps_per_second": 42.052, |
|
"step": 19104 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 2.2344535175879397e-05, |
|
"loss": 0.6898, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 2.214824120603015e-05, |
|
"loss": 0.6819, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 2.1951947236180905e-05, |
|
"loss": 0.6797, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 2.175565326633166e-05, |
|
"loss": 0.6808, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 2.1559359296482413e-05, |
|
"loss": 0.6949, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 2.1363065326633167e-05, |
|
"loss": 0.689, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 2.116677135678392e-05, |
|
"loss": 0.6993, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 2.0970477386934675e-05, |
|
"loss": 0.6795, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 2.077418341708543e-05, |
|
"loss": 0.6972, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 2.0577889447236183e-05, |
|
"loss": 0.6831, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 2.0381595477386933e-05, |
|
"loss": 0.678, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.0185301507537687e-05, |
|
"loss": 0.6871, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 1.9989007537688445e-05, |
|
"loss": 0.6793, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 1.9792713567839195e-05, |
|
"loss": 0.6808, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 1.959641959798995e-05, |
|
"loss": 0.6894, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.9400125628140703e-05, |
|
"loss": 0.6876, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.9203831658291457e-05, |
|
"loss": 0.6919, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.900753768844221e-05, |
|
"loss": 0.6856, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 1.8811243718592965e-05, |
|
"loss": 0.6708, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.7998350262641907, |
|
"eval_runtime": 8.2332, |
|
"eval_samples_per_second": 1347.713, |
|
"eval_steps_per_second": 42.146, |
|
"step": 28656 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 1.861494974874372e-05, |
|
"loss": 0.6758, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 1.8418655778894473e-05, |
|
"loss": 0.6638, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 1.8222361809045227e-05, |
|
"loss": 0.654, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 1.802606783919598e-05, |
|
"loss": 0.6792, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 1.7829773869346732e-05, |
|
"loss": 0.6999, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 1.763347989949749e-05, |
|
"loss": 0.6834, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 1.7437185929648243e-05, |
|
"loss": 0.647, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 1.7240891959798994e-05, |
|
"loss": 0.662, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 1.7044597989949748e-05, |
|
"loss": 0.6674, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 1.6848304020100505e-05, |
|
"loss": 0.6409, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 1.6652010050251256e-05, |
|
"loss": 0.6766, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 1.645571608040201e-05, |
|
"loss": 0.6658, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 1.6259422110552764e-05, |
|
"loss": 0.6765, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"learning_rate": 1.6063128140703518e-05, |
|
"loss": 0.6478, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 1.5866834170854272e-05, |
|
"loss": 0.6604, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 1.5670540201005026e-05, |
|
"loss": 0.6398, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 1.547424623115578e-05, |
|
"loss": 0.6554, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 1.527795226130653e-05, |
|
"loss": 0.6442, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 1.5081658291457286e-05, |
|
"loss": 0.6552, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.793069064617157, |
|
"eval_runtime": 8.0925, |
|
"eval_samples_per_second": 1371.141, |
|
"eval_steps_per_second": 42.879, |
|
"step": 38208 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 1.488536432160804e-05, |
|
"loss": 0.6526, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 1.4689070351758794e-05, |
|
"loss": 0.656, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 1.4492776381909548e-05, |
|
"loss": 0.6499, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 1.4296482412060302e-05, |
|
"loss": 0.6319, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 1.4100188442211055e-05, |
|
"loss": 0.6511, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 4.29, |
|
"learning_rate": 1.390389447236181e-05, |
|
"loss": 0.6461, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"learning_rate": 1.3707600502512563e-05, |
|
"loss": 0.6472, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 1.3511306532663317e-05, |
|
"loss": 0.6334, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 1.331501256281407e-05, |
|
"loss": 0.6302, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 1.3118718592964825e-05, |
|
"loss": 0.6584, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"learning_rate": 1.2922424623115579e-05, |
|
"loss": 0.635, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"learning_rate": 1.2726130653266331e-05, |
|
"loss": 0.6305, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 1.2529836683417085e-05, |
|
"loss": 0.6448, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"learning_rate": 1.233354271356784e-05, |
|
"loss": 0.6546, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"learning_rate": 1.2137248743718593e-05, |
|
"loss": 0.6426, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 1.1940954773869347e-05, |
|
"loss": 0.6405, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 1.1744660804020101e-05, |
|
"loss": 0.6207, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 1.1548366834170855e-05, |
|
"loss": 0.6319, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 1.1352072864321609e-05, |
|
"loss": 0.6419, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.7893060445785522, |
|
"eval_runtime": 8.4012, |
|
"eval_samples_per_second": 1320.763, |
|
"eval_steps_per_second": 41.304, |
|
"step": 47760 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"learning_rate": 1.1155778894472361e-05, |
|
"loss": 0.6382, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 1.0959484924623115e-05, |
|
"loss": 0.6264, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"learning_rate": 1.076319095477387e-05, |
|
"loss": 0.627, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"learning_rate": 1.0566896984924623e-05, |
|
"loss": 0.6284, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"learning_rate": 1.0370603015075377e-05, |
|
"loss": 0.6256, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 1.0174309045226131e-05, |
|
"loss": 0.6423, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 9.978015075376884e-06, |
|
"loss": 0.634, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 9.78172110552764e-06, |
|
"loss": 0.6393, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 5.44, |
|
"learning_rate": 9.585427135678392e-06, |
|
"loss": 0.6259, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 9.389133165829146e-06, |
|
"loss": 0.628, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 9.1928391959799e-06, |
|
"loss": 0.6337, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 8.996545226130654e-06, |
|
"loss": 0.6128, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 5.65, |
|
"learning_rate": 8.800251256281408e-06, |
|
"loss": 0.6276, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 5.71, |
|
"learning_rate": 8.60395728643216e-06, |
|
"loss": 0.629, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"learning_rate": 8.407663316582914e-06, |
|
"loss": 0.6261, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"learning_rate": 8.21136934673367e-06, |
|
"loss": 0.6137, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 8.015075376884422e-06, |
|
"loss": 0.6353, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"learning_rate": 7.818781407035176e-06, |
|
"loss": 0.6239, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"learning_rate": 7.622487437185929e-06, |
|
"loss": 0.618, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.7882150411605835, |
|
"eval_runtime": 8.1734, |
|
"eval_samples_per_second": 1357.582, |
|
"eval_steps_per_second": 42.455, |
|
"step": 57312 |
|
}, |
|
{ |
|
"epoch": 6.02, |
|
"learning_rate": 7.426193467336683e-06, |
|
"loss": 0.6427, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 7.229899497487438e-06, |
|
"loss": 0.6283, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 6.12, |
|
"learning_rate": 7.033605527638191e-06, |
|
"loss": 0.6277, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 6.18, |
|
"learning_rate": 6.837311557788945e-06, |
|
"loss": 0.6131, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"learning_rate": 6.641017587939698e-06, |
|
"loss": 0.6086, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"learning_rate": 6.444723618090452e-06, |
|
"loss": 0.6154, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 6.248429648241206e-06, |
|
"loss": 0.6184, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"learning_rate": 6.0521356783919595e-06, |
|
"loss": 0.6139, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 6.44, |
|
"learning_rate": 5.8558417085427135e-06, |
|
"loss": 0.6148, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 5.6595477386934675e-06, |
|
"loss": 0.6256, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 5.4632537688442215e-06, |
|
"loss": 0.6135, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 5.266959798994975e-06, |
|
"loss": 0.6274, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 5.070665829145729e-06, |
|
"loss": 0.623, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 4.874371859296483e-06, |
|
"loss": 0.6254, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 6.75, |
|
"learning_rate": 4.678077889447237e-06, |
|
"loss": 0.6233, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 4.48178391959799e-06, |
|
"loss": 0.6124, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"learning_rate": 4.285489949748744e-06, |
|
"loss": 0.6108, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"learning_rate": 4.089195979899497e-06, |
|
"loss": 0.6182, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 6.96, |
|
"learning_rate": 3.892902010050252e-06, |
|
"loss": 0.5991, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 0.7896326184272766, |
|
"eval_runtime": 8.35, |
|
"eval_samples_per_second": 1328.855, |
|
"eval_steps_per_second": 41.557, |
|
"step": 66864 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 3.696608040201005e-06, |
|
"loss": 0.6139, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 3.5003140703517586e-06, |
|
"loss": 0.6092, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 3.3040201005025126e-06, |
|
"loss": 0.6021, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"learning_rate": 3.107726130653266e-06, |
|
"loss": 0.6138, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"learning_rate": 2.91143216080402e-06, |
|
"loss": 0.6279, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 2.7151381909547737e-06, |
|
"loss": 0.6086, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 2.5188442211055277e-06, |
|
"loss": 0.6108, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"learning_rate": 2.3225502512562813e-06, |
|
"loss": 0.5933, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 7.43, |
|
"learning_rate": 2.1262562814070353e-06, |
|
"loss": 0.6089, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 7.49, |
|
"learning_rate": 1.929962311557789e-06, |
|
"loss": 0.6086, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 7.54, |
|
"learning_rate": 1.7336683417085427e-06, |
|
"loss": 0.622, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 7.59, |
|
"learning_rate": 1.5373743718592965e-06, |
|
"loss": 0.6041, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"learning_rate": 1.3410804020100503e-06, |
|
"loss": 0.6146, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 1.144786432160804e-06, |
|
"loss": 0.5999, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"learning_rate": 9.484924623115579e-07, |
|
"loss": 0.6094, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 7.8, |
|
"learning_rate": 7.521984924623115e-07, |
|
"loss": 0.6137, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"learning_rate": 5.559045226130653e-07, |
|
"loss": 0.6065, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 3.5961055276381907e-07, |
|
"loss": 0.6149, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"learning_rate": 1.6331658291457286e-07, |
|
"loss": 0.6159, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 0.7893399000167847, |
|
"eval_runtime": 8.9446, |
|
"eval_samples_per_second": 1240.522, |
|
"eval_steps_per_second": 38.794, |
|
"step": 76416 |
|
} |
|
], |
|
"max_steps": 76416, |
|
"num_train_epochs": 8, |
|
"total_flos": 2.326706308251648e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|