|
{ |
|
"best_metric": 1.3161581754684448, |
|
"best_model_checkpoint": "./outputs/checkpoint-4100", |
|
"epoch": 2.9879781420765026, |
|
"eval_steps": 100, |
|
"global_step": 4100, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002, |
|
"loss": 2.3134, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.1594152450561523, |
|
"eval_runtime": 144.083, |
|
"eval_samples_per_second": 43.544, |
|
"eval_steps_per_second": 5.448, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1084, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 2.06351900100708, |
|
"eval_runtime": 143.9954, |
|
"eval_samples_per_second": 43.571, |
|
"eval_steps_per_second": 5.452, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002, |
|
"loss": 2.0343, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 2.0027661323547363, |
|
"eval_runtime": 144.0229, |
|
"eval_samples_per_second": 43.563, |
|
"eval_steps_per_second": 5.451, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002, |
|
"loss": 1.9806, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 1.9533618688583374, |
|
"eval_runtime": 143.9496, |
|
"eval_samples_per_second": 43.585, |
|
"eval_steps_per_second": 5.453, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0002, |
|
"loss": 1.926, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 1.9100102186203003, |
|
"eval_runtime": 144.0297, |
|
"eval_samples_per_second": 43.56, |
|
"eval_steps_per_second": 5.45, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0002, |
|
"loss": 1.8969, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 1.8807153701782227, |
|
"eval_runtime": 144.1421, |
|
"eval_samples_per_second": 43.526, |
|
"eval_steps_per_second": 5.446, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0002, |
|
"loss": 1.8568, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 1.8459755182266235, |
|
"eval_runtime": 144.0267, |
|
"eval_samples_per_second": 43.561, |
|
"eval_steps_per_second": 5.45, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0002, |
|
"loss": 1.8329, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 1.8178023099899292, |
|
"eval_runtime": 143.9854, |
|
"eval_samples_per_second": 43.574, |
|
"eval_steps_per_second": 5.452, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7961, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 1.7928833961486816, |
|
"eval_runtime": 144.015, |
|
"eval_samples_per_second": 43.565, |
|
"eval_steps_per_second": 5.451, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7798, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 1.7671397924423218, |
|
"eval_runtime": 144.0009, |
|
"eval_samples_per_second": 43.569, |
|
"eval_steps_per_second": 5.451, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7596, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 1.7436497211456299, |
|
"eval_runtime": 144.11, |
|
"eval_samples_per_second": 43.536, |
|
"eval_steps_per_second": 5.447, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7215, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 1.7219586372375488, |
|
"eval_runtime": 144.0047, |
|
"eval_samples_per_second": 43.568, |
|
"eval_steps_per_second": 5.451, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7295, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 1.700698733329773, |
|
"eval_runtime": 143.9971, |
|
"eval_samples_per_second": 43.57, |
|
"eval_steps_per_second": 5.451, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6805, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_loss": 1.679167628288269, |
|
"eval_runtime": 144.0847, |
|
"eval_samples_per_second": 43.544, |
|
"eval_steps_per_second": 5.448, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6505, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_loss": 1.6613633632659912, |
|
"eval_runtime": 144.175, |
|
"eval_samples_per_second": 43.517, |
|
"eval_steps_per_second": 5.445, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6327, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_loss": 1.6426126956939697, |
|
"eval_runtime": 143.9637, |
|
"eval_samples_per_second": 43.58, |
|
"eval_steps_per_second": 5.453, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.0002, |
|
"loss": 1.629, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_loss": 1.6252305507659912, |
|
"eval_runtime": 144.0148, |
|
"eval_samples_per_second": 43.565, |
|
"eval_steps_per_second": 5.451, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5993, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 1.6091866493225098, |
|
"eval_runtime": 144.0349, |
|
"eval_samples_per_second": 43.559, |
|
"eval_steps_per_second": 5.45, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5907, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_loss": 1.5939242839813232, |
|
"eval_runtime": 151.8105, |
|
"eval_samples_per_second": 41.328, |
|
"eval_steps_per_second": 5.171, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5726, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_loss": 1.578723669052124, |
|
"eval_runtime": 144.0665, |
|
"eval_samples_per_second": 43.549, |
|
"eval_steps_per_second": 5.449, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5572, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_loss": 1.562243938446045, |
|
"eval_runtime": 144.0023, |
|
"eval_samples_per_second": 43.569, |
|
"eval_steps_per_second": 5.451, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5788, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 1.5802664756774902, |
|
"eval_runtime": 133.2244, |
|
"eval_samples_per_second": 47.093, |
|
"eval_steps_per_second": 5.892, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5406, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 1.5586791038513184, |
|
"eval_runtime": 133.3887, |
|
"eval_samples_per_second": 47.035, |
|
"eval_steps_per_second": 5.885, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5397, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 1.5387272834777832, |
|
"eval_runtime": 133.2598, |
|
"eval_samples_per_second": 47.081, |
|
"eval_steps_per_second": 5.891, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5072, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_loss": 1.5237348079681396, |
|
"eval_runtime": 133.2857, |
|
"eval_samples_per_second": 47.072, |
|
"eval_steps_per_second": 5.89, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4925, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 1.5068706274032593, |
|
"eval_runtime": 133.2428, |
|
"eval_samples_per_second": 47.087, |
|
"eval_steps_per_second": 5.891, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4832, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_loss": 1.492112636566162, |
|
"eval_runtime": 133.2152, |
|
"eval_samples_per_second": 47.097, |
|
"eval_steps_per_second": 5.893, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4506, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_loss": 1.4801561832427979, |
|
"eval_runtime": 133.2908, |
|
"eval_samples_per_second": 47.07, |
|
"eval_steps_per_second": 5.889, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4311, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_loss": 1.4649614095687866, |
|
"eval_runtime": 133.232, |
|
"eval_samples_per_second": 47.091, |
|
"eval_steps_per_second": 5.892, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4268, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_loss": 1.4508098363876343, |
|
"eval_runtime": 133.3471, |
|
"eval_samples_per_second": 47.05, |
|
"eval_steps_per_second": 5.887, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4142, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_loss": 1.4384832382202148, |
|
"eval_runtime": 133.1612, |
|
"eval_samples_per_second": 47.116, |
|
"eval_steps_per_second": 5.895, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4131, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_loss": 1.4258862733840942, |
|
"eval_runtime": 133.2271, |
|
"eval_samples_per_second": 47.093, |
|
"eval_steps_per_second": 5.892, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3948, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_loss": 1.4105219841003418, |
|
"eval_runtime": 133.2146, |
|
"eval_samples_per_second": 47.097, |
|
"eval_steps_per_second": 5.893, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3738, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_loss": 1.3999289274215698, |
|
"eval_runtime": 133.2454, |
|
"eval_samples_per_second": 47.086, |
|
"eval_steps_per_second": 5.891, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 0.0002, |
|
"loss": 1.373, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"eval_loss": 1.388107180595398, |
|
"eval_runtime": 133.2079, |
|
"eval_samples_per_second": 47.099, |
|
"eval_steps_per_second": 5.893, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3325, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_loss": 1.3749202489852905, |
|
"eval_runtime": 132.999, |
|
"eval_samples_per_second": 47.173, |
|
"eval_steps_per_second": 5.902, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 0.0002, |
|
"loss": 1.332, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_loss": 1.3657797574996948, |
|
"eval_runtime": 133.0705, |
|
"eval_samples_per_second": 47.148, |
|
"eval_steps_per_second": 5.899, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3103, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_loss": 1.3516861200332642, |
|
"eval_runtime": 132.9998, |
|
"eval_samples_per_second": 47.173, |
|
"eval_steps_per_second": 5.902, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3085, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"eval_loss": 1.340726613998413, |
|
"eval_runtime": 132.9904, |
|
"eval_samples_per_second": 47.176, |
|
"eval_steps_per_second": 5.903, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2963, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"eval_loss": 1.3258968591690063, |
|
"eval_runtime": 132.9668, |
|
"eval_samples_per_second": 47.185, |
|
"eval_steps_per_second": 5.904, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2867, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_loss": 1.3161581754684448, |
|
"eval_runtime": 132.905, |
|
"eval_samples_per_second": 47.207, |
|
"eval_steps_per_second": 5.906, |
|
"step": 4100 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 4116, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 2.4411263908542874e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|