|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 7.0, |
|
"global_step": 35000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9600000000000002e-05, |
|
"loss": 1.5146, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.9200000000000003e-05, |
|
"loss": 1.1216, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.88e-05, |
|
"loss": 1.0414, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.8400000000000003e-05, |
|
"loss": 1.0385, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.8e-05, |
|
"loss": 1.0463, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.76e-05, |
|
"loss": 1.0464, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.72e-05, |
|
"loss": 1.0343, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1.6800000000000002e-05, |
|
"loss": 1.0375, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.64e-05, |
|
"loss": 1.0389, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 1.0303, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.9517383575439453, |
|
"eval_runtime": 520.9725, |
|
"eval_samples_per_second": 19.195, |
|
"eval_steps_per_second": 2.399, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 1.5600000000000003e-05, |
|
"loss": 1.0216, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.5200000000000002e-05, |
|
"loss": 1.0131, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.48e-05, |
|
"loss": 1.0193, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.4400000000000001e-05, |
|
"loss": 1.0079, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.4e-05, |
|
"loss": 1.0117, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 1.3600000000000002e-05, |
|
"loss": 1.0169, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 1.3200000000000002e-05, |
|
"loss": 1.0274, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.2800000000000001e-05, |
|
"loss": 1.0096, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 1.2400000000000002e-05, |
|
"loss": 1.009, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.2e-05, |
|
"loss": 1.01, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.9439992308616638, |
|
"eval_runtime": 529.1205, |
|
"eval_samples_per_second": 18.899, |
|
"eval_steps_per_second": 2.362, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.16e-05, |
|
"loss": 0.9979, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.1200000000000001e-05, |
|
"loss": 0.9961, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.0800000000000002e-05, |
|
"loss": 0.9941, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.04e-05, |
|
"loss": 0.9823, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9886, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 9.600000000000001e-06, |
|
"loss": 0.9784, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 9.200000000000002e-06, |
|
"loss": 0.9931, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 8.8e-06, |
|
"loss": 0.9944, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 8.400000000000001e-06, |
|
"loss": 0.9873, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.9898, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.9317747950553894, |
|
"eval_runtime": 529.6801, |
|
"eval_samples_per_second": 18.879, |
|
"eval_steps_per_second": 2.36, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 7.600000000000001e-06, |
|
"loss": 0.9791, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 7.2000000000000005e-06, |
|
"loss": 0.9821, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 6.800000000000001e-06, |
|
"loss": 0.9809, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 6.4000000000000006e-06, |
|
"loss": 0.9701, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 6e-06, |
|
"loss": 0.9783, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 5.600000000000001e-06, |
|
"loss": 0.9763, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 5.2e-06, |
|
"loss": 0.9631, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 4.800000000000001e-06, |
|
"loss": 0.973, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 4.4e-06, |
|
"loss": 0.9688, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.9788, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.9208911061286926, |
|
"eval_runtime": 528.6017, |
|
"eval_samples_per_second": 18.918, |
|
"eval_steps_per_second": 2.365, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 3.6000000000000003e-06, |
|
"loss": 0.9695, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"loss": 0.9725, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 2.8000000000000003e-06, |
|
"loss": 0.9606, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"loss": 0.9611, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.9511, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 1.6000000000000001e-06, |
|
"loss": 0.9571, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"loss": 0.9661, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 8.000000000000001e-07, |
|
"loss": 0.9566, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"loss": 0.9647, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.9692, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 9.800000000000001e-06, |
|
"loss": 0.9756, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 9.600000000000001e-06, |
|
"loss": 0.9775, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 9.4e-06, |
|
"loss": 0.9843, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 9.200000000000002e-06, |
|
"loss": 0.9883, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 9e-06, |
|
"loss": 0.976, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"learning_rate": 8.8e-06, |
|
"loss": 0.98, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 8.6e-06, |
|
"loss": 0.9807, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 8.400000000000001e-06, |
|
"loss": 0.985, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 5.9, |
|
"learning_rate": 8.2e-06, |
|
"loss": 0.9655, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.974, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.8957818150520325, |
|
"eval_runtime": 248.7642, |
|
"eval_samples_per_second": 40.199, |
|
"eval_steps_per_second": 5.025, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 7.800000000000002e-06, |
|
"loss": 0.976, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 7.600000000000001e-06, |
|
"loss": 0.9739, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"learning_rate": 7.4e-06, |
|
"loss": 0.9668, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 7.2000000000000005e-06, |
|
"loss": 0.9691, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"learning_rate": 7e-06, |
|
"loss": 0.9734, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 6.800000000000001e-06, |
|
"loss": 0.9722, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 6.600000000000001e-06, |
|
"loss": 0.9738, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 6.4000000000000006e-06, |
|
"loss": 0.9701, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 6.200000000000001e-06, |
|
"loss": 0.9596, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 6e-06, |
|
"loss": 0.9628, |
|
"step": 35000 |
|
} |
|
], |
|
"max_steps": 50000, |
|
"num_train_epochs": 10, |
|
"total_flos": 7.371432972288e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|