{
  "best_metric": 3.715158224105835,
  "best_model_checkpoint": "runs/checkpoint-1000",
  "epoch": 4.587155963302752,
  "eval_steps": 100,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.09,
      "learning_rate": 6.666666666666667e-06,
      "loss": 6.9799,
      "step": 20
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 6.7268,
      "step": 40
    },
    {
      "epoch": 0.28,
      "learning_rate": 2e-05,
      "loss": 6.4767,
      "step": 60
    },
    {
      "epoch": 0.37,
      "learning_rate": 2.6666666666666667e-05,
      "loss": 6.3317,
      "step": 80
    },
    {
      "epoch": 0.46,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 6.2151,
      "step": 100
    },
    {
      "epoch": 0.46,
      "eval_accuracy": 0.010721727291912753,
      "eval_loss": 6.165957927703857,
      "eval_runtime": 954.4032,
      "eval_samples_per_second": 3.09,
      "eval_steps_per_second": 0.065,
      "step": 100
    },
    {
      "epoch": 0.55,
      "learning_rate": 4e-05,
      "loss": 6.13,
      "step": 120
    },
    {
      "epoch": 0.64,
      "learning_rate": 4.666666666666667e-05,
      "loss": 6.0514,
      "step": 140
    },
    {
      "epoch": 0.73,
      "learning_rate": 5.333333333333333e-05,
      "loss": 5.9384,
      "step": 160
    },
    {
      "epoch": 0.83,
      "learning_rate": 6e-05,
      "loss": 5.679,
      "step": 180
    },
    {
      "epoch": 0.92,
      "learning_rate": 6.666666666666667e-05,
      "loss": 5.2673,
      "step": 200
    },
    {
      "epoch": 0.92,
      "eval_accuracy": 0.0003641545871791163,
      "eval_loss": 4.998520851135254,
      "eval_runtime": 3395.6647,
      "eval_samples_per_second": 0.868,
      "eval_steps_per_second": 0.018,
      "step": 200
    },
    {
      "epoch": 1.01,
      "learning_rate": 7.333333333333333e-05,
      "loss": 4.8258,
      "step": 220
    },
    {
      "epoch": 1.1,
      "learning_rate": 8e-05,
      "loss": 4.6404,
      "step": 240
    },
    {
      "epoch": 1.19,
      "learning_rate": 8.666666666666667e-05,
      "loss": 4.5153,
      "step": 260
    },
    {
      "epoch": 1.28,
      "learning_rate": 9.333333333333334e-05,
      "loss": 4.4108,
      "step": 280
    },
    {
      "epoch": 1.38,
      "learning_rate": 0.0001,
      "loss": 4.3584,
      "step": 300
    },
    {
      "epoch": 1.38,
      "eval_accuracy": 2.3712391723291295e-06,
      "eval_loss": 4.306852340698242,
      "eval_runtime": 3914.0936,
      "eval_samples_per_second": 0.753,
      "eval_steps_per_second": 0.016,
      "step": 300
    },
    {
      "epoch": 1.47,
      "learning_rate": 9.979871469976196e-05,
      "loss": 4.2798,
      "step": 320
    },
    {
      "epoch": 1.56,
      "learning_rate": 9.919647942993148e-05,
      "loss": 4.2084,
      "step": 340
    },
    {
      "epoch": 1.65,
      "learning_rate": 9.819814303479267e-05,
      "loss": 4.173,
      "step": 360
    },
    {
      "epoch": 1.74,
      "learning_rate": 9.681174353198687e-05,
      "loss": 4.1324,
      "step": 380
    },
    {
      "epoch": 1.83,
      "learning_rate": 9.504844339512095e-05,
      "loss": 4.0826,
      "step": 400
    },
    {
      "epoch": 1.83,
      "eval_accuracy": 8.299337103151952e-05,
      "eval_loss": 4.064357280731201,
      "eval_runtime": 625.6747,
      "eval_samples_per_second": 4.713,
      "eval_steps_per_second": 0.099,
      "step": 400
    },
    {
      "epoch": 1.93,
      "learning_rate": 9.292243968009331e-05,
      "loss": 4.0714,
      "step": 420
    },
    {
      "epoch": 2.02,
      "learning_rate": 9.045084971874738e-05,
      "loss": 4.0267,
      "step": 440
    },
    {
      "epoch": 2.11,
      "learning_rate": 8.765357330018056e-05,
      "loss": 3.9907,
      "step": 460
    },
    {
      "epoch": 2.2,
      "learning_rate": 8.455313244934324e-05,
      "loss": 3.9733,
      "step": 480
    },
    {
      "epoch": 2.29,
      "learning_rate": 8.117449009293668e-05,
      "loss": 3.9506,
      "step": 500
    },
    {
      "epoch": 2.29,
      "eval_accuracy": 0.000247963867734989,
      "eval_loss": 3.9375269412994385,
      "eval_runtime": 646.8101,
      "eval_samples_per_second": 4.559,
      "eval_steps_per_second": 0.096,
      "step": 500
    },
    {
      "epoch": 2.39,
      "learning_rate": 7.754484907260513e-05,
      "loss": 3.9255,
      "step": 520
    },
    {
      "epoch": 2.48,
      "learning_rate": 7.369343312364993e-05,
      "loss": 3.9064,
      "step": 540
    },
    {
      "epoch": 2.57,
      "learning_rate": 6.965125158269619e-05,
      "loss": 3.8806,
      "step": 560
    },
    {
      "epoch": 2.66,
      "learning_rate": 6.545084971874738e-05,
      "loss": 3.89,
      "step": 580
    },
    {
      "epoch": 2.75,
      "learning_rate": 6.112604669781572e-05,
      "loss": 3.8576,
      "step": 600
    },
    {
      "epoch": 2.75,
      "eval_accuracy": 0.0002405114017648117,
      "eval_loss": 3.8471720218658447,
      "eval_runtime": 641.884,
      "eval_samples_per_second": 4.594,
      "eval_steps_per_second": 0.097,
      "step": 600
    },
    {
      "epoch": 2.84,
      "learning_rate": 5.6711663290882776e-05,
      "loss": 3.8314,
      "step": 620
    },
    {
      "epoch": 2.94,
      "learning_rate": 5.2243241517525754e-05,
      "loss": 3.8356,
      "step": 640
    },
    {
      "epoch": 3.03,
      "learning_rate": 4.775675848247427e-05,
      "loss": 3.8056,
      "step": 660
    },
    {
      "epoch": 3.12,
      "learning_rate": 4.328833670911724e-05,
      "loss": 3.7761,
      "step": 680
    },
    {
      "epoch": 3.21,
      "learning_rate": 3.887395330218429e-05,
      "loss": 3.7937,
      "step": 700
    },
    {
      "epoch": 3.21,
      "eval_accuracy": 0.00025236759762645736,
      "eval_loss": 3.7817437648773193,
      "eval_runtime": 1546.8275,
      "eval_samples_per_second": 1.906,
      "eval_steps_per_second": 0.04,
      "step": 700
    },
    {
      "epoch": 3.3,
      "learning_rate": 3.4549150281252636e-05,
      "loss": 3.7696,
      "step": 720
    },
    {
      "epoch": 3.39,
      "learning_rate": 3.0348748417303823e-05,
      "loss": 3.7522,
      "step": 740
    },
    {
      "epoch": 3.49,
      "learning_rate": 2.630656687635007e-05,
      "loss": 3.743,
      "step": 760
    },
    {
      "epoch": 3.58,
      "learning_rate": 2.245515092739488e-05,
      "loss": 3.7389,
      "step": 780
    },
    {
      "epoch": 3.67,
      "learning_rate": 1.8825509907063327e-05,
      "loss": 3.7287,
      "step": 800
    },
    {
      "epoch": 3.67,
      "eval_accuracy": 0.00031435856456020456,
      "eval_loss": 3.7385406494140625,
      "eval_runtime": 664.5264,
      "eval_samples_per_second": 4.438,
      "eval_steps_per_second": 0.093,
      "step": 800
    },
    {
      "epoch": 3.76,
      "learning_rate": 1.544686755065677e-05,
      "loss": 3.7229,
      "step": 820
    },
    {
      "epoch": 3.85,
      "learning_rate": 1.2346426699819458e-05,
      "loss": 3.7138,
      "step": 840
    },
    {
      "epoch": 3.94,
      "learning_rate": 9.549150281252633e-06,
      "loss": 3.7314,
      "step": 860
    },
    {
      "epoch": 4.04,
      "learning_rate": 7.077560319906695e-06,
      "loss": 3.7059,
      "step": 880
    },
    {
      "epoch": 4.13,
      "learning_rate": 4.951556604879048e-06,
      "loss": 3.7205,
      "step": 900
    },
    {
      "epoch": 4.13,
      "eval_accuracy": 0.0003251985150622806,
      "eval_loss": 3.7189857959747314,
      "eval_runtime": 713.2391,
      "eval_samples_per_second": 4.135,
      "eval_steps_per_second": 0.087,
      "step": 900
    },
    {
      "epoch": 4.22,
      "learning_rate": 3.18825646801314e-06,
      "loss": 3.71,
      "step": 920
    },
    {
      "epoch": 4.31,
      "learning_rate": 1.8018569652073381e-06,
      "loss": 3.6848,
      "step": 940
    },
    {
      "epoch": 4.4,
      "learning_rate": 8.035205700685167e-07,
      "loss": 3.6929,
      "step": 960
    },
    {
      "epoch": 4.5,
      "learning_rate": 2.012853002380466e-07,
      "loss": 3.7022,
      "step": 980
    },
    {
      "epoch": 4.59,
      "learning_rate": 0.0,
      "loss": 3.6858,
      "step": 1000
    },
    {
      "epoch": 4.59,
      "eval_accuracy": 0.00034552342225367314,
      "eval_loss": 3.715158224105835,
      "eval_runtime": 606.7664,
      "eval_samples_per_second": 4.86,
      "eval_steps_per_second": 0.102,
      "step": 1000
    },
    {
      "epoch": 4.59,
      "step": 1000,
      "total_flos": 9439661425655808.0,
      "train_loss": 4.394818229675293,
      "train_runtime": 143576.7666,
      "train_samples_per_second": 0.334,
      "train_steps_per_second": 0.007
    }
  ],
  "logging_steps": 20,
  "max_steps": 1000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 1000,
  "total_flos": 9439661425655808.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}