|
{ |
|
"best_metric": 0.73, |
|
"best_model_checkpoint": "distilhubert-finetuned-gtzan/checkpoint-25", |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002, |
|
"loss": 0.0001, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0004, |
|
"loss": 0.0427, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0006, |
|
"loss": 0.2245, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0008, |
|
"loss": 0.468, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.001, |
|
"loss": 0.8622, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 2.1819140911102295, |
|
"eval_runtime": 144.9991, |
|
"eval_samples_per_second": 1.379, |
|
"eval_steps_per_second": 0.172, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0009777777777777777, |
|
"loss": 1.6806, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.0009555555555555556, |
|
"loss": 2.4373, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0009333333333333333, |
|
"loss": 2.3049, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.0009111111111111111, |
|
"loss": 1.55, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0008888888888888888, |
|
"loss": 1.2651, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.615, |
|
"eval_loss": 1.3465973138809204, |
|
"eval_runtime": 143.8182, |
|
"eval_samples_per_second": 1.391, |
|
"eval_steps_per_second": 0.174, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 0.0008666666666666667, |
|
"loss": 0.7705, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.0008444444444444444, |
|
"loss": 0.6799, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 0.0008222222222222222, |
|
"loss": 0.7068, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 0.0008, |
|
"loss": 0.934, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.0007777777777777778, |
|
"loss": 0.7861, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.605, |
|
"eval_loss": 1.3502554893493652, |
|
"eval_runtime": 149.3628, |
|
"eval_samples_per_second": 1.339, |
|
"eval_steps_per_second": 0.167, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 0.0007555555555555555, |
|
"loss": 0.8192, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 0.0007333333333333333, |
|
"loss": 0.6076, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 0.0007111111111111111, |
|
"loss": 0.7342, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 0.000688888888888889, |
|
"loss": 0.8138, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.0006666666666666666, |
|
"loss": 0.7081, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.725, |
|
"eval_loss": 0.7991036176681519, |
|
"eval_runtime": 146.6273, |
|
"eval_samples_per_second": 1.364, |
|
"eval_steps_per_second": 0.171, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.0007, |
|
"loss": 0.6077, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.0007333333333333333, |
|
"loss": 0.2685, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.0007666666666666667, |
|
"loss": 1.048, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0008, |
|
"loss": 1.1636, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.0008333333333333334, |
|
"loss": 1.0052, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.0008666666666666667, |
|
"loss": 2.0122, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.0009000000000000001, |
|
"loss": 1.6791, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.0009333333333333333, |
|
"loss": 1.3824, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.0009666666666666667, |
|
"loss": 1.0703, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.001, |
|
"loss": 1.4339, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.0009962962962962963, |
|
"loss": 1.5909, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0009925925925925927, |
|
"loss": 2.139, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.000988888888888889, |
|
"loss": 1.8476, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.000985185185185185, |
|
"loss": 2.1354, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.0009814814814814816, |
|
"loss": 2.0248, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.0009777777777777777, |
|
"loss": 2.1732, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.0009740740740740741, |
|
"loss": 1.9887, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.0009703703703703704, |
|
"loss": 2.0138, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.0009666666666666667, |
|
"loss": 1.9447, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0009629629629629629, |
|
"loss": 1.9785, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.33, |
|
"eval_loss": 2.211935520172119, |
|
"eval_runtime": 145.0026, |
|
"eval_samples_per_second": 1.379, |
|
"eval_steps_per_second": 0.172, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1500, |
|
"num_train_epochs": 15, |
|
"save_steps": 500, |
|
"total_flos": 2.725837948512e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|