|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"global_step": 75, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1e-06, |
|
"loss": 2.718, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1e-06, |
|
"loss": 2.5852, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_accuracy": 0.26974484268246846, |
|
"eval_loss": 2.595703125, |
|
"eval_runtime": 73.3015, |
|
"eval_samples_per_second": 4.966, |
|
"eval_steps_per_second": 0.628, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1e-06, |
|
"loss": 2.5956, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.27058504875406286, |
|
"eval_loss": 2.576171875, |
|
"eval_runtime": 73.9801, |
|
"eval_samples_per_second": 4.92, |
|
"eval_steps_per_second": 0.622, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1e-06, |
|
"loss": 2.5961, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.27107148384814383, |
|
"eval_loss": 2.5546875, |
|
"eval_runtime": 73.8954, |
|
"eval_samples_per_second": 4.926, |
|
"eval_steps_per_second": 0.623, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1e-06, |
|
"loss": 2.5731, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_accuracy": 0.27218807349592056, |
|
"eval_loss": 2.53125, |
|
"eval_runtime": 73.9168, |
|
"eval_samples_per_second": 4.924, |
|
"eval_steps_per_second": 0.622, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1e-06, |
|
"loss": 2.5415, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.2733599398589338, |
|
"eval_loss": 2.51171875, |
|
"eval_runtime": 73.9155, |
|
"eval_samples_per_second": 4.925, |
|
"eval_steps_per_second": 0.622, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1e-06, |
|
"loss": 2.5168, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_accuracy": 0.2745704999226126, |
|
"eval_loss": 2.49609375, |
|
"eval_runtime": 73.8737, |
|
"eval_samples_per_second": 4.927, |
|
"eval_steps_per_second": 0.623, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 1e-06, |
|
"loss": 2.4972, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_accuracy": 0.2755544254538218, |
|
"eval_loss": 2.482421875, |
|
"eval_runtime": 73.9197, |
|
"eval_samples_per_second": 4.924, |
|
"eval_steps_per_second": 0.622, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1e-06, |
|
"loss": 2.4354, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_accuracy": 0.27611824794923384, |
|
"eval_loss": 2.47265625, |
|
"eval_runtime": 74.1261, |
|
"eval_samples_per_second": 4.911, |
|
"eval_steps_per_second": 0.621, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 1e-06, |
|
"loss": 2.4055, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_accuracy": 0.2768423729188317, |
|
"eval_loss": 2.4609375, |
|
"eval_runtime": 73.8501, |
|
"eval_samples_per_second": 4.929, |
|
"eval_steps_per_second": 0.623, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 1e-06, |
|
"loss": 2.4681, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_accuracy": 0.27778207707785174, |
|
"eval_loss": 2.44921875, |
|
"eval_runtime": 73.8512, |
|
"eval_samples_per_second": 4.929, |
|
"eval_steps_per_second": 0.623, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1e-06, |
|
"loss": 2.5866, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_accuracy": 0.27841223163154755, |
|
"eval_loss": 2.435546875, |
|
"eval_runtime": 73.8943, |
|
"eval_samples_per_second": 4.926, |
|
"eval_steps_per_second": 0.623, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1e-06, |
|
"loss": 2.4221, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_accuracy": 0.27929113140380746, |
|
"eval_loss": 2.423828125, |
|
"eval_runtime": 73.8857, |
|
"eval_samples_per_second": 4.927, |
|
"eval_steps_per_second": 0.623, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 75, |
|
"total_flos": 4974649540608.0, |
|
"train_loss": 2.5182421875, |
|
"train_runtime": 15501.0618, |
|
"train_samples_per_second": 0.039, |
|
"train_steps_per_second": 0.005 |
|
} |
|
], |
|
"max_steps": 75, |
|
"num_train_epochs": 1, |
|
"total_flos": 4974649540608.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|