|
{ |
|
"best_metric": 1.1149991750717163, |
|
"best_model_checkpoint": "/kaggle/output/checkpoint-9000", |
|
"epoch": 208.0, |
|
"eval_steps": 1000, |
|
"global_step": 13000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.7777777777777777e-11, |
|
"loss": 1.2395, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 2.7638888888888893e-08, |
|
"loss": 1.1422, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.33073852295409184, |
|
"eval_loss": 1.1557353734970093, |
|
"eval_runtime": 54.1805, |
|
"eval_samples_per_second": 92.469, |
|
"eval_steps_per_second": 11.572, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"learning_rate": 5.541666666666667e-08, |
|
"loss": 1.1401, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.3311377245508982, |
|
"eval_loss": 1.1507551670074463, |
|
"eval_runtime": 53.98, |
|
"eval_samples_per_second": 92.812, |
|
"eval_steps_per_second": 11.615, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"learning_rate": 8.316666666666666e-08, |
|
"loss": 1.1287, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.3321357285429142, |
|
"eval_loss": 1.1441665887832642, |
|
"eval_runtime": 53.9873, |
|
"eval_samples_per_second": 92.8, |
|
"eval_steps_per_second": 11.614, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"learning_rate": 1.1094444444444445e-07, |
|
"loss": 1.117, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.3287425149700599, |
|
"eval_loss": 1.134506344795227, |
|
"eval_runtime": 54.0067, |
|
"eval_samples_per_second": 92.766, |
|
"eval_steps_per_second": 11.61, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 1.3872222222222222e-07, |
|
"loss": 1.1111, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.32954091816367265, |
|
"eval_loss": 1.1283142566680908, |
|
"eval_runtime": 54.0921, |
|
"eval_samples_per_second": 92.62, |
|
"eval_steps_per_second": 11.591, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"learning_rate": 1.6650000000000002e-07, |
|
"loss": 1.1053, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.32894211576846305, |
|
"eval_loss": 1.1218452453613281, |
|
"eval_runtime": 54.0607, |
|
"eval_samples_per_second": 92.674, |
|
"eval_steps_per_second": 11.598, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"learning_rate": 1.9422222222222223e-07, |
|
"loss": 1.0952, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"eval_accuracy": 0.33073852295409184, |
|
"eval_loss": 1.118302583694458, |
|
"eval_runtime": 53.9252, |
|
"eval_samples_per_second": 92.906, |
|
"eval_steps_per_second": 11.627, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"learning_rate": 2.2197222222222225e-07, |
|
"loss": 1.0909, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"eval_accuracy": 0.3281437125748503, |
|
"eval_loss": 1.1167163848876953, |
|
"eval_runtime": 54.095, |
|
"eval_samples_per_second": 92.615, |
|
"eval_steps_per_second": 11.591, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"learning_rate": 2.4975e-07, |
|
"loss": 1.0838, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"eval_accuracy": 0.3317365269461078, |
|
"eval_loss": 1.1149991750717163, |
|
"eval_runtime": 54.0536, |
|
"eval_samples_per_second": 92.686, |
|
"eval_steps_per_second": 11.6, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"learning_rate": 2.775277777777778e-07, |
|
"loss": 1.0746, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"eval_accuracy": 0.3347305389221557, |
|
"eval_loss": 1.1178398132324219, |
|
"eval_runtime": 54.1273, |
|
"eval_samples_per_second": 92.56, |
|
"eval_steps_per_second": 11.584, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 176.0, |
|
"learning_rate": 3.0530555555555556e-07, |
|
"loss": 1.0722, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 176.0, |
|
"eval_accuracy": 0.3373253493013972, |
|
"eval_loss": 1.1214321851730347, |
|
"eval_runtime": 54.046, |
|
"eval_samples_per_second": 92.699, |
|
"eval_steps_per_second": 11.601, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 192.0, |
|
"learning_rate": 3.3308333333333333e-07, |
|
"loss": 1.0617, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 192.0, |
|
"eval_accuracy": 0.34271457085828344, |
|
"eval_loss": 1.124221682548523, |
|
"eval_runtime": 54.0489, |
|
"eval_samples_per_second": 92.694, |
|
"eval_steps_per_second": 11.601, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 208.0, |
|
"learning_rate": 3.608611111111111e-07, |
|
"loss": 1.0509, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 208.0, |
|
"eval_accuracy": 0.3475049900199601, |
|
"eval_loss": 1.1377869844436646, |
|
"eval_runtime": 54.0137, |
|
"eval_samples_per_second": 92.754, |
|
"eval_steps_per_second": 11.608, |
|
"step": 13000 |
|
} |
|
], |
|
"logging_steps": 1000, |
|
"max_steps": 10000000, |
|
"num_train_epochs": 161291, |
|
"save_steps": 1000, |
|
"total_flos": 2.7174616694784e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|