|
{ |
|
"best_metric": 0.31465813517570496, |
|
"best_model_checkpoint": "/data/jcanete/all_results/pawsx/albeto_large/epochs_2_bs_32_lr_5e-6/checkpoint-2700", |
|
"epoch": 2.0, |
|
"global_step": 3088, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 0.7465000152587891, |
|
"eval_loss": 0.5256621241569519, |
|
"eval_runtime": 3.674, |
|
"eval_samples_per_second": 544.365, |
|
"eval_steps_per_second": 17.148, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.195272020725389e-06, |
|
"loss": 0.5015, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_accuracy": 0.8044999837875366, |
|
"eval_loss": 0.42749786376953125, |
|
"eval_runtime": 3.7001, |
|
"eval_samples_per_second": 540.533, |
|
"eval_steps_per_second": 17.027, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_accuracy": 0.8335000276565552, |
|
"eval_loss": 0.40074634552001953, |
|
"eval_runtime": 3.6783, |
|
"eval_samples_per_second": 543.722, |
|
"eval_steps_per_second": 17.127, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.3856865284974094e-06, |
|
"loss": 0.302, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_accuracy": 0.8454999923706055, |
|
"eval_loss": 0.3730817139148712, |
|
"eval_runtime": 3.6669, |
|
"eval_samples_per_second": 545.419, |
|
"eval_steps_per_second": 17.181, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.5761010362694307e-06, |
|
"loss": 0.2552, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_accuracy": 0.859000027179718, |
|
"eval_loss": 0.33458662033081055, |
|
"eval_runtime": 3.679, |
|
"eval_samples_per_second": 543.627, |
|
"eval_steps_per_second": 17.124, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_accuracy": 0.8654999732971191, |
|
"eval_loss": 0.33431583642959595, |
|
"eval_runtime": 3.6739, |
|
"eval_samples_per_second": 544.382, |
|
"eval_steps_per_second": 17.148, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.7665155440414508e-06, |
|
"loss": 0.2051, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_accuracy": 0.871999979019165, |
|
"eval_loss": 0.3163151144981384, |
|
"eval_runtime": 3.662, |
|
"eval_samples_per_second": 546.147, |
|
"eval_steps_per_second": 17.204, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_accuracy": 0.8784999847412109, |
|
"eval_loss": 0.31933271884918213, |
|
"eval_runtime": 3.6771, |
|
"eval_samples_per_second": 543.901, |
|
"eval_steps_per_second": 17.133, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 9.585492227979275e-07, |
|
"loss": 0.1902, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_accuracy": 0.8769999742507935, |
|
"eval_loss": 0.31465813517570496, |
|
"eval_runtime": 3.6635, |
|
"eval_samples_per_second": 545.931, |
|
"eval_steps_per_second": 17.197, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.4896373056994818e-07, |
|
"loss": 0.1928, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_accuracy": 0.871999979019165, |
|
"eval_loss": 0.32511112093925476, |
|
"eval_runtime": 3.6606, |
|
"eval_samples_per_second": 546.359, |
|
"eval_steps_per_second": 17.21, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 3088, |
|
"total_flos": 803053584088992.0, |
|
"train_loss": 0.2719091473465756, |
|
"train_runtime": 1265.6788, |
|
"train_samples_per_second": 78.062, |
|
"train_steps_per_second": 2.44 |
|
} |
|
], |
|
"max_steps": 3088, |
|
"num_train_epochs": 2, |
|
"total_flos": 803053584088992.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|