josecannete's picture
adding model finetuned on PAWS-X
21a13b0
{
"best_metric": 0.2880019545555115,
"best_model_checkpoint": "/data/jcanete/all_results/pawsx/albeto_base/epochs_3_bs_64_lr_5e-5/checkpoint-1500",
"epoch": 3.0,
"global_step": 2316,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.39,
"eval_accuracy": 0.8234999775886536,
"eval_loss": 0.4035343527793884,
"eval_runtime": 2.5624,
"eval_samples_per_second": 780.524,
"eval_steps_per_second": 12.488,
"step": 300
},
{
"epoch": 0.65,
"learning_rate": 3.924870466321244e-05,
"loss": 0.3623,
"step": 500
},
{
"epoch": 0.78,
"eval_accuracy": 0.8634999990463257,
"eval_loss": 0.3268177807331085,
"eval_runtime": 1.4365,
"eval_samples_per_second": 1392.272,
"eval_steps_per_second": 22.276,
"step": 600
},
{
"epoch": 1.17,
"eval_accuracy": 0.8774999976158142,
"eval_loss": 0.3128760755062103,
"eval_runtime": 1.4694,
"eval_samples_per_second": 1361.101,
"eval_steps_per_second": 21.778,
"step": 900
},
{
"epoch": 1.3,
"learning_rate": 2.8454231433506046e-05,
"loss": 0.2024,
"step": 1000
},
{
"epoch": 1.55,
"eval_accuracy": 0.8759999871253967,
"eval_loss": 0.31563544273376465,
"eval_runtime": 2.7664,
"eval_samples_per_second": 722.974,
"eval_steps_per_second": 11.568,
"step": 1200
},
{
"epoch": 1.94,
"learning_rate": 1.7659758203799656e-05,
"loss": 0.1576,
"step": 1500
},
{
"epoch": 1.94,
"eval_accuracy": 0.8899999856948853,
"eval_loss": 0.2880019545555115,
"eval_runtime": 1.5048,
"eval_samples_per_second": 1329.124,
"eval_steps_per_second": 21.266,
"step": 1500
},
{
"epoch": 2.33,
"eval_accuracy": 0.8820000290870667,
"eval_loss": 0.3631149232387543,
"eval_runtime": 1.466,
"eval_samples_per_second": 1364.255,
"eval_steps_per_second": 21.828,
"step": 1800
},
{
"epoch": 2.59,
"learning_rate": 6.865284974093264e-06,
"loss": 0.0951,
"step": 2000
},
{
"epoch": 2.72,
"eval_accuracy": 0.8870000243186951,
"eval_loss": 0.3527218997478485,
"eval_runtime": 2.7631,
"eval_samples_per_second": 723.813,
"eval_steps_per_second": 11.581,
"step": 2100
},
{
"epoch": 3.0,
"step": 2316,
"total_flos": 712974847684320.0,
"train_loss": 0.18752603909718146,
"train_runtime": 736.4947,
"train_samples_per_second": 201.228,
"train_steps_per_second": 3.145
}
],
"max_steps": 2316,
"num_train_epochs": 3,
"total_flos": 712974847684320.0,
"trial_name": null,
"trial_params": null
}