{ "best_metric": 0.2880019545555115, "best_model_checkpoint": "/data/jcanete/all_results/pawsx/albeto_base/epochs_3_bs_64_lr_5e-5/checkpoint-1500", "epoch": 3.0, "global_step": 2316, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.39, "eval_accuracy": 0.8234999775886536, "eval_loss": 0.4035343527793884, "eval_runtime": 2.5624, "eval_samples_per_second": 780.524, "eval_steps_per_second": 12.488, "step": 300 }, { "epoch": 0.65, "learning_rate": 3.924870466321244e-05, "loss": 0.3623, "step": 500 }, { "epoch": 0.78, "eval_accuracy": 0.8634999990463257, "eval_loss": 0.3268177807331085, "eval_runtime": 1.4365, "eval_samples_per_second": 1392.272, "eval_steps_per_second": 22.276, "step": 600 }, { "epoch": 1.17, "eval_accuracy": 0.8774999976158142, "eval_loss": 0.3128760755062103, "eval_runtime": 1.4694, "eval_samples_per_second": 1361.101, "eval_steps_per_second": 21.778, "step": 900 }, { "epoch": 1.3, "learning_rate": 2.8454231433506046e-05, "loss": 0.2024, "step": 1000 }, { "epoch": 1.55, "eval_accuracy": 0.8759999871253967, "eval_loss": 0.31563544273376465, "eval_runtime": 2.7664, "eval_samples_per_second": 722.974, "eval_steps_per_second": 11.568, "step": 1200 }, { "epoch": 1.94, "learning_rate": 1.7659758203799656e-05, "loss": 0.1576, "step": 1500 }, { "epoch": 1.94, "eval_accuracy": 0.8899999856948853, "eval_loss": 0.2880019545555115, "eval_runtime": 1.5048, "eval_samples_per_second": 1329.124, "eval_steps_per_second": 21.266, "step": 1500 }, { "epoch": 2.33, "eval_accuracy": 0.8820000290870667, "eval_loss": 0.3631149232387543, "eval_runtime": 1.466, "eval_samples_per_second": 1364.255, "eval_steps_per_second": 21.828, "step": 1800 }, { "epoch": 2.59, "learning_rate": 6.865284974093264e-06, "loss": 0.0951, "step": 2000 }, { "epoch": 2.72, "eval_accuracy": 0.8870000243186951, "eval_loss": 0.3527218997478485, "eval_runtime": 2.7631, "eval_samples_per_second": 723.813, "eval_steps_per_second": 11.581, "step": 2100 }, { "epoch": 3.0, "step": 2316, "total_flos": 712974847684320.0, "train_loss": 0.18752603909718146, "train_runtime": 736.4947, "train_samples_per_second": 201.228, "train_steps_per_second": 3.145 } ], "max_steps": 2316, "num_train_epochs": 3, "total_flos": 712974847684320.0, "trial_name": null, "trial_params": null }