{ "best_metric": 0.20911003649234772, "best_model_checkpoint": "/data/jcanete/all_results/mldoc/albeto_xlarge/epochs_3_bs_16_lr_5e-6/checkpoint-1500", "epoch": 2.9991546914623837, "global_step": 1773, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.17, "eval_accuracy": 0.2770000100135803, "eval_loss": 1.3528416156768799, "eval_runtime": 24.2204, "eval_samples_per_second": 41.287, "eval_steps_per_second": 5.161, "step": 100 }, { "epoch": 0.34, "eval_accuracy": 0.47999998927116394, "eval_loss": 1.0430142879486084, "eval_runtime": 23.7736, "eval_samples_per_second": 42.063, "eval_steps_per_second": 5.258, "step": 200 }, { "epoch": 0.51, "eval_accuracy": 0.5640000104904175, "eval_loss": 1.0979323387145996, "eval_runtime": 23.5496, "eval_samples_per_second": 42.464, "eval_steps_per_second": 5.308, "step": 300 }, { "epoch": 0.68, "eval_accuracy": 0.5720000267028809, "eval_loss": 0.9388349652290344, "eval_runtime": 25.4729, "eval_samples_per_second": 39.257, "eval_steps_per_second": 4.907, "step": 400 }, { "epoch": 0.85, "learning_rate": 3.601240834743373e-06, "loss": 0.2676, "step": 500 }, { "epoch": 0.85, "eval_accuracy": 0.7160000205039978, "eval_loss": 0.6925698518753052, "eval_runtime": 24.8675, "eval_samples_per_second": 40.213, "eval_steps_per_second": 5.027, "step": 500 }, { "epoch": 1.02, "eval_accuracy": 0.8560000061988831, "eval_loss": 0.673073947429657, "eval_runtime": 25.8013, "eval_samples_per_second": 38.758, "eval_steps_per_second": 4.845, "step": 600 }, { "epoch": 1.18, "eval_accuracy": 0.9369999766349792, "eval_loss": 0.3746216893196106, "eval_runtime": 25.6232, "eval_samples_per_second": 39.027, "eval_steps_per_second": 4.878, "step": 700 }, { "epoch": 1.35, "eval_accuracy": 0.9350000023841858, "eval_loss": 0.35705798864364624, "eval_runtime": 25.7852, "eval_samples_per_second": 38.782, "eval_steps_per_second": 4.848, "step": 800 }, { "epoch": 1.52, "eval_accuracy": 0.9409999847412109, "eval_loss": 0.27689608931541443, "eval_runtime": 24.8438, "eval_samples_per_second": 40.252, "eval_steps_per_second": 5.031, "step": 900 }, { "epoch": 1.69, "learning_rate": 2.1912013536379022e-06, "loss": 0.1019, "step": 1000 }, { "epoch": 1.69, "eval_accuracy": 0.9350000023841858, "eval_loss": 0.291538268327713, "eval_runtime": 26.3153, "eval_samples_per_second": 38.001, "eval_steps_per_second": 4.75, "step": 1000 }, { "epoch": 1.86, "eval_accuracy": 0.9509999752044678, "eval_loss": 0.21956732869148254, "eval_runtime": 24.9343, "eval_samples_per_second": 40.105, "eval_steps_per_second": 5.013, "step": 1100 }, { "epoch": 2.03, "eval_accuracy": 0.9520000219345093, "eval_loss": 0.2259828746318817, "eval_runtime": 25.7253, "eval_samples_per_second": 38.872, "eval_steps_per_second": 4.859, "step": 1200 }, { "epoch": 2.2, "eval_accuracy": 0.9509999752044678, "eval_loss": 0.256782591342926, "eval_runtime": 25.7431, "eval_samples_per_second": 38.845, "eval_steps_per_second": 4.856, "step": 1300 }, { "epoch": 2.37, "eval_accuracy": 0.9549999833106995, "eval_loss": 0.2290709912776947, "eval_runtime": 24.8624, "eval_samples_per_second": 40.221, "eval_steps_per_second": 5.028, "step": 1400 }, { "epoch": 2.54, "learning_rate": 7.81161872532431e-07, "loss": 0.0593, "step": 1500 }, { "epoch": 2.54, "eval_accuracy": 0.9589999914169312, "eval_loss": 0.20911003649234772, "eval_runtime": 25.438, "eval_samples_per_second": 39.311, "eval_steps_per_second": 4.914, "step": 1500 }, { "epoch": 2.71, "eval_accuracy": 0.9490000009536743, "eval_loss": 0.2575547695159912, "eval_runtime": 25.2822, "eval_samples_per_second": 39.554, "eval_steps_per_second": 4.944, "step": 1600 }, { "epoch": 2.88, "eval_accuracy": 0.9509999752044678, "eval_loss": 0.23298123478889465, "eval_runtime": 25.3064, "eval_samples_per_second": 39.516, "eval_steps_per_second": 4.939, "step": 1700 }, { "epoch": 3.0, "step": 1773, "total_flos": 3375408735295488.0, "train_loss": 0.1292072490857808, "train_runtime": 3156.6951, "train_samples_per_second": 8.989, "train_steps_per_second": 0.562 } ], "max_steps": 1773, "num_train_epochs": 3, "total_flos": 3375408735295488.0, "trial_name": null, "trial_params": null }