{ "best_metric": 0.04727424308657646, "best_model_checkpoint": "results_simple-latin-bert-uncased/epoch20_bs64/checkpoint-36", "epoch": 11.0, "eval_steps": 500, "global_step": 99, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.7604417204856873, "learning_rate": 4.75e-05, "loss": 0.2645, "step": 9 }, { "epoch": 1.0, "eval_accurracy": { "accuracy": 0.9775494672754946 }, "eval_f1": [ 0.9749575551782682, 0.7978910369068541, 0.9994264958898872 ], "eval_loss": 0.07480967044830322, "eval_precision": [ 0.968381112984823, 0.850187265917603, 0.9988536492166603 ], "eval_recall": [ 0.9816239316239316, 0.7516556291390728, 1.0 ], "eval_runtime": 2.8335, "eval_samples_per_second": 25.41, "eval_steps_per_second": 0.706, "step": 9 }, { "epoch": 2.0, "grad_norm": 0.30367013812065125, "learning_rate": 4.5e-05, "loss": 0.0564, "step": 18 }, { "epoch": 2.0, "eval_accurracy": { "accuracy": 0.9813546423135464 }, "eval_f1": [ 0.9791400595998297, 0.8327645051194539, 1.0 ], "eval_loss": 0.053640153259038925, "eval_precision": [ 0.9754028837998303, 0.8591549295774648, 1.0 ], "eval_recall": [ 0.9829059829059829, 0.8079470198675497, 1.0 ], "eval_runtime": 2.7042, "eval_samples_per_second": 26.625, "eval_steps_per_second": 0.74, "step": 18 }, { "epoch": 3.0, "grad_norm": 0.28803956508636475, "learning_rate": 4.25e-05, "loss": 0.0447, "step": 27 }, { "epoch": 3.0, "eval_accurracy": { "accuracy": 0.9823059360730594 }, "eval_f1": [ 0.9801916932907349, 0.8421052631578947, 1.0 ], "eval_loss": 0.052741460502147675, "eval_precision": [ 0.9770700636942675, 0.8641114982578397, 1.0 ], "eval_recall": [ 0.9833333333333333, 0.8211920529801324, 1.0 ], "eval_runtime": 2.6628, "eval_samples_per_second": 27.04, "eval_steps_per_second": 0.751, "step": 27 }, { "epoch": 4.0, "grad_norm": 0.2971172630786896, "learning_rate": 4e-05, "loss": 0.0377, "step": 36 }, { "epoch": 4.0, "eval_accurracy": { "accuracy": 0.9811643835616438 }, "eval_f1": [ 0.9789227166276346, 0.8313458262350937, 1.0 ], "eval_loss": 0.04727424308657646, "eval_precision": [ 0.9753924480271532, 0.856140350877193, 1.0 ], "eval_recall": [ 0.9824786324786324, 0.8079470198675497, 1.0 ], "eval_runtime": 2.6802, "eval_samples_per_second": 26.864, "eval_steps_per_second": 0.746, "step": 36 }, { "epoch": 5.0, "grad_norm": 0.32125240564346313, "learning_rate": 3.7500000000000003e-05, "loss": 0.0325, "step": 45 }, { "epoch": 5.0, "eval_accurracy": { "accuracy": 0.9826864535768646 }, "eval_f1": [ 0.9806176783812567, 0.8455008488964346, 1.0 ], "eval_loss": 0.04757271707057953, "eval_precision": [ 0.9774946921443737, 0.867595818815331, 1.0 ], "eval_recall": [ 0.9837606837606837, 0.8245033112582781, 1.0 ], "eval_runtime": 2.6737, "eval_samples_per_second": 26.929, "eval_steps_per_second": 0.748, "step": 45 }, { "epoch": 6.0, "grad_norm": 0.23932930827140808, "learning_rate": 3.5e-05, "loss": 0.0276, "step": 54 }, { "epoch": 6.0, "eval_accurracy": { "accuracy": 0.9828767123287672 }, "eval_f1": [ 0.98080204778157, 0.8489932885906041, 1.0 ], "eval_loss": 0.04851287603378296, "eval_precision": [ 0.9791311754684838, 0.8605442176870748, 1.0 ], "eval_recall": [ 0.9824786324786324, 0.8377483443708609, 1.0 ], "eval_runtime": 2.7322, "eval_samples_per_second": 26.352, "eval_steps_per_second": 0.732, "step": 54 }, { "epoch": 7.0, "grad_norm": 0.2584969997406006, "learning_rate": 3.2500000000000004e-05, "loss": 0.0235, "step": 63 }, { "epoch": 7.0, "eval_accurracy": { "accuracy": 0.9834474885844748 }, "eval_f1": [ 0.9814300960512273, 0.8547579298831386, 1.0 ], "eval_loss": 0.04932190850377083, "eval_precision": [ 0.9803837953091684, 0.8619528619528619, 1.0 ], "eval_recall": [ 0.9824786324786324, 0.847682119205298, 1.0 ], "eval_runtime": 2.7945, "eval_samples_per_second": 25.765, "eval_steps_per_second": 0.716, "step": 63 }, { "epoch": 8.0, "grad_norm": 0.22960752248764038, "learning_rate": 3e-05, "loss": 0.0206, "step": 72 }, { "epoch": 8.0, "eval_accurracy": { "accuracy": 0.9836377473363774 }, "eval_f1": [ 0.9816160752458315, 0.858085808580858, 1.0 ], "eval_loss": 0.05238157883286476, "eval_precision": [ 0.9820359281437125, 0.8552631578947368, 1.0 ], "eval_recall": [ 0.9811965811965812, 0.8609271523178808, 1.0 ], "eval_runtime": 2.6883, "eval_samples_per_second": 26.783, "eval_steps_per_second": 0.744, "step": 72 }, { "epoch": 9.0, "grad_norm": 0.19476890563964844, "learning_rate": 2.7500000000000004e-05, "loss": 0.0169, "step": 81 }, { "epoch": 9.0, "eval_accurracy": { "accuracy": 0.9828767123287672 }, "eval_f1": [ 0.9807527801539777, 0.8519736842105263, 1.0 ], "eval_loss": 0.055463388562202454, "eval_precision": [ 0.9815924657534246, 0.8464052287581699, 1.0 ], "eval_recall": [ 0.9799145299145299, 0.8576158940397351, 1.0 ], "eval_runtime": 2.6823, "eval_samples_per_second": 26.843, "eval_steps_per_second": 0.746, "step": 81 }, { "epoch": 10.0, "grad_norm": 0.2635619640350342, "learning_rate": 2.5e-05, "loss": 0.0146, "step": 90 }, { "epoch": 10.0, "eval_accurracy": { "accuracy": 0.9836377473363774 }, "eval_f1": [ 0.9816160752458315, 0.858085808580858, 1.0 ], "eval_loss": 0.05970200523734093, "eval_precision": [ 0.9820359281437125, 0.8552631578947368, 1.0 ], "eval_recall": [ 0.9811965811965812, 0.8609271523178808, 1.0 ], "eval_runtime": 2.7111, "eval_samples_per_second": 26.558, "eval_steps_per_second": 0.738, "step": 90 }, { "epoch": 11.0, "grad_norm": 0.23059502243995667, "learning_rate": 2.25e-05, "loss": 0.0125, "step": 99 }, { "epoch": 11.0, "eval_accurracy": { "accuracy": 0.9834474885844748 }, "eval_f1": [ 0.9813824095869891, 0.8576104746317512, 1.0 ], "eval_loss": 0.06442799419164658, "eval_precision": [ 0.9828546935276468, 0.8478964401294499, 1.0 ], "eval_recall": [ 0.9799145299145299, 0.8675496688741722, 1.0 ], "eval_runtime": 2.6437, "eval_samples_per_second": 27.234, "eval_steps_per_second": 0.757, "step": 99 } ], "logging_steps": 500, "max_steps": 180, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 267219177271200.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }