{ "best_metric": 0.07052170485258102, "best_model_checkpoint": "token_level_model/best_model/checkpoint-948", "epoch": 12.0, "global_step": 3792, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_balanced accuracy": 0.5633085631676005, "eval_f1": 0.5303163390780957, "eval_loss": 0.09163307398557663, "eval_precision": 0.6548165588282941, "eval_recall": 0.5633085631676005, "eval_runtime": 5.3367, "eval_samples_per_second": 236.852, "eval_steps_per_second": 14.803, "step": 316 }, { "epoch": 1.58, "learning_rate": 1.7362869198312237e-05, "loss": 0.1839, "step": 500 }, { "epoch": 2.0, "eval_balanced accuracy": 0.7195902052549533, "eval_f1": 0.7255288809894066, "eval_loss": 0.0722324550151825, "eval_precision": 0.7454575741625619, "eval_recall": 0.7195902052549533, "eval_runtime": 5.3534, "eval_samples_per_second": 236.111, "eval_steps_per_second": 14.757, "step": 632 }, { "epoch": 3.0, "eval_balanced accuracy": 0.7147400555506803, "eval_f1": 0.7192305114894215, "eval_loss": 0.07052170485258102, "eval_precision": 0.7756273409712318, "eval_recall": 0.7147400555506803, "eval_runtime": 5.4755, "eval_samples_per_second": 230.847, "eval_steps_per_second": 14.428, "step": 948 }, { "epoch": 3.16, "learning_rate": 1.4725738396624474e-05, "loss": 0.0587, "step": 1000 }, { "epoch": 4.0, "eval_balanced accuracy": 0.7295185936273012, "eval_f1": 0.7296703416168215, "eval_loss": 0.07182055711746216, "eval_precision": 0.7401045017667032, "eval_recall": 0.7295185936273012, "eval_runtime": 5.3937, "eval_samples_per_second": 234.345, "eval_steps_per_second": 14.647, "step": 1264 }, { "epoch": 4.75, "learning_rate": 1.208860759493671e-05, "loss": 0.0376, "step": 1500 }, { "epoch": 5.0, "eval_balanced accuracy": 0.735330002568732, "eval_f1": 0.743746101471838, "eval_loss": 0.07561135292053223, "eval_precision": 0.758929084908083, "eval_recall": 0.735330002568732, "eval_runtime": 5.5304, "eval_samples_per_second": 228.555, "eval_steps_per_second": 14.285, "step": 1580 }, { "epoch": 6.0, "eval_balanced accuracy": 0.7487913019838482, "eval_f1": 0.7539294881409199, "eval_loss": 0.08333344757556915, "eval_precision": 0.7640729935139194, "eval_recall": 0.7487913019838482, "eval_runtime": 5.304, "eval_samples_per_second": 238.312, "eval_steps_per_second": 14.894, "step": 1896 }, { "epoch": 6.33, "learning_rate": 9.451476793248946e-06, "loss": 0.0239, "step": 2000 }, { "epoch": 7.0, "eval_balanced accuracy": 0.7495077050154062, "eval_f1": 0.7547220689413356, "eval_loss": 0.09176070988178253, "eval_precision": 0.7680132999431392, "eval_recall": 0.7495077050154062, "eval_runtime": 5.4894, "eval_samples_per_second": 230.262, "eval_steps_per_second": 14.391, "step": 2212 }, { "epoch": 7.91, "learning_rate": 6.814345991561182e-06, "loss": 0.0161, "step": 2500 }, { "epoch": 8.0, "eval_balanced accuracy": 0.7519480763726148, "eval_f1": 0.7441340002103095, "eval_loss": 0.10090441256761551, "eval_precision": 0.7421918161304624, "eval_recall": 0.7519480763726148, "eval_runtime": 5.4772, "eval_samples_per_second": 230.774, "eval_steps_per_second": 14.423, "step": 2528 }, { "epoch": 9.0, "eval_balanced accuracy": 0.7372305744818235, "eval_f1": 0.7418663358868686, "eval_loss": 0.10627683997154236, "eval_precision": 0.747694948865169, "eval_recall": 0.7372305744818235, "eval_runtime": 5.673, "eval_samples_per_second": 222.809, "eval_steps_per_second": 13.926, "step": 2844 }, { "epoch": 9.49, "learning_rate": 4.177215189873418e-06, "loss": 0.0107, "step": 3000 }, { "epoch": 10.0, "eval_balanced accuracy": 0.7454153105654866, "eval_f1": 0.7532278014935634, "eval_loss": 0.11291743814945221, "eval_precision": 0.7635910633921945, "eval_recall": 0.7454153105654866, "eval_runtime": 5.7497, "eval_samples_per_second": 219.839, "eval_steps_per_second": 13.74, "step": 3160 }, { "epoch": 11.0, "eval_balanced accuracy": 0.7422514651185799, "eval_f1": 0.7462413455365297, "eval_loss": 0.11802595853805542, "eval_precision": 0.7518280300030182, "eval_recall": 0.7422514651185799, "eval_runtime": 5.6277, "eval_samples_per_second": 224.602, "eval_steps_per_second": 14.038, "step": 3476 }, { "epoch": 11.08, "learning_rate": 1.5400843881856542e-06, "loss": 0.007, "step": 3500 }, { "epoch": 12.0, "eval_balanced accuracy": 0.7397630177088332, "eval_f1": 0.7454360643197575, "eval_loss": 0.11985792219638824, "eval_precision": 0.7526407260582226, "eval_recall": 0.7397630177088332, "eval_runtime": 5.7001, "eval_samples_per_second": 221.752, "eval_steps_per_second": 13.859, "step": 3792 } ], "max_steps": 3792, "num_train_epochs": 12, "total_flos": 302819736843288.0, "trial_name": null, "trial_params": null }