{ "best_metric": 0.9323447636700648, "best_model_checkpoint": "/content/dissertation/scripts/ner/output/checkpoint-4333", "epoch": 9.989615784008308, "eval_steps": 500, "global_step": 4810, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9989615784008308, "eval_accuracy": 0.9987441743644127, "eval_f1": 0.9291628334866606, "eval_loss": 0.004182814620435238, "eval_precision": 0.9173478655767484, "eval_recall": 0.9412861136999068, "eval_runtime": 15.1684, "eval_samples_per_second": 457.925, "eval_steps_per_second": 57.29, "step": 481 }, { "epoch": 1.0384215991692627, "grad_norm": 0.07515838742256165, "learning_rate": 4.48024948024948e-05, "loss": 0.0156, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.9986464990372004, "eval_f1": 0.9189439555349699, "eval_loss": 0.004935940261930227, "eval_precision": 0.9134438305709024, "eval_recall": 0.9245107176141659, "eval_runtime": 15.18, "eval_samples_per_second": 457.575, "eval_steps_per_second": 57.246, "step": 963 }, { "epoch": 2.0768431983385254, "grad_norm": 0.005299085285514593, "learning_rate": 3.9604989604989604e-05, "loss": 0.0039, "step": 1000 }, { "epoch": 2.9989615784008308, "eval_accuracy": 0.9985627773281612, "eval_f1": 0.9191873589164785, "eval_loss": 0.005292736925184727, "eval_precision": 0.8914185639229422, "eval_recall": 0.9487418452935694, "eval_runtime": 15.1155, "eval_samples_per_second": 459.529, "eval_steps_per_second": 57.491, "step": 1444 }, { "epoch": 3.115264797507788, "grad_norm": 0.06427361071109772, "learning_rate": 3.4407484407484405e-05, "loss": 0.0024, "step": 1500 }, { "epoch": 4.0, "eval_accuracy": 0.9984720788100354, "eval_f1": 0.9167412712623096, "eval_loss": 0.0061088865622878075, "eval_precision": 0.8819982773471146, "eval_recall": 0.9543336439888164, "eval_runtime": 15.4198, "eval_samples_per_second": 450.46, "eval_steps_per_second": 56.356, "step": 1926 }, { "epoch": 4.153686396677051, "grad_norm": 0.04428843781352043, "learning_rate": 2.920997920997921e-05, "loss": 0.0017, "step": 2000 }, { "epoch": 4.998961578400831, "eval_accuracy": 0.9986046381826807, "eval_f1": 0.9254284390921722, "eval_loss": 0.007444376591593027, "eval_precision": 0.919889502762431, "eval_recall": 0.9310344827586207, "eval_runtime": 15.1205, "eval_samples_per_second": 459.377, "eval_steps_per_second": 57.472, "step": 2407 }, { "epoch": 5.192107995846314, "grad_norm": 0.004208261147141457, "learning_rate": 2.4012474012474013e-05, "loss": 0.0011, "step": 2500 }, { "epoch": 6.0, "eval_accuracy": 0.9985767309463344, "eval_f1": 0.9266943291839557, "eval_loss": 0.007942954078316689, "eval_precision": 0.916970802919708, "eval_recall": 0.9366262814538676, "eval_runtime": 15.1887, "eval_samples_per_second": 457.314, "eval_steps_per_second": 57.214, "step": 2889 }, { "epoch": 6.230529595015576, "grad_norm": 0.004876282997429371, "learning_rate": 1.8814968814968818e-05, "loss": 0.0007, "step": 3000 }, { "epoch": 6.998961578400831, "eval_accuracy": 0.9986744062735468, "eval_f1": 0.9254004576659038, "eval_loss": 0.006659395061433315, "eval_precision": 0.9091726618705036, "eval_recall": 0.9422180801491147, "eval_runtime": 15.1599, "eval_samples_per_second": 458.182, "eval_steps_per_second": 57.322, "step": 3370 }, { "epoch": 7.268951194184839, "grad_norm": 0.0032651671208441257, "learning_rate": 1.3617463617463619e-05, "loss": 0.0005, "step": 3500 }, { "epoch": 8.0, "eval_accuracy": 0.9986674294644602, "eval_f1": 0.9275092936802974, "eval_loss": 0.007326104678213596, "eval_precision": 0.9249304911955515, "eval_recall": 0.9301025163094129, "eval_runtime": 15.1332, "eval_samples_per_second": 458.991, "eval_steps_per_second": 57.423, "step": 3852 }, { "epoch": 8.307372793354102, "grad_norm": 0.0013715826207771897, "learning_rate": 8.419958419958421e-06, "loss": 0.0004, "step": 4000 }, { "epoch": 8.99896157840083, "eval_accuracy": 0.9987162671280663, "eval_f1": 0.9323447636700648, "eval_loss": 0.008018395863473415, "eval_precision": 0.9271889400921659, "eval_recall": 0.9375582479030755, "eval_runtime": 15.4151, "eval_samples_per_second": 450.598, "eval_steps_per_second": 56.373, "step": 4333 }, { "epoch": 9.345794392523365, "grad_norm": 0.005300257820636034, "learning_rate": 3.2224532224532228e-06, "loss": 0.0002, "step": 4500 }, { "epoch": 9.989615784008308, "eval_accuracy": 0.9987092903189797, "eval_f1": 0.9315448658649399, "eval_loss": 0.007857992313802242, "eval_precision": 0.9247015610651974, "eval_recall": 0.9384902143522833, "eval_runtime": 15.1843, "eval_samples_per_second": 457.447, "eval_steps_per_second": 57.23, "step": 4810 }, { "epoch": 9.989615784008308, "step": 4810, "total_flos": 1.3132946802550608e+16, "train_loss": 0.0027612092573652642, "train_runtime": 2084.9871, "train_samples_per_second": 147.78, "train_steps_per_second": 2.307 } ], "logging_steps": 500, "max_steps": 4810, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.3132946802550608e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }