{ "best_metric": 0.8851654529571533, "best_model_checkpoint": "finetune-tiktok-brat4\\checkpoint-20", "epoch": 1.0, "eval_steps": 500, "global_step": 20, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": 54.496673583984375, "learning_rate": 1.6666666666666667e-06, "loss": 0.7894, "step": 1 }, { "epoch": 0.1, "grad_norm": 11.606372833251953, "learning_rate": 3.3333333333333333e-06, "loss": 0.7589, "step": 2 }, { "epoch": 0.15, "grad_norm": 38.31898498535156, "learning_rate": 5e-06, "loss": 1.2543, "step": 3 }, { "epoch": 0.2, "grad_norm": 47.521976470947266, "learning_rate": 6.666666666666667e-06, "loss": 0.8361, "step": 4 }, { "epoch": 0.25, "grad_norm": 23.255413055419922, "learning_rate": 8.333333333333334e-06, "loss": 0.2909, "step": 5 }, { "epoch": 0.3, "grad_norm": 36.35523986816406, "learning_rate": 1e-05, "loss": 0.7906, "step": 6 }, { "epoch": 0.35, "grad_norm": 35.16067886352539, "learning_rate": 9.814814814814815e-06, "loss": 0.96, "step": 7 }, { "epoch": 0.4, "grad_norm": 36.35455322265625, "learning_rate": 9.62962962962963e-06, "loss": 0.3895, "step": 8 }, { "epoch": 0.45, "grad_norm": 119.45866394042969, "learning_rate": 9.444444444444445e-06, "loss": 0.5939, "step": 9 }, { "epoch": 0.5, "grad_norm": 32.66160202026367, "learning_rate": 9.25925925925926e-06, "loss": 0.9343, "step": 10 }, { "epoch": 0.55, "grad_norm": 16.789913177490234, "learning_rate": 9.074074074074075e-06, "loss": 0.6067, "step": 11 }, { "epoch": 0.6, "grad_norm": 13.208721160888672, "learning_rate": 8.888888888888888e-06, "loss": 0.3274, "step": 12 }, { "epoch": 0.65, "grad_norm": 21.66659927368164, "learning_rate": 8.703703703703705e-06, "loss": 1.8018, "step": 13 }, { "epoch": 0.7, "grad_norm": 14.522764205932617, "learning_rate": 8.518518518518519e-06, "loss": 0.9705, "step": 14 }, { "epoch": 0.75, "grad_norm": 33.81802749633789, "learning_rate": 8.333333333333334e-06, "loss": 1.3949, "step": 15 }, { "epoch": 0.8, "grad_norm": 46.284576416015625, "learning_rate": 8.148148148148148e-06, "loss": 1.7191, "step": 16 }, { "epoch": 0.85, "grad_norm": 18.38251304626465, "learning_rate": 7.962962962962963e-06, "loss": 0.4667, "step": 17 }, { "epoch": 0.9, "grad_norm": 18.9488582611084, "learning_rate": 7.77777777777778e-06, "loss": 1.1351, "step": 18 }, { "epoch": 0.95, "grad_norm": 9.29172420501709, "learning_rate": 7.592592592592594e-06, "loss": 0.1151, "step": 19 }, { "epoch": 1.0, "grad_norm": 39.511680603027344, "learning_rate": 7.4074074074074075e-06, "loss": 1.3048, "step": 20 }, { "epoch": 1.0, "eval_accuracy": 0.6805555555555556, "eval_f1_macro": 0.6377799415774099, "eval_f1_micro": 0.6805555555555556, "eval_f1_weighted": 0.6723466407010711, "eval_loss": 0.8851654529571533, "eval_precision_macro": 0.6714285714285714, "eval_precision_micro": 0.6805555555555556, "eval_precision_weighted": 0.680952380952381, "eval_recall_macro": 0.6266299193930772, "eval_recall_micro": 0.6805555555555556, "eval_recall_weighted": 0.6805555555555556, "eval_runtime": 85.5963, "eval_samples_per_second": 0.841, "eval_steps_per_second": 0.058, "step": 20 } ], "logging_steps": 1, "max_steps": 60, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, 
"should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 42098146836480.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }