{ "best_metric": 0.6926904320716858, "best_model_checkpoint": "/tmp/model/checkpoint-300", "epoch": 4.0, "eval_steps": 500, "global_step": 300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "learning_rate": 2e-05, "loss": 0.6932, "step": 15 }, { "epoch": 0.4, "learning_rate": 4e-05, "loss": 0.6935, "step": 30 }, { "epoch": 0.6, "learning_rate": 6e-05, "loss": 0.6934, "step": 45 }, { "epoch": 0.8, "learning_rate": 8e-05, "loss": 0.6936, "step": 60 }, { "epoch": 1.0, "learning_rate": 0.0001, "loss": 0.6936, "step": 75 }, { "epoch": 1.0, "eval_accuracy": 0.5533333333333333, "eval_auc": 0.5650777777777778, "eval_f1": 0.5620915032679739, "eval_loss": 0.6929760575294495, "eval_precision": 0.5512820512820513, "eval_recall": 0.5733333333333334, "eval_runtime": 4.573, "eval_samples_per_second": 131.205, "eval_steps_per_second": 8.31, "step": 75 }, { "epoch": 1.2, "learning_rate": 9.777777777777778e-05, "loss": 0.6933, "step": 90 }, { "epoch": 1.4, "learning_rate": 9.555555555555557e-05, "loss": 0.6913, "step": 105 }, { "epoch": 1.6, "learning_rate": 9.333333333333334e-05, "loss": 0.6926, "step": 120 }, { "epoch": 1.8, "learning_rate": 9.111111111111112e-05, "loss": 0.6945, "step": 135 }, { "epoch": 2.0, "learning_rate": 8.888888888888889e-05, "loss": 0.7082, "step": 150 }, { "epoch": 2.0, "eval_accuracy": 0.5, "eval_auc": 0.5238666666666667, "eval_f1": 0.6666666666666666, "eval_loss": 0.6932576298713684, "eval_precision": 0.5, "eval_recall": 1.0, "eval_runtime": 4.5135, "eval_samples_per_second": 132.936, "eval_steps_per_second": 8.419, "step": 150 }, { "epoch": 2.2, "learning_rate": 8.666666666666667e-05, "loss": 0.6868, "step": 165 }, { "epoch": 2.4, "learning_rate": 8.444444444444444e-05, "loss": 0.6979, "step": 180 }, { "epoch": 2.6, "learning_rate": 8.222222222222222e-05, "loss": 0.6939, "step": 195 }, { "epoch": 2.8, "learning_rate": 8e-05, "loss": 0.6946, "step": 210 }, { "epoch": 3.0, "learning_rate": 7.777777777777778e-05, "loss": 0.6936, "step": 225 }, { "epoch": 3.0, "eval_accuracy": 0.5466666666666666, "eval_auc": 0.5420722222222223, "eval_f1": 0.4999999999999999, "eval_loss": 0.6928548216819763, "eval_precision": 0.5573770491803278, "eval_recall": 0.4533333333333333, "eval_runtime": 4.4853, "eval_samples_per_second": 133.77, "eval_steps_per_second": 8.472, "step": 225 }, { "epoch": 3.2, "learning_rate": 7.555555555555556e-05, "loss": 0.6932, "step": 240 }, { "epoch": 3.4, "learning_rate": 7.333333333333333e-05, "loss": 0.693, "step": 255 }, { "epoch": 3.6, "learning_rate": 7.111111111111112e-05, "loss": 0.6905, "step": 270 }, { "epoch": 3.8, "learning_rate": 6.88888888888889e-05, "loss": 0.6906, "step": 285 }, { "epoch": 4.0, "learning_rate": 6.666666666666667e-05, "loss": 0.6952, "step": 300 }, { "epoch": 4.0, "eval_accuracy": 0.5033333333333333, "eval_auc": 0.5401888888888888, "eval_f1": 0.6644144144144143, "eval_loss": 0.6926904320716858, "eval_precision": 0.5017006802721088, "eval_recall": 0.9833333333333333, "eval_runtime": 4.4974, "eval_samples_per_second": 133.412, "eval_steps_per_second": 8.449, "step": 300 } ], "logging_steps": 15, "max_steps": 750, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 5.0964968226816e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }