{ "best_metric": 0.7901234567901234, "best_model_checkpoint": "distilbert-base-multilingual-cased-hyper-matt/run-hc8i9nht/checkpoint-200", "epoch": 1.0, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": 3.6317474842071533, "learning_rate": 8.174509670130923e-05, "loss": 0.543, "step": 10 }, { "epoch": 0.1, "grad_norm": 2.941434860229492, "learning_rate": 8.091938865382126e-05, "loss": 0.4521, "step": 20 }, { "epoch": 0.15, "grad_norm": 8.016721725463867, "learning_rate": 8.009368060633328e-05, "loss": 0.4908, "step": 30 }, { "epoch": 0.2, "grad_norm": 2.4489011764526367, "learning_rate": 7.92679725588453e-05, "loss": 0.4119, "step": 40 }, { "epoch": 0.25, "grad_norm": 3.7478549480438232, "learning_rate": 7.844226451135734e-05, "loss": 0.5418, "step": 50 }, { "epoch": 0.3, "grad_norm": 20.588834762573242, "learning_rate": 7.761655646386936e-05, "loss": 0.3073, "step": 60 }, { "epoch": 0.35, "grad_norm": 1.3618388175964355, "learning_rate": 7.67908484163814e-05, "loss": 0.4359, "step": 70 }, { "epoch": 0.4, "grad_norm": 7.6314921379089355, "learning_rate": 7.596514036889342e-05, "loss": 0.5815, "step": 80 }, { "epoch": 0.45, "grad_norm": 5.3096113204956055, "learning_rate": 7.513943232140545e-05, "loss": 0.4516, "step": 90 }, { "epoch": 0.5, "grad_norm": 8.00711441040039, "learning_rate": 7.431372427391748e-05, "loss": 0.5594, "step": 100 }, { "epoch": 0.55, "grad_norm": 2.1074318885803223, "learning_rate": 7.348801622642951e-05, "loss": 0.4839, "step": 110 }, { "epoch": 0.6, "grad_norm": 1.2857903242111206, "learning_rate": 7.266230817894153e-05, "loss": 0.3655, "step": 120 }, { "epoch": 0.65, "grad_norm": 0.47830930352211, "learning_rate": 7.183660013145356e-05, "loss": 0.4362, "step": 130 }, { "epoch": 0.7, "grad_norm": 6.92142915725708, "learning_rate": 7.101089208396559e-05, "loss": 0.3585, "step": 140 }, { "epoch": 0.75, "grad_norm": 12.701593399047852, "learning_rate": 7.018518403647762e-05, "loss": 0.4539, "step": 150 }, { "epoch": 0.8, "grad_norm": 3.4179468154907227, "learning_rate": 6.935947598898965e-05, "loss": 0.291, "step": 160 }, { "epoch": 0.85, "grad_norm": 6.713927745819092, "learning_rate": 6.853376794150168e-05, "loss": 0.533, "step": 170 }, { "epoch": 0.9, "grad_norm": 4.076853275299072, "learning_rate": 6.77080598940137e-05, "loss": 0.4721, "step": 180 }, { "epoch": 0.95, "grad_norm": 4.515071868896484, "learning_rate": 6.688235184652574e-05, "loss": 0.4085, "step": 190 }, { "epoch": 1.0, "grad_norm": 5.7936811447143555, "learning_rate": 6.605664379903777e-05, "loss": 0.391, "step": 200 }, { "epoch": 1.0, "eval_accuracy": 0.8725, "eval_f1": 0.7901234567901234, "eval_loss": 0.337982714176178, "eval_precision": 0.8, "eval_recall": 0.7804878048780488, "eval_runtime": 1.5299, "eval_samples_per_second": 261.453, "eval_steps_per_second": 16.341, "step": 200 } ], "logging_steps": 10, "max_steps": 1000, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 211815370450944.0, "train_batch_size": 8, "trial_name": null, "trial_params": { "_wandb": {}, "assignments": {}, "learning_rate": 8.25708047487972e-05, "metric": "eval/loss", "num_train_epochs": 5, "per_device_train_batch_size": 8, "seed": 17 } }