{ "best_metric": 0.7644787644787645, "best_model_checkpoint": "distilbert-base-multilingual-cased-hyper-matt/run-xxn3kofd/checkpoint-400", "epoch": 2.0, "eval_steps": 500, "global_step": 400, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": 2.57446026802063, "learning_rate": 8.430421587841344e-05, "loss": 0.6104, "step": 10 }, { "epoch": 0.1, "grad_norm": 72.49688720703125, "learning_rate": 8.214256931742847e-05, "loss": 0.4612, "step": 20 }, { "epoch": 0.15, "grad_norm": 3.086334705352783, "learning_rate": 7.998092275644352e-05, "loss": 0.4366, "step": 30 }, { "epoch": 0.2, "grad_norm": 1.2897779941558838, "learning_rate": 7.781927619545856e-05, "loss": 0.4857, "step": 40 }, { "epoch": 0.25, "grad_norm": 2.7454018592834473, "learning_rate": 7.56576296344736e-05, "loss": 0.4842, "step": 50 }, { "epoch": 0.3, "grad_norm": 13.36482048034668, "learning_rate": 7.349598307348864e-05, "loss": 0.6072, "step": 60 }, { "epoch": 0.35, "grad_norm": 1.689212441444397, "learning_rate": 7.133433651250367e-05, "loss": 0.3871, "step": 70 }, { "epoch": 0.4, "grad_norm": 1.120545744895935, "learning_rate": 6.917268995151872e-05, "loss": 0.5613, "step": 80 }, { "epoch": 0.45, "grad_norm": 12.049583435058594, "learning_rate": 6.701104339053376e-05, "loss": 0.5383, "step": 90 }, { "epoch": 0.5, "grad_norm": 1.9796591997146606, "learning_rate": 6.48493968295488e-05, "loss": 0.5167, "step": 100 }, { "epoch": 0.55, "grad_norm": 1.916353464126587, "learning_rate": 6.268775026856383e-05, "loss": 0.5366, "step": 110 }, { "epoch": 0.6, "grad_norm": 3.923786163330078, "learning_rate": 6.052610370757888e-05, "loss": 0.4549, "step": 120 }, { "epoch": 0.65, "grad_norm": 7.377273082733154, "learning_rate": 5.836445714659392e-05, "loss": 0.3999, "step": 130 }, { "epoch": 0.7, "grad_norm": 8.601259231567383, "learning_rate": 5.6202810585608965e-05, "loss": 0.2955, "step": 140 }, { "epoch": 0.75, "grad_norm": 21.193166732788086, "learning_rate": 5.4041164024624e-05, "loss": 0.4126, "step": 150 }, { "epoch": 0.8, "grad_norm": 13.54879379272461, "learning_rate": 5.187951746363904e-05, "loss": 0.3501, "step": 160 }, { "epoch": 0.85, "grad_norm": 2.4327757358551025, "learning_rate": 4.9717870902654076e-05, "loss": 0.4196, "step": 170 }, { "epoch": 0.9, "grad_norm": 4.049587249755859, "learning_rate": 4.7556224341669126e-05, "loss": 0.2942, "step": 180 }, { "epoch": 0.95, "grad_norm": 14.283734321594238, "learning_rate": 4.539457778068416e-05, "loss": 0.4683, "step": 190 }, { "epoch": 1.0, "grad_norm": 2.0148279666900635, "learning_rate": 4.32329312196992e-05, "loss": 0.4988, "step": 200 }, { "epoch": 1.0, "eval_accuracy": 0.705, "eval_f1": 0.6685393258426966, "eval_loss": 0.5600033402442932, "eval_precision": 0.5107296137339056, "eval_recall": 0.967479674796748, "eval_runtime": 1.5406, "eval_samples_per_second": 259.637, "eval_steps_per_second": 16.227, "step": 200 }, { "epoch": 1.05, "grad_norm": 13.742440223693848, "learning_rate": 4.107128465871424e-05, "loss": 0.454, "step": 210 }, { "epoch": 1.1, "grad_norm": 5.356462001800537, "learning_rate": 3.890963809772928e-05, "loss": 0.3232, "step": 220 }, { "epoch": 1.15, "grad_norm": 8.677199363708496, "learning_rate": 3.674799153674432e-05, "loss": 0.3169, "step": 230 }, { "epoch": 1.2, "grad_norm": 4.404296875, "learning_rate": 3.458634497575936e-05, "loss": 0.264, "step": 240 }, { "epoch": 1.25, "grad_norm": 2.6422922611236572, "learning_rate": 3.24246984147744e-05, "loss": 0.1895, "step": 250 }, { "epoch": 1.3, "grad_norm": 1.4365283250808716, "learning_rate": 3.026305185378944e-05, "loss": 0.3452, "step": 260 }, { "epoch": 1.35, "grad_norm": 5.857382297515869, "learning_rate": 2.8101405292804482e-05, "loss": 0.2193, "step": 270 }, { "epoch": 1.4, "grad_norm": 4.946323394775391, "learning_rate": 2.593975873181952e-05, "loss": 0.6003, "step": 280 }, { "epoch": 1.45, "grad_norm": 3.101881504058838, "learning_rate": 2.3778112170834563e-05, "loss": 0.3291, "step": 290 }, { "epoch": 1.5, "grad_norm": 11.071148872375488, "learning_rate": 2.16164656098496e-05, "loss": 0.3606, "step": 300 }, { "epoch": 1.55, "grad_norm": 3.46047306060791, "learning_rate": 1.945481904886464e-05, "loss": 0.2745, "step": 310 }, { "epoch": 1.6, "grad_norm": 7.391355991363525, "learning_rate": 1.729317248787968e-05, "loss": 0.5095, "step": 320 }, { "epoch": 1.65, "grad_norm": 0.8834757208824158, "learning_rate": 1.513152592689472e-05, "loss": 0.247, "step": 330 }, { "epoch": 1.7, "grad_norm": 0.27169790863990784, "learning_rate": 1.296987936590976e-05, "loss": 0.4244, "step": 340 }, { "epoch": 1.75, "grad_norm": 1.2865382432937622, "learning_rate": 1.08082328049248e-05, "loss": 0.4902, "step": 350 }, { "epoch": 1.8, "grad_norm": 3.9220056533813477, "learning_rate": 8.64658624393984e-06, "loss": 0.3548, "step": 360 }, { "epoch": 1.85, "grad_norm": 2.0294697284698486, "learning_rate": 6.48493968295488e-06, "loss": 0.3243, "step": 370 }, { "epoch": 1.9, "grad_norm": 9.069091796875, "learning_rate": 4.32329312196992e-06, "loss": 0.3318, "step": 380 }, { "epoch": 1.95, "grad_norm": 3.9282755851745605, "learning_rate": 2.16164656098496e-06, "loss": 0.221, "step": 390 }, { "epoch": 2.0, "grad_norm": 1.1504770517349243, "learning_rate": 0.0, "loss": 0.365, "step": 400 }, { "epoch": 2.0, "eval_accuracy": 0.8475, "eval_f1": 0.7644787644787645, "eval_loss": 0.35866764187812805, "eval_precision": 0.7279411764705882, "eval_recall": 0.8048780487804879, "eval_runtime": 1.5614, "eval_samples_per_second": 256.176, "eval_steps_per_second": 16.011, "step": 400 } ], "logging_steps": 10, "max_steps": 400, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 423630740901888.0, "train_batch_size": 8, "trial_name": null, "trial_params": { "_wandb": {}, "assignments": {}, "learning_rate": 8.64658624393984e-05, "metric": "eval/loss", "num_train_epochs": 2, "per_device_train_batch_size": 8, "seed": 7 } }