{ "best_metric": 0.7865168539325843, "best_model_checkpoint": "distilbert-base-multilingual-cased-hyper-matt/run-ktqxy3xz/checkpoint-400", "epoch": 1.0, "eval_steps": 500, "global_step": 400, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.025, "grad_norm": 2.0802054405212402, "learning_rate": 4.896382897724454e-05, "loss": 0.6556, "step": 10 }, { "epoch": 0.05, "grad_norm": 3.7415668964385986, "learning_rate": 4.7708346182956214e-05, "loss": 0.4257, "step": 20 }, { "epoch": 0.075, "grad_norm": 6.465210437774658, "learning_rate": 4.64528633886679e-05, "loss": 0.8034, "step": 30 }, { "epoch": 0.1, "grad_norm": 2.8712220191955566, "learning_rate": 4.5197380594379574e-05, "loss": 0.6423, "step": 40 }, { "epoch": 0.125, "grad_norm": 2.292738676071167, "learning_rate": 4.3941897800091254e-05, "loss": 0.6402, "step": 50 }, { "epoch": 0.15, "grad_norm": 3.6097824573516846, "learning_rate": 4.268641500580293e-05, "loss": 0.412, "step": 60 }, { "epoch": 0.175, "grad_norm": 3.220292806625366, "learning_rate": 4.143093221151461e-05, "loss": 0.5088, "step": 70 }, { "epoch": 0.2, "grad_norm": 0.5234849452972412, "learning_rate": 4.0175449417226294e-05, "loss": 0.3435, "step": 80 }, { "epoch": 0.225, "grad_norm": 1.0715090036392212, "learning_rate": 3.891996662293797e-05, "loss": 0.7451, "step": 90 }, { "epoch": 0.25, "grad_norm": 6.492457389831543, "learning_rate": 3.766448382864965e-05, "loss": 0.2199, "step": 100 }, { "epoch": 0.275, "grad_norm": 1.9438965320587158, "learning_rate": 3.640900103436132e-05, "loss": 0.7001, "step": 110 }, { "epoch": 0.3, "grad_norm": 0.4070729613304138, "learning_rate": 3.5153518240073e-05, "loss": 0.4277, "step": 120 }, { "epoch": 0.325, "grad_norm": 0.3407312035560608, "learning_rate": 3.389803544578468e-05, "loss": 0.2982, "step": 130 }, { "epoch": 0.35, "grad_norm": 0.44988295435905457, "learning_rate": 3.264255265149636e-05, "loss": 0.3097, "step": 140 }, { "epoch": 0.375, "grad_norm": 0.9247645735740662, "learning_rate": 3.138706985720804e-05, "loss": 1.0367, "step": 150 }, { "epoch": 0.4, "grad_norm": 2.748664140701294, "learning_rate": 3.0131587062919714e-05, "loss": 0.3645, "step": 160 }, { "epoch": 0.425, "grad_norm": 7.6074910163879395, "learning_rate": 2.8876104268631394e-05, "loss": 0.5364, "step": 170 }, { "epoch": 0.45, "grad_norm": 6.632686138153076, "learning_rate": 2.7620621474343077e-05, "loss": 0.4973, "step": 180 }, { "epoch": 0.475, "grad_norm": 0.3361460268497467, "learning_rate": 2.6365138680054754e-05, "loss": 0.4819, "step": 190 }, { "epoch": 0.5, "grad_norm": 5.405449390411377, "learning_rate": 2.510965588576643e-05, "loss": 0.3588, "step": 200 }, { "epoch": 0.525, "grad_norm": 0.504410445690155, "learning_rate": 2.3854173091478107e-05, "loss": 0.5494, "step": 210 }, { "epoch": 0.55, "grad_norm": 3.5160725116729736, "learning_rate": 2.2598690297189787e-05, "loss": 0.5517, "step": 220 }, { "epoch": 0.575, "grad_norm": 10.914329528808594, "learning_rate": 2.1343207502901464e-05, "loss": 0.4456, "step": 230 }, { "epoch": 0.6, "grad_norm": 13.415484428405762, "learning_rate": 2.0087724708613147e-05, "loss": 0.3435, "step": 240 }, { "epoch": 0.625, "grad_norm": 0.3822052478790283, "learning_rate": 1.8832241914324824e-05, "loss": 0.4514, "step": 250 }, { "epoch": 0.65, "grad_norm": 13.96898365020752, "learning_rate": 1.75767591200365e-05, "loss": 0.5878, "step": 260 }, { "epoch": 0.675, "grad_norm": 14.09548568725586, "learning_rate": 1.632127632574818e-05, "loss": 0.4703, "step": 270 }, { "epoch": 0.7, "grad_norm": 8.53896713256836, "learning_rate": 1.5065793531459857e-05, "loss": 0.3875, "step": 280 }, { "epoch": 0.725, "grad_norm": 7.014000415802002, "learning_rate": 1.3810310737171539e-05, "loss": 0.2587, "step": 290 }, { "epoch": 0.75, "grad_norm": 4.1433258056640625, "learning_rate": 1.2554827942883215e-05, "loss": 0.4717, "step": 300 }, { "epoch": 0.775, "grad_norm": 0.7594526410102844, "learning_rate": 1.1299345148594894e-05, "loss": 0.6532, "step": 310 }, { "epoch": 0.8, "grad_norm": 3.1434805393218994, "learning_rate": 1.0043862354306574e-05, "loss": 0.3508, "step": 320 }, { "epoch": 0.825, "grad_norm": 18.727951049804688, "learning_rate": 8.78837956001825e-06, "loss": 0.3343, "step": 330 }, { "epoch": 0.85, "grad_norm": 4.23914909362793, "learning_rate": 7.5328967657299285e-06, "loss": 0.2194, "step": 340 }, { "epoch": 0.875, "grad_norm": 14.330482482910156, "learning_rate": 6.277413971441608e-06, "loss": 0.4582, "step": 350 }, { "epoch": 0.9, "grad_norm": 0.7658752202987671, "learning_rate": 5.021931177153287e-06, "loss": 0.4202, "step": 360 }, { "epoch": 0.925, "grad_norm": 0.7938013672828674, "learning_rate": 3.7664483828649642e-06, "loss": 0.2567, "step": 370 }, { "epoch": 0.95, "grad_norm": 1.5704269409179688, "learning_rate": 2.5109655885766434e-06, "loss": 0.6044, "step": 380 }, { "epoch": 0.975, "grad_norm": 5.166118621826172, "learning_rate": 1.2554827942883217e-06, "loss": 0.3807, "step": 390 }, { "epoch": 1.0, "grad_norm": 0.3811684250831604, "learning_rate": 0.0, "loss": 0.4158, "step": 400 }, { "epoch": 1.0, "eval_accuracy": 0.8575, "eval_f1": 0.7865168539325843, "eval_loss": 0.42347240447998047, "eval_precision": 0.7291666666666666, "eval_recall": 0.8536585365853658, "eval_runtime": 1.5735, "eval_samples_per_second": 254.213, "eval_steps_per_second": 15.888, "step": 400 } ], "logging_steps": 10, "max_steps": 400, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 211815370450944.0, "train_batch_size": 4, "trial_name": null, "trial_params": { "_wandb": {}, "assignments": {}, "learning_rate": 5.021931177153286e-05, "metric": "eval/loss", "num_train_epochs": 1, "per_device_train_batch_size": 4, "seed": 26 } }