{ "best_metric": 0.791124713083397, "best_model_checkpoint": "distilbert-base-multilingual-cased-aoe-hyper/run-2/checkpoint-492", "epoch": 3.0, "eval_steps": 500, "global_step": 492, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06097560975609756, "grad_norm": 3.2577075958251953, "learning_rate": 1.7097812500543722e-06, "loss": 0.7201, "step": 10 }, { "epoch": 0.12195121951219512, "grad_norm": 1.7486741542816162, "learning_rate": 1.6743086100117504e-06, "loss": 0.6884, "step": 20 }, { "epoch": 0.18292682926829268, "grad_norm": 2.3384933471679688, "learning_rate": 1.6388359699691285e-06, "loss": 0.6482, "step": 30 }, { "epoch": 0.24390243902439024, "grad_norm": 2.0673463344573975, "learning_rate": 1.6033633299265067e-06, "loss": 0.6254, "step": 40 }, { "epoch": 0.3048780487804878, "grad_norm": 1.9012755155563354, "learning_rate": 1.5678906898838849e-06, "loss": 0.6049, "step": 50 }, { "epoch": 0.36585365853658536, "grad_norm": 1.1691405773162842, "learning_rate": 1.532418049841263e-06, "loss": 0.5601, "step": 60 }, { "epoch": 0.4268292682926829, "grad_norm": 2.259779691696167, "learning_rate": 1.4969454097986412e-06, "loss": 0.5567, "step": 70 }, { "epoch": 0.4878048780487805, "grad_norm": 1.7687610387802124, "learning_rate": 1.4614727697560194e-06, "loss": 0.5499, "step": 80 }, { "epoch": 0.5487804878048781, "grad_norm": 1.2938015460968018, "learning_rate": 1.4260001297133975e-06, "loss": 0.5247, "step": 90 }, { "epoch": 0.6097560975609756, "grad_norm": 1.534727692604065, "learning_rate": 1.3905274896707757e-06, "loss": 0.5265, "step": 100 }, { "epoch": 0.6707317073170732, "grad_norm": 1.6018732786178589, "learning_rate": 1.3550548496281539e-06, "loss": 0.4982, "step": 110 }, { "epoch": 0.7317073170731707, "grad_norm": 1.9590264558792114, "learning_rate": 1.319582209585532e-06, "loss": 0.4593, "step": 120 }, { "epoch": 0.7926829268292683, "grad_norm": 1.6383683681488037, "learning_rate": 1.2841095695429102e-06, "loss": 0.5132, "step": 130 }, { "epoch": 0.8536585365853658, "grad_norm": 1.6192731857299805, "learning_rate": 1.2486369295002884e-06, "loss": 0.4783, "step": 140 }, { "epoch": 0.9146341463414634, "grad_norm": 1.8902620077133179, "learning_rate": 1.2131642894576666e-06, "loss": 0.4747, "step": 150 }, { "epoch": 0.975609756097561, "grad_norm": 2.1500866413116455, "learning_rate": 1.1776916494150447e-06, "loss": 0.5078, "step": 160 }, { "epoch": 1.0, "eval_accuracy": 0.7697016067329763, "eval_f1": 0.0960960960960961, "eval_loss": 0.4581395089626312, "eval_precision": 0.7619047619047619, "eval_recall": 0.05128205128205128, "eval_runtime": 4.8871, "eval_samples_per_second": 267.439, "eval_steps_per_second": 16.779, "step": 164 }, { "epoch": 1.0365853658536586, "grad_norm": 2.7179384231567383, "learning_rate": 1.1422190093724229e-06, "loss": 0.4381, "step": 170 }, { "epoch": 1.0975609756097562, "grad_norm": 1.8275049924850464, "learning_rate": 1.106746369329801e-06, "loss": 0.446, "step": 180 }, { "epoch": 1.1585365853658536, "grad_norm": 1.6741889715194702, "learning_rate": 1.0712737292871792e-06, "loss": 0.4451, "step": 190 }, { "epoch": 1.2195121951219512, "grad_norm": 1.8023988008499146, "learning_rate": 1.0358010892445574e-06, "loss": 0.4538, "step": 200 }, { "epoch": 1.2804878048780488, "grad_norm": 2.3258135318756104, "learning_rate": 1.0003284492019356e-06, "loss": 0.4531, "step": 210 }, { "epoch": 1.3414634146341464, "grad_norm": 2.654583215713501, "learning_rate": 9.648558091593137e-07, "loss": 0.426, "step": 220 }, { "epoch": 1.4024390243902438, "grad_norm": 2.7538747787475586, "learning_rate": 9.293831691166919e-07, "loss": 0.4056, "step": 230 }, { "epoch": 1.4634146341463414, "grad_norm": 1.887471079826355, "learning_rate": 8.939105290740702e-07, "loss": 0.448, "step": 240 }, { "epoch": 1.524390243902439, "grad_norm": 1.7449885606765747, "learning_rate": 8.584378890314483e-07, "loss": 0.4132, "step": 250 }, { "epoch": 1.5853658536585367, "grad_norm": 1.4998096227645874, "learning_rate": 8.229652489888265e-07, "loss": 0.4164, "step": 260 }, { "epoch": 1.6463414634146343, "grad_norm": 4.520744800567627, "learning_rate": 7.874926089462047e-07, "loss": 0.4662, "step": 270 }, { "epoch": 1.7073170731707317, "grad_norm": 1.9681720733642578, "learning_rate": 7.520199689035828e-07, "loss": 0.3977, "step": 280 }, { "epoch": 1.7682926829268293, "grad_norm": 3.49233341217041, "learning_rate": 7.16547328860961e-07, "loss": 0.4133, "step": 290 }, { "epoch": 1.8292682926829267, "grad_norm": 2.177455425262451, "learning_rate": 6.810746888183392e-07, "loss": 0.4403, "step": 300 }, { "epoch": 1.8902439024390243, "grad_norm": 3.338192939758301, "learning_rate": 6.456020487757174e-07, "loss": 0.4154, "step": 310 }, { "epoch": 1.951219512195122, "grad_norm": 2.005662441253662, "learning_rate": 6.101294087330955e-07, "loss": 0.4137, "step": 320 }, { "epoch": 2.0, "eval_accuracy": 0.7888293802601377, "eval_f1": 0.31, "eval_loss": 0.399868905544281, "eval_precision": 0.7045454545454546, "eval_recall": 0.1987179487179487, "eval_runtime": 4.9013, "eval_samples_per_second": 266.666, "eval_steps_per_second": 16.73, "step": 328 }, { "epoch": 2.0121951219512195, "grad_norm": 1.8187142610549927, "learning_rate": 5.746567686904737e-07, "loss": 0.4231, "step": 330 }, { "epoch": 2.073170731707317, "grad_norm": 2.4677720069885254, "learning_rate": 5.391841286478519e-07, "loss": 0.3832, "step": 340 }, { "epoch": 2.1341463414634148, "grad_norm": 2.2957379817962646, "learning_rate": 5.0371148860523e-07, "loss": 0.3671, "step": 350 }, { "epoch": 2.1951219512195124, "grad_norm": 1.6115466356277466, "learning_rate": 4.682388485626082e-07, "loss": 0.3776, "step": 360 }, { "epoch": 2.2560975609756095, "grad_norm": 2.2633934020996094, "learning_rate": 4.327662085199863e-07, "loss": 0.4199, "step": 370 }, { "epoch": 2.317073170731707, "grad_norm": 3.485372304916382, "learning_rate": 3.972935684773645e-07, "loss": 0.3784, "step": 380 }, { "epoch": 2.3780487804878048, "grad_norm": 1.9504722356796265, "learning_rate": 3.6182092843474265e-07, "loss": 0.3538, "step": 390 }, { "epoch": 2.4390243902439024, "grad_norm": 2.1430771350860596, "learning_rate": 3.263482883921208e-07, "loss": 0.4277, "step": 400 }, { "epoch": 2.5, "grad_norm": 1.9325039386749268, "learning_rate": 2.90875648349499e-07, "loss": 0.4083, "step": 410 }, { "epoch": 2.5609756097560976, "grad_norm": 1.617710828781128, "learning_rate": 2.5540300830687715e-07, "loss": 0.4297, "step": 420 }, { "epoch": 2.6219512195121952, "grad_norm": 3.280532121658325, "learning_rate": 2.1993036826425532e-07, "loss": 0.4056, "step": 430 }, { "epoch": 2.682926829268293, "grad_norm": 2.3135297298431396, "learning_rate": 1.8445772822163352e-07, "loss": 0.438, "step": 440 }, { "epoch": 2.7439024390243905, "grad_norm": 2.195956230163574, "learning_rate": 1.489850881790117e-07, "loss": 0.4187, "step": 450 }, { "epoch": 2.8048780487804876, "grad_norm": 2.371246099472046, "learning_rate": 1.1351244813638987e-07, "loss": 0.3459, "step": 460 }, { "epoch": 2.8658536585365852, "grad_norm": 3.243722915649414, "learning_rate": 7.803980809376802e-08, "loss": 0.3754, "step": 470 }, { "epoch": 2.926829268292683, "grad_norm": 2.472179412841797, "learning_rate": 4.25671680511462e-08, "loss": 0.4097, "step": 480 }, { "epoch": 2.9878048780487805, "grad_norm": 2.2248072624206543, "learning_rate": 7.094528008524367e-09, "loss": 0.3784, "step": 490 }, { "epoch": 3.0, "eval_accuracy": 0.791124713083397, "eval_f1": 0.3357664233576642, "eval_loss": 0.386940062046051, "eval_precision": 0.696969696969697, "eval_recall": 0.22115384615384615, "eval_runtime": 4.9814, "eval_samples_per_second": 262.374, "eval_steps_per_second": 16.461, "step": 492 } ], "logging_steps": 10, "max_steps": 492, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2071790114979840.0, "train_batch_size": 32, "trial_name": null, "trial_params": { "learning_rate": 1.745253890096994e-06, "num_train_epochs": 3, "per_device_train_batch_size": 32, "seed": 32 } }