{ "best_metric": 0.7983870967741935, "best_model_checkpoint": "distilbert-base-multilingual-cased-hyper-matt/run-qvrnjpot/checkpoint-800", "epoch": 4.0, "eval_steps": 500, "global_step": 800, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": 1.7631841897964478, "learning_rate": 6.160315650515145e-05, "loss": 0.617, "step": 10 }, { "epoch": 0.1, "grad_norm": 2.8386940956115723, "learning_rate": 6.082336971394699e-05, "loss": 0.5112, "step": 20 }, { "epoch": 0.15, "grad_norm": 2.7043747901916504, "learning_rate": 6.004358292274255e-05, "loss": 0.4761, "step": 30 }, { "epoch": 0.2, "grad_norm": 2.6273417472839355, "learning_rate": 5.9263796131538093e-05, "loss": 0.5074, "step": 40 }, { "epoch": 0.25, "grad_norm": 5.05076265335083, "learning_rate": 5.8484009340333646e-05, "loss": 0.3454, "step": 50 }, { "epoch": 0.3, "grad_norm": 6.585201263427734, "learning_rate": 5.77042225491292e-05, "loss": 0.4747, "step": 60 }, { "epoch": 0.35, "grad_norm": 11.389758110046387, "learning_rate": 5.6924435757924745e-05, "loss": 0.5088, "step": 70 }, { "epoch": 0.4, "grad_norm": 17.25305938720703, "learning_rate": 5.6144648966720304e-05, "loss": 0.3702, "step": 80 }, { "epoch": 0.45, "grad_norm": 4.934868335723877, "learning_rate": 5.536486217551585e-05, "loss": 0.2679, "step": 90 }, { "epoch": 0.5, "grad_norm": 8.351325035095215, "learning_rate": 5.45850753843114e-05, "loss": 0.4069, "step": 100 }, { "epoch": 0.55, "grad_norm": 8.08082389831543, "learning_rate": 5.3805288593106956e-05, "loss": 0.5033, "step": 110 }, { "epoch": 0.6, "grad_norm": 1.6448326110839844, "learning_rate": 5.30255018019025e-05, "loss": 0.3679, "step": 120 }, { "epoch": 0.65, "grad_norm": 2.436563014984131, "learning_rate": 5.224571501069806e-05, "loss": 0.506, "step": 130 }, { "epoch": 0.7, "grad_norm": 1.7677550315856934, "learning_rate": 5.146592821949361e-05, "loss": 0.351, "step": 140 }, { "epoch": 0.75, "grad_norm": 5.091623783111572, "learning_rate": 5.068614142828916e-05, "loss": 0.4919, "step": 150 }, { "epoch": 0.8, "grad_norm": 12.155316352844238, "learning_rate": 4.990635463708471e-05, "loss": 0.4279, "step": 160 }, { "epoch": 0.85, "grad_norm": 2.3314337730407715, "learning_rate": 4.912656784588026e-05, "loss": 0.4913, "step": 170 }, { "epoch": 0.9, "grad_norm": 6.5636210441589355, "learning_rate": 4.834678105467582e-05, "loss": 0.3929, "step": 180 }, { "epoch": 0.95, "grad_norm": 2.8381412029266357, "learning_rate": 4.7566994263471364e-05, "loss": 0.2571, "step": 190 }, { "epoch": 1.0, "grad_norm": 0.629071056842804, "learning_rate": 4.678720747226692e-05, "loss": 0.2667, "step": 200 }, { "epoch": 1.0, "eval_accuracy": 0.8525, "eval_f1": 0.7704280155642024, "eval_loss": 0.35932326316833496, "eval_precision": 0.7388059701492538, "eval_recall": 0.8048780487804879, "eval_runtime": 1.5245, "eval_samples_per_second": 262.373, "eval_steps_per_second": 16.398, "step": 200 }, { "epoch": 1.05, "grad_norm": 0.6173528432846069, "learning_rate": 4.600742068106247e-05, "loss": 0.1887, "step": 210 }, { "epoch": 1.1, "grad_norm": 0.25395506620407104, "learning_rate": 4.5227633889858016e-05, "loss": 0.2535, "step": 220 }, { "epoch": 1.15, "grad_norm": 25.887020111083984, "learning_rate": 4.4447847098653575e-05, "loss": 0.1868, "step": 230 }, { "epoch": 1.2, "grad_norm": 0.13451418280601501, "learning_rate": 4.366806030744912e-05, "loss": 0.3065, "step": 240 }, { "epoch": 1.25, "grad_norm": 0.36101168394088745, "learning_rate": 4.2888273516244674e-05, "loss": 0.249, "step": 250 }, { "epoch": 1.3, "grad_norm": 7.472021579742432, "learning_rate": 4.2108486725040227e-05, "loss": 0.2548, "step": 260 }, { "epoch": 1.35, "grad_norm": 8.727078437805176, "learning_rate": 4.132869993383577e-05, "loss": 0.2963, "step": 270 }, { "epoch": 1.4, "grad_norm": 13.003727912902832, "learning_rate": 4.054891314263133e-05, "loss": 0.3822, "step": 280 }, { "epoch": 1.45, "grad_norm": 0.511164665222168, "learning_rate": 3.976912635142688e-05, "loss": 0.2591, "step": 290 }, { "epoch": 1.5, "grad_norm": 0.3373236656188965, "learning_rate": 3.898933956022243e-05, "loss": 0.2428, "step": 300 }, { "epoch": 1.55, "grad_norm": 1.4264609813690186, "learning_rate": 3.8209552769017983e-05, "loss": 0.3269, "step": 310 }, { "epoch": 1.6, "grad_norm": 0.13309431076049805, "learning_rate": 3.742976597781353e-05, "loss": 0.2393, "step": 320 }, { "epoch": 1.65, "grad_norm": 10.90920639038086, "learning_rate": 3.664997918660909e-05, "loss": 0.306, "step": 330 }, { "epoch": 1.7, "grad_norm": 13.362682342529297, "learning_rate": 3.5870192395404635e-05, "loss": 0.3569, "step": 340 }, { "epoch": 1.75, "grad_norm": 4.543549537658691, "learning_rate": 3.509040560420019e-05, "loss": 0.3563, "step": 350 }, { "epoch": 1.8, "grad_norm": 4.552388668060303, "learning_rate": 3.431061881299574e-05, "loss": 0.3519, "step": 360 }, { "epoch": 1.85, "grad_norm": 6.557765960693359, "learning_rate": 3.3530832021791286e-05, "loss": 0.2307, "step": 370 }, { "epoch": 1.9, "grad_norm": 8.15699577331543, "learning_rate": 3.2751045230586846e-05, "loss": 0.2675, "step": 380 }, { "epoch": 1.95, "grad_norm": 0.4172225892543793, "learning_rate": 3.197125843938239e-05, "loss": 0.1495, "step": 390 }, { "epoch": 2.0, "grad_norm": 3.5596890449523926, "learning_rate": 3.1191471648177945e-05, "loss": 0.3743, "step": 400 }, { "epoch": 2.0, "eval_accuracy": 0.88, "eval_f1": 0.7948717948717948, "eval_loss": 0.3793269395828247, "eval_precision": 0.8378378378378378, "eval_recall": 0.7560975609756098, "eval_runtime": 1.5273, "eval_samples_per_second": 261.897, "eval_steps_per_second": 16.369, "step": 400 }, { "epoch": 2.05, "grad_norm": 0.26775601506233215, "learning_rate": 3.0411684856973494e-05, "loss": 0.1442, "step": 410 }, { "epoch": 2.1, "grad_norm": 0.1751158982515335, "learning_rate": 2.9631898065769047e-05, "loss": 0.1926, "step": 420 }, { "epoch": 2.15, "grad_norm": 0.27150827646255493, "learning_rate": 2.88521112745646e-05, "loss": 0.1173, "step": 430 }, { "epoch": 2.2, "grad_norm": 11.543705940246582, "learning_rate": 2.8072324483360152e-05, "loss": 0.2252, "step": 440 }, { "epoch": 2.25, "grad_norm": 1.7563936710357666, "learning_rate": 2.72925376921557e-05, "loss": 0.2043, "step": 450 }, { "epoch": 2.3, "grad_norm": 0.22426572442054749, "learning_rate": 2.651275090095125e-05, "loss": 0.089, "step": 460 }, { "epoch": 2.35, "grad_norm": 0.09884244948625565, "learning_rate": 2.5732964109746804e-05, "loss": 0.0607, "step": 470 }, { "epoch": 2.4, "grad_norm": 0.5133824348449707, "learning_rate": 2.4953177318542356e-05, "loss": 0.2, "step": 480 }, { "epoch": 2.45, "grad_norm": 1.0894460678100586, "learning_rate": 2.417339052733791e-05, "loss": 0.2041, "step": 490 }, { "epoch": 2.5, "grad_norm": 20.391416549682617, "learning_rate": 2.339360373613346e-05, "loss": 0.3932, "step": 500 }, { "epoch": 2.55, "grad_norm": 1.9286866188049316, "learning_rate": 2.2613816944929008e-05, "loss": 0.0993, "step": 510 }, { "epoch": 2.6, "grad_norm": 0.09835375845432281, "learning_rate": 2.183403015372456e-05, "loss": 0.0856, "step": 520 }, { "epoch": 2.65, "grad_norm": 0.2403247058391571, "learning_rate": 2.1054243362520113e-05, "loss": 0.1441, "step": 530 }, { "epoch": 2.7, "grad_norm": 0.10543715208768845, "learning_rate": 2.0274456571315666e-05, "loss": 0.1147, "step": 540 }, { "epoch": 2.75, "grad_norm": 10.961527824401855, "learning_rate": 1.9494669780111215e-05, "loss": 0.2312, "step": 550 }, { "epoch": 2.8, "grad_norm": 29.004623413085938, "learning_rate": 1.8714882988906765e-05, "loss": 0.2722, "step": 560 }, { "epoch": 2.85, "grad_norm": 0.12045635282993317, "learning_rate": 1.7935096197702317e-05, "loss": 0.336, "step": 570 }, { "epoch": 2.9, "grad_norm": 20.202497482299805, "learning_rate": 1.715530940649787e-05, "loss": 0.0886, "step": 580 }, { "epoch": 2.95, "grad_norm": 5.752193450927734, "learning_rate": 1.6375522615293423e-05, "loss": 0.1171, "step": 590 }, { "epoch": 3.0, "grad_norm": 25.30577278137207, "learning_rate": 1.5595735824088972e-05, "loss": 0.1935, "step": 600 }, { "epoch": 3.0, "eval_accuracy": 0.88, "eval_f1": 0.7931034482758621, "eval_loss": 0.4657374620437622, "eval_precision": 0.8440366972477065, "eval_recall": 0.7479674796747967, "eval_runtime": 1.5234, "eval_samples_per_second": 262.573, "eval_steps_per_second": 16.411, "step": 600 }, { "epoch": 3.05, "grad_norm": 0.058765921741724014, "learning_rate": 1.4815949032884523e-05, "loss": 0.0065, "step": 610 }, { "epoch": 3.1, "grad_norm": 0.05543431267142296, "learning_rate": 1.4036162241680076e-05, "loss": 0.1243, "step": 620 }, { "epoch": 3.15, "grad_norm": 4.4582014083862305, "learning_rate": 1.3256375450475625e-05, "loss": 0.0124, "step": 630 }, { "epoch": 3.2, "grad_norm": 0.7271829843521118, "learning_rate": 1.2476588659271178e-05, "loss": 0.0167, "step": 640 }, { "epoch": 3.25, "grad_norm": 0.07129786163568497, "learning_rate": 1.169680186806673e-05, "loss": 0.0047, "step": 650 }, { "epoch": 3.3, "grad_norm": 0.040439434349536896, "learning_rate": 1.091701507686228e-05, "loss": 0.0031, "step": 660 }, { "epoch": 3.35, "grad_norm": 0.032109107822179794, "learning_rate": 1.0137228285657833e-05, "loss": 0.0047, "step": 670 }, { "epoch": 3.4, "grad_norm": 0.068085677921772, "learning_rate": 9.357441494453382e-06, "loss": 0.1646, "step": 680 }, { "epoch": 3.45, "grad_norm": 0.031069407239556313, "learning_rate": 8.577654703248935e-06, "loss": 0.267, "step": 690 }, { "epoch": 3.5, "grad_norm": 0.11077981442213058, "learning_rate": 7.797867912044486e-06, "loss": 0.0763, "step": 700 }, { "epoch": 3.55, "grad_norm": 0.0862860158085823, "learning_rate": 7.018081120840038e-06, "loss": 0.0946, "step": 710 }, { "epoch": 3.6, "grad_norm": 0.03846760839223862, "learning_rate": 6.238294329635589e-06, "loss": 0.1012, "step": 720 }, { "epoch": 3.65, "grad_norm": 0.045732282102108, "learning_rate": 5.45850753843114e-06, "loss": 0.0695, "step": 730 }, { "epoch": 3.7, "grad_norm": 0.044342800974845886, "learning_rate": 4.678720747226691e-06, "loss": 0.0064, "step": 740 }, { "epoch": 3.75, "grad_norm": 0.06440162658691406, "learning_rate": 3.898933956022243e-06, "loss": 0.0971, "step": 750 }, { "epoch": 3.8, "grad_norm": 0.04895680025219917, "learning_rate": 3.1191471648177945e-06, "loss": 0.1129, "step": 760 }, { "epoch": 3.85, "grad_norm": 20.34134864807129, "learning_rate": 2.3393603736133456e-06, "loss": 0.1191, "step": 770 }, { "epoch": 3.9, "grad_norm": 0.12530703842639923, "learning_rate": 1.5595735824088973e-06, "loss": 0.0551, "step": 780 }, { "epoch": 3.95, "grad_norm": 0.05208105966448784, "learning_rate": 7.797867912044486e-07, "loss": 0.0498, "step": 790 }, { "epoch": 4.0, "grad_norm": 0.04303634166717529, "learning_rate": 0.0, "loss": 0.0832, "step": 800 }, { "epoch": 4.0, "eval_accuracy": 0.875, "eval_f1": 0.7983870967741935, "eval_loss": 0.5619988441467285, "eval_precision": 0.792, "eval_recall": 0.8048780487804879, "eval_runtime": 1.5731, "eval_samples_per_second": 254.276, "eval_steps_per_second": 15.892, "step": 800 } ], "logging_steps": 10, "max_steps": 800, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 847261481803776.0, "train_batch_size": 8, "trial_name": null, "trial_params": { "_wandb": {}, "assignments": {}, "learning_rate": 6.238294329635589e-05, "metric": "eval/loss", "num_train_epochs": 4, "per_device_train_batch_size": 8, "seed": 31 } }