{ "best_metric": 0.8064516129032258, "best_model_checkpoint": "distilbert-base-multilingual-cased-hyper-matt/run-rtpz8b71/checkpoint-800", "epoch": 4.0, "eval_steps": 500, "global_step": 800, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": 1.5273020267486572, "learning_rate": 3.379454045530049e-05, "loss": 0.672, "step": 10 }, { "epoch": 0.1, "grad_norm": 7.657852649688721, "learning_rate": 3.336676146219542e-05, "loss": 0.53, "step": 20 }, { "epoch": 0.15, "grad_norm": 5.539841651916504, "learning_rate": 3.293898246909035e-05, "loss": 0.5481, "step": 30 }, { "epoch": 0.2, "grad_norm": 3.097090244293213, "learning_rate": 3.251120347598528e-05, "loss": 0.2841, "step": 40 }, { "epoch": 0.25, "grad_norm": 4.80979061126709, "learning_rate": 3.208342448288021e-05, "loss": 0.6117, "step": 50 }, { "epoch": 0.3, "grad_norm": 7.640440464019775, "learning_rate": 3.1655645489775144e-05, "loss": 0.5832, "step": 60 }, { "epoch": 0.35, "grad_norm": 3.1154446601867676, "learning_rate": 3.1227866496670074e-05, "loss": 0.4628, "step": 70 }, { "epoch": 0.4, "grad_norm": 2.84663724899292, "learning_rate": 3.0800087503565005e-05, "loss": 0.4354, "step": 80 }, { "epoch": 0.45, "grad_norm": 15.556452751159668, "learning_rate": 3.0372308510459932e-05, "loss": 0.3762, "step": 90 }, { "epoch": 0.5, "grad_norm": 5.357182025909424, "learning_rate": 2.9944529517354863e-05, "loss": 0.4135, "step": 100 }, { "epoch": 0.55, "grad_norm": 5.593770980834961, "learning_rate": 2.9516750524249797e-05, "loss": 0.3815, "step": 110 }, { "epoch": 0.6, "grad_norm": 4.29806661605835, "learning_rate": 2.9088971531144725e-05, "loss": 0.4898, "step": 120 }, { "epoch": 0.65, "grad_norm": 0.596214771270752, "learning_rate": 2.8661192538039655e-05, "loss": 0.2377, "step": 130 }, { "epoch": 0.7, "grad_norm": 13.040666580200195, "learning_rate": 2.8233413544934586e-05, "loss": 0.4523, "step": 140 }, { "epoch": 0.75, "grad_norm": 4.686233997344971, "learning_rate": 2.7805634551829517e-05, "loss": 0.2944, "step": 150 }, { "epoch": 0.8, "grad_norm": 2.4252870082855225, "learning_rate": 2.7377855558724448e-05, "loss": 0.4242, "step": 160 }, { "epoch": 0.85, "grad_norm": 5.249920845031738, "learning_rate": 2.695007656561938e-05, "loss": 0.3134, "step": 170 }, { "epoch": 0.9, "grad_norm": 4.831853866577148, "learning_rate": 2.652229757251431e-05, "loss": 0.4533, "step": 180 }, { "epoch": 0.95, "grad_norm": 5.20128059387207, "learning_rate": 2.6094518579409237e-05, "loss": 0.3014, "step": 190 }, { "epoch": 1.0, "grad_norm": 5.756709098815918, "learning_rate": 2.5666739586304167e-05, "loss": 0.4128, "step": 200 }, { "epoch": 1.0, "eval_accuracy": 0.845, "eval_f1": 0.6804123711340206, "eval_loss": 0.3612925708293915, "eval_precision": 0.9295774647887324, "eval_recall": 0.5365853658536586, "eval_runtime": 1.5276, "eval_samples_per_second": 261.844, "eval_steps_per_second": 16.365, "step": 200 }, { "epoch": 1.05, "grad_norm": 5.490586757659912, "learning_rate": 2.52389605931991e-05, "loss": 0.3643, "step": 210 }, { "epoch": 1.1, "grad_norm": 6.718981742858887, "learning_rate": 2.481118160009403e-05, "loss": 0.2173, "step": 220 }, { "epoch": 1.15, "grad_norm": 0.35438114404678345, "learning_rate": 2.438340260698896e-05, "loss": 0.2357, "step": 230 }, { "epoch": 1.2, "grad_norm": 7.001437664031982, "learning_rate": 2.395562361388389e-05, "loss": 0.2616, "step": 240 }, { "epoch": 1.25, "grad_norm": 6.03153133392334, "learning_rate": 2.352784462077882e-05, "loss": 0.1749, "step": 250 }, { "epoch": 1.3, "grad_norm": 29.80844497680664, "learning_rate": 2.3100065627673752e-05, "loss": 0.3992, "step": 260 }, { "epoch": 1.35, "grad_norm": 3.6485345363616943, "learning_rate": 2.2672286634568683e-05, "loss": 0.1931, "step": 270 }, { "epoch": 1.4, "grad_norm": 0.2925957143306732, "learning_rate": 2.2244507641463614e-05, "loss": 0.3383, "step": 280 }, { "epoch": 1.45, "grad_norm": 11.773463249206543, "learning_rate": 2.181672864835854e-05, "loss": 0.4417, "step": 290 }, { "epoch": 1.5, "grad_norm": 0.6896932125091553, "learning_rate": 2.1388949655253475e-05, "loss": 0.3196, "step": 300 }, { "epoch": 1.55, "grad_norm": 0.8070700168609619, "learning_rate": 2.0961170662148406e-05, "loss": 0.1319, "step": 310 }, { "epoch": 1.6, "grad_norm": 0.24381496012210846, "learning_rate": 2.0533391669043333e-05, "loss": 0.3669, "step": 320 }, { "epoch": 1.65, "grad_norm": 7.7754669189453125, "learning_rate": 2.0105612675938267e-05, "loss": 0.397, "step": 330 }, { "epoch": 1.7, "grad_norm": 3.648085594177246, "learning_rate": 1.9677833682833195e-05, "loss": 0.4326, "step": 340 }, { "epoch": 1.75, "grad_norm": 19.058910369873047, "learning_rate": 1.9250054689728126e-05, "loss": 0.3497, "step": 350 }, { "epoch": 1.8, "grad_norm": 0.9193096160888672, "learning_rate": 1.882227569662306e-05, "loss": 0.3383, "step": 360 }, { "epoch": 1.85, "grad_norm": 5.150022029876709, "learning_rate": 1.8394496703517987e-05, "loss": 0.2809, "step": 370 }, { "epoch": 1.9, "grad_norm": 0.5875343680381775, "learning_rate": 1.7966717710412918e-05, "loss": 0.2199, "step": 380 }, { "epoch": 1.95, "grad_norm": 9.385665893554688, "learning_rate": 1.753893871730785e-05, "loss": 0.4043, "step": 390 }, { "epoch": 2.0, "grad_norm": 2.588594913482666, "learning_rate": 1.711115972420278e-05, "loss": 0.1863, "step": 400 }, { "epoch": 2.0, "eval_accuracy": 0.865, "eval_f1": 0.7906976744186046, "eval_loss": 0.33189040422439575, "eval_precision": 0.7555555555555555, "eval_recall": 0.8292682926829268, "eval_runtime": 1.5225, "eval_samples_per_second": 262.722, "eval_steps_per_second": 16.42, "step": 400 }, { "epoch": 2.05, "grad_norm": 0.32351887226104736, "learning_rate": 1.668338073109771e-05, "loss": 0.1385, "step": 410 }, { "epoch": 2.1, "grad_norm": 0.6892914175987244, "learning_rate": 1.625560173799264e-05, "loss": 0.3113, "step": 420 }, { "epoch": 2.15, "grad_norm": 0.4579280912876129, "learning_rate": 1.5827822744887572e-05, "loss": 0.1769, "step": 430 }, { "epoch": 2.2, "grad_norm": 9.545488357543945, "learning_rate": 1.5400043751782503e-05, "loss": 0.2754, "step": 440 }, { "epoch": 2.25, "grad_norm": 2.9151611328125, "learning_rate": 1.4972264758677432e-05, "loss": 0.2701, "step": 450 }, { "epoch": 2.3, "grad_norm": 0.5319097638130188, "learning_rate": 1.4544485765572362e-05, "loss": 0.098, "step": 460 }, { "epoch": 2.35, "grad_norm": 0.9106752276420593, "learning_rate": 1.4116706772467293e-05, "loss": 0.0424, "step": 470 }, { "epoch": 2.4, "grad_norm": 4.542623043060303, "learning_rate": 1.3688927779362224e-05, "loss": 0.3205, "step": 480 }, { "epoch": 2.45, "grad_norm": 2.6717588901519775, "learning_rate": 1.3261148786257155e-05, "loss": 0.3126, "step": 490 }, { "epoch": 2.5, "grad_norm": 63.507198333740234, "learning_rate": 1.2833369793152084e-05, "loss": 0.2623, "step": 500 }, { "epoch": 2.55, "grad_norm": 33.924095153808594, "learning_rate": 1.2405590800047015e-05, "loss": 0.1062, "step": 510 }, { "epoch": 2.6, "grad_norm": 8.15170669555664, "learning_rate": 1.1977811806941945e-05, "loss": 0.3471, "step": 520 }, { "epoch": 2.65, "grad_norm": 34.99510192871094, "learning_rate": 1.1550032813836876e-05, "loss": 0.3089, "step": 530 }, { "epoch": 2.7, "grad_norm": 9.204129219055176, "learning_rate": 1.1122253820731807e-05, "loss": 0.2824, "step": 540 }, { "epoch": 2.75, "grad_norm": 7.578718185424805, "learning_rate": 1.0694474827626738e-05, "loss": 0.0619, "step": 550 }, { "epoch": 2.8, "grad_norm": 3.9372365474700928, "learning_rate": 1.0266695834521667e-05, "loss": 0.341, "step": 560 }, { "epoch": 2.85, "grad_norm": 5.813101291656494, "learning_rate": 9.838916841416597e-06, "loss": 0.0413, "step": 570 }, { "epoch": 2.9, "grad_norm": 54.38895034790039, "learning_rate": 9.41113784831153e-06, "loss": 0.2031, "step": 580 }, { "epoch": 2.95, "grad_norm": 42.406883239746094, "learning_rate": 8.983358855206459e-06, "loss": 0.2095, "step": 590 }, { "epoch": 3.0, "grad_norm": 0.2973996102809906, "learning_rate": 8.55557986210139e-06, "loss": 0.0812, "step": 600 }, { "epoch": 3.0, "eval_accuracy": 0.8725, "eval_f1": 0.7866108786610879, "eval_loss": 0.4548227787017822, "eval_precision": 0.8103448275862069, "eval_recall": 0.7642276422764228, "eval_runtime": 1.5153, "eval_samples_per_second": 263.982, "eval_steps_per_second": 16.499, "step": 600 }, { "epoch": 3.05, "grad_norm": 0.10320937633514404, "learning_rate": 8.12780086899632e-06, "loss": 0.0107, "step": 610 }, { "epoch": 3.1, "grad_norm": 0.14922451972961426, "learning_rate": 7.700021875891251e-06, "loss": 0.0992, "step": 620 }, { "epoch": 3.15, "grad_norm": 58.235931396484375, "learning_rate": 7.272242882786181e-06, "loss": 0.0839, "step": 630 }, { "epoch": 3.2, "grad_norm": 0.5583060383796692, "learning_rate": 6.844463889681112e-06, "loss": 0.188, "step": 640 }, { "epoch": 3.25, "grad_norm": 0.6185976266860962, "learning_rate": 6.416684896576042e-06, "loss": 0.1084, "step": 650 }, { "epoch": 3.3, "grad_norm": 0.10940929502248764, "learning_rate": 5.988905903470973e-06, "loss": 0.0349, "step": 660 }, { "epoch": 3.35, "grad_norm": 1.7489935159683228, "learning_rate": 5.561126910365903e-06, "loss": 0.0542, "step": 670 }, { "epoch": 3.4, "grad_norm": 0.22723452746868134, "learning_rate": 5.133347917260833e-06, "loss": 0.1929, "step": 680 }, { "epoch": 3.45, "grad_norm": 0.11978468298912048, "learning_rate": 4.705568924155765e-06, "loss": 0.0968, "step": 690 }, { "epoch": 3.5, "grad_norm": 0.12295297533273697, "learning_rate": 4.277789931050695e-06, "loss": 0.0746, "step": 700 }, { "epoch": 3.55, "grad_norm": 1.1583808660507202, "learning_rate": 3.850010937945626e-06, "loss": 0.0119, "step": 710 }, { "epoch": 3.6, "grad_norm": 0.05860567465424538, "learning_rate": 3.422231944840556e-06, "loss": 0.1999, "step": 720 }, { "epoch": 3.65, "grad_norm": 0.3195663094520569, "learning_rate": 2.9944529517354863e-06, "loss": 0.1388, "step": 730 }, { "epoch": 3.7, "grad_norm": 0.07110361754894257, "learning_rate": 2.5666739586304167e-06, "loss": 0.2383, "step": 740 }, { "epoch": 3.75, "grad_norm": 0.10332443565130234, "learning_rate": 2.1388949655253474e-06, "loss": 0.1156, "step": 750 }, { "epoch": 3.8, "grad_norm": 0.222213014960289, "learning_rate": 1.711115972420278e-06, "loss": 0.0304, "step": 760 }, { "epoch": 3.85, "grad_norm": 0.5618990659713745, "learning_rate": 1.2833369793152083e-06, "loss": 0.1119, "step": 770 }, { "epoch": 3.9, "grad_norm": 5.86262845993042, "learning_rate": 8.55557986210139e-07, "loss": 0.1783, "step": 780 }, { "epoch": 3.95, "grad_norm": 0.07000931352376938, "learning_rate": 4.277789931050695e-07, "loss": 0.0181, "step": 790 }, { "epoch": 4.0, "grad_norm": 0.0716305747628212, "learning_rate": 0.0, "loss": 0.1302, "step": 800 }, { "epoch": 4.0, "eval_accuracy": 0.88, "eval_f1": 0.8064516129032258, "eval_loss": 0.5255711078643799, "eval_precision": 0.8, "eval_recall": 0.8130081300813008, "eval_runtime": 3.5354, "eval_samples_per_second": 113.14, "eval_steps_per_second": 7.071, "step": 800 } ], "logging_steps": 10, "max_steps": 800, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 847261481803776.0, "train_batch_size": 8, "trial_name": null, "trial_params": { "_wandb": {}, "assignments": {}, "learning_rate": 3.422231944840556e-05, "metric": "eval/loss", "num_train_epochs": 4, "per_device_train_batch_size": 8, "seed": 28 } }