{ "best_metric": 0.7722007722007722, "best_model_checkpoint": "distilbert-base-multilingual-cased-hyper-matt/run-hq22ls8k/checkpoint-400", "epoch": 4.0, "eval_steps": 500, "global_step": 400, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "grad_norm": 2.1560730934143066, "learning_rate": 9.426932053278163e-05, "loss": 0.595, "step": 10 }, { "epoch": 0.2, "grad_norm": 5.519632339477539, "learning_rate": 9.267153543900568e-05, "loss": 0.5409, "step": 20 }, { "epoch": 0.3, "grad_norm": 4.012811183929443, "learning_rate": 9.107375034522972e-05, "loss": 0.4944, "step": 30 }, { "epoch": 0.4, "grad_norm": 9.410440444946289, "learning_rate": 8.947596525145376e-05, "loss": 0.416, "step": 40 }, { "epoch": 0.5, "grad_norm": 2.913337230682373, "learning_rate": 8.78781801576778e-05, "loss": 0.4236, "step": 50 }, { "epoch": 0.6, "grad_norm": 1.957322359085083, "learning_rate": 8.628039506390185e-05, "loss": 0.4094, "step": 60 }, { "epoch": 0.7, "grad_norm": 2.4026949405670166, "learning_rate": 8.468260997012588e-05, "loss": 0.331, "step": 70 }, { "epoch": 0.8, "grad_norm": 1.6281132698059082, "learning_rate": 8.308482487634992e-05, "loss": 0.3211, "step": 80 }, { "epoch": 0.9, "grad_norm": 2.870759963989258, "learning_rate": 8.148703978257395e-05, "loss": 0.2933, "step": 90 }, { "epoch": 1.0, "grad_norm": 1.9348390102386475, "learning_rate": 7.9889254688798e-05, "loss": 0.4921, "step": 100 }, { "epoch": 1.0, "eval_accuracy": 0.8375, "eval_f1": 0.7670250896057348, "eval_loss": 0.358794629573822, "eval_precision": 0.6858974358974359, "eval_recall": 0.8699186991869918, "eval_runtime": 1.5446, "eval_samples_per_second": 258.973, "eval_steps_per_second": 16.186, "step": 100 }, { "epoch": 1.1, "grad_norm": 2.657993793487549, "learning_rate": 7.829146959502203e-05, "loss": 0.3661, "step": 110 }, { "epoch": 1.2, "grad_norm": 9.199877738952637, "learning_rate": 7.669368450124608e-05, "loss": 0.3209, "step": 120 }, { "epoch": 1.3, "grad_norm": 6.665186882019043, "learning_rate": 7.509589940747012e-05, "loss": 0.3953, "step": 130 }, { "epoch": 1.4, "grad_norm": 5.048425197601318, "learning_rate": 7.349811431369416e-05, "loss": 0.2938, "step": 140 }, { "epoch": 1.5, "grad_norm": 3.8985488414764404, "learning_rate": 7.19003292199182e-05, "loss": 0.2387, "step": 150 }, { "epoch": 1.6, "grad_norm": 1.6217758655548096, "learning_rate": 7.030254412614223e-05, "loss": 0.2279, "step": 160 }, { "epoch": 1.7, "grad_norm": 13.243719100952148, "learning_rate": 6.870475903236628e-05, "loss": 0.243, "step": 170 }, { "epoch": 1.8, "grad_norm": 1.4423586130142212, "learning_rate": 6.710697393859032e-05, "loss": 0.3166, "step": 180 }, { "epoch": 1.9, "grad_norm": 3.1215100288391113, "learning_rate": 6.550918884481436e-05, "loss": 0.3601, "step": 190 }, { "epoch": 2.0, "grad_norm": 3.1280789375305176, "learning_rate": 6.39114037510384e-05, "loss": 0.2303, "step": 200 }, { "epoch": 2.0, "eval_accuracy": 0.87, "eval_f1": 0.7699115044247787, "eval_loss": 0.30909162759780884, "eval_precision": 0.8446601941747572, "eval_recall": 0.7073170731707317, "eval_runtime": 1.5339, "eval_samples_per_second": 260.775, "eval_steps_per_second": 16.298, "step": 200 }, { "epoch": 2.1, "grad_norm": 2.1738266944885254, "learning_rate": 6.231361865726245e-05, "loss": 0.1502, "step": 210 }, { "epoch": 2.2, "grad_norm": 25.343351364135742, "learning_rate": 6.0715833563486475e-05, "loss": 0.1578, "step": 220 }, { "epoch": 2.3, "grad_norm": 3.0342938899993896, "learning_rate": 5.9118048469710523e-05, "loss": 0.1892, "step": 230 }, { "epoch": 2.4, "grad_norm": 15.658936500549316, "learning_rate": 5.752026337593456e-05, "loss": 0.2544, "step": 240 }, { "epoch": 2.5, "grad_norm": 15.843140602111816, "learning_rate": 5.5922478282158606e-05, "loss": 0.2167, "step": 250 }, { "epoch": 2.6, "grad_norm": 5.072413444519043, "learning_rate": 5.432469318838264e-05, "loss": 0.2226, "step": 260 }, { "epoch": 2.7, "grad_norm": 5.797332763671875, "learning_rate": 5.272690809460668e-05, "loss": 0.1347, "step": 270 }, { "epoch": 2.8, "grad_norm": 9.168190956115723, "learning_rate": 5.112912300083072e-05, "loss": 0.1976, "step": 280 }, { "epoch": 2.9, "grad_norm": 5.497272968292236, "learning_rate": 4.9531337907054765e-05, "loss": 0.2177, "step": 290 }, { "epoch": 3.0, "grad_norm": 3.3119521141052246, "learning_rate": 4.79335528132788e-05, "loss": 0.1684, "step": 300 }, { "epoch": 3.0, "eval_accuracy": 0.845, "eval_f1": 0.743801652892562, "eval_loss": 0.47961556911468506, "eval_precision": 0.7563025210084033, "eval_recall": 0.7317073170731707, "eval_runtime": 1.5168, "eval_samples_per_second": 263.704, "eval_steps_per_second": 16.482, "step": 300 }, { "epoch": 3.1, "grad_norm": 1.9762777090072632, "learning_rate": 4.633576771950284e-05, "loss": 0.1612, "step": 310 }, { "epoch": 3.2, "grad_norm": 1.3597666025161743, "learning_rate": 4.473798262572688e-05, "loss": 0.0888, "step": 320 }, { "epoch": 3.3, "grad_norm": 4.3360419273376465, "learning_rate": 4.3140197531950924e-05, "loss": 0.1549, "step": 330 }, { "epoch": 3.4, "grad_norm": 0.6792251467704773, "learning_rate": 4.154241243817496e-05, "loss": 0.0805, "step": 340 }, { "epoch": 3.5, "grad_norm": 4.299612998962402, "learning_rate": 3.9944627344399e-05, "loss": 0.1412, "step": 350 }, { "epoch": 3.6, "grad_norm": 1.4792366027832031, "learning_rate": 3.834684225062304e-05, "loss": 0.0666, "step": 360 }, { "epoch": 3.7, "grad_norm": 0.11450085788965225, "learning_rate": 3.674905715684708e-05, "loss": 0.1294, "step": 370 }, { "epoch": 3.8, "grad_norm": 0.6610074639320374, "learning_rate": 3.515127206307112e-05, "loss": 0.0168, "step": 380 }, { "epoch": 3.9, "grad_norm": 0.08138225972652435, "learning_rate": 3.355348696929516e-05, "loss": 0.1562, "step": 390 }, { "epoch": 4.0, "grad_norm": 8.49350643157959, "learning_rate": 3.19557018755192e-05, "loss": 0.0895, "step": 400 }, { "epoch": 4.0, "eval_accuracy": 0.8525, "eval_f1": 0.7722007722007722, "eval_loss": 0.6465945243835449, "eval_precision": 0.7352941176470589, "eval_recall": 0.8130081300813008, "eval_runtime": 1.5173, "eval_samples_per_second": 263.626, "eval_steps_per_second": 16.477, "step": 400 } ], "logging_steps": 10, "max_steps": 600, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 847261481803776.0, "train_batch_size": 16, "trial_name": null, "trial_params": { "_wandb": {}, "assignments": {}, "learning_rate": 9.58671056265576e-05, "metric": "eval/loss", "num_train_epochs": 6, "per_device_train_batch_size": 16, "seed": 14 } }