{ "best_metric": 0.8032786885245902, "best_model_checkpoint": "distilbert-base-multilingual-cased-hyper-matt/run-7dukmcwd/checkpoint-400", "epoch": 2.0, "eval_steps": 500, "global_step": 400, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": 2.6303067207336426, "learning_rate": 3.5703870009677385e-05, "loss": 0.6604, "step": 10 }, { "epoch": 0.1, "grad_norm": 2.9712932109832764, "learning_rate": 3.47883861632754e-05, "loss": 0.5592, "step": 20 }, { "epoch": 0.15, "grad_norm": 3.1559805870056152, "learning_rate": 3.387290231687342e-05, "loss": 0.4817, "step": 30 }, { "epoch": 0.2, "grad_norm": 8.0116548538208, "learning_rate": 3.295741847047143e-05, "loss": 0.4318, "step": 40 }, { "epoch": 0.25, "grad_norm": 19.13960075378418, "learning_rate": 3.204193462406945e-05, "loss": 0.5086, "step": 50 }, { "epoch": 0.3, "grad_norm": 13.061105728149414, "learning_rate": 3.1126450777667465e-05, "loss": 0.527, "step": 60 }, { "epoch": 0.35, "grad_norm": 12.58017635345459, "learning_rate": 3.0210966931265478e-05, "loss": 0.4828, "step": 70 }, { "epoch": 0.4, "grad_norm": 5.202507495880127, "learning_rate": 2.9295483084863497e-05, "loss": 0.4613, "step": 80 }, { "epoch": 0.45, "grad_norm": 5.813719749450684, "learning_rate": 2.8379999238461513e-05, "loss": 0.4274, "step": 90 }, { "epoch": 0.5, "grad_norm": 8.482573509216309, "learning_rate": 2.746451539205953e-05, "loss": 0.4159, "step": 100 }, { "epoch": 0.55, "grad_norm": 9.623395919799805, "learning_rate": 2.654903154565754e-05, "loss": 0.3306, "step": 110 }, { "epoch": 0.6, "grad_norm": 5.012599468231201, "learning_rate": 2.5633547699255557e-05, "loss": 0.403, "step": 120 }, { "epoch": 0.65, "grad_norm": 3.260857343673706, "learning_rate": 2.4718063852853577e-05, "loss": 0.3636, "step": 130 }, { "epoch": 0.7, "grad_norm": 18.5455379486084, "learning_rate": 2.3802580006451593e-05, "loss": 0.3621, "step": 140 }, { "epoch": 0.75, "grad_norm": 3.035172700881958, "learning_rate": 2.2887096160049605e-05, "loss": 0.376, "step": 150 }, { "epoch": 0.8, "grad_norm": 6.068894386291504, "learning_rate": 2.197161231364762e-05, "loss": 0.3452, "step": 160 }, { "epoch": 0.85, "grad_norm": 3.0021464824676514, "learning_rate": 2.1056128467245637e-05, "loss": 0.3577, "step": 170 }, { "epoch": 0.9, "grad_norm": 3.3914709091186523, "learning_rate": 2.0140644620843656e-05, "loss": 0.4271, "step": 180 }, { "epoch": 0.95, "grad_norm": 8.317371368408203, "learning_rate": 1.922516077444167e-05, "loss": 0.289, "step": 190 }, { "epoch": 1.0, "grad_norm": 0.967580258846283, "learning_rate": 1.8309676928039685e-05, "loss": 0.3738, "step": 200 }, { "epoch": 1.0, "eval_accuracy": 0.87, "eval_f1": 0.7657657657657657, "eval_loss": 0.32556435465812683, "eval_precision": 0.8585858585858586, "eval_recall": 0.6910569105691057, "eval_runtime": 1.5112, "eval_samples_per_second": 264.697, "eval_steps_per_second": 16.544, "step": 200 }, { "epoch": 1.05, "grad_norm": 4.126100540161133, "learning_rate": 1.73941930816377e-05, "loss": 0.2486, "step": 210 }, { "epoch": 1.1, "grad_norm": 5.108118057250977, "learning_rate": 1.6478709235235717e-05, "loss": 0.3034, "step": 220 }, { "epoch": 1.15, "grad_norm": 11.375035285949707, "learning_rate": 1.5563225388833733e-05, "loss": 0.1486, "step": 230 }, { "epoch": 1.2, "grad_norm": 8.199675559997559, "learning_rate": 1.4647741542431748e-05, "loss": 0.327, "step": 240 }, { "epoch": 1.25, "grad_norm": 4.900712013244629, "learning_rate": 1.3732257696029764e-05, "loss": 0.2753, "step": 250 }, { "epoch": 1.3, "grad_norm": 0.31448882818222046, "learning_rate": 1.2816773849627779e-05, "loss": 0.2028, "step": 260 }, { "epoch": 1.35, "grad_norm": 0.3391319513320923, "learning_rate": 1.1901290003225796e-05, "loss": 0.2992, "step": 270 }, { "epoch": 1.4, "grad_norm": 20.60084342956543, "learning_rate": 1.098580615682381e-05, "loss": 0.4703, "step": 280 }, { "epoch": 1.45, "grad_norm": 7.974413871765137, "learning_rate": 1.0070322310421828e-05, "loss": 0.2649, "step": 290 }, { "epoch": 1.5, "grad_norm": 11.488137245178223, "learning_rate": 9.154838464019842e-06, "loss": 0.438, "step": 300 }, { "epoch": 1.55, "grad_norm": 0.5850751399993896, "learning_rate": 8.239354617617858e-06, "loss": 0.1704, "step": 310 }, { "epoch": 1.6, "grad_norm": 3.258329391479492, "learning_rate": 7.323870771215874e-06, "loss": 0.226, "step": 320 }, { "epoch": 1.65, "grad_norm": 6.117366790771484, "learning_rate": 6.408386924813889e-06, "loss": 0.2219, "step": 330 }, { "epoch": 1.7, "grad_norm": 28.112499237060547, "learning_rate": 5.492903078411905e-06, "loss": 0.2595, "step": 340 }, { "epoch": 1.75, "grad_norm": 15.969998359680176, "learning_rate": 4.577419232009921e-06, "loss": 0.3709, "step": 350 }, { "epoch": 1.8, "grad_norm": 0.6372332572937012, "learning_rate": 3.661935385607937e-06, "loss": 0.2678, "step": 360 }, { "epoch": 1.85, "grad_norm": 0.20131894946098328, "learning_rate": 2.7464515392059526e-06, "loss": 0.4027, "step": 370 }, { "epoch": 1.9, "grad_norm": 12.212553024291992, "learning_rate": 1.8309676928039686e-06, "loss": 0.3156, "step": 380 }, { "epoch": 1.95, "grad_norm": 0.19253325462341309, "learning_rate": 9.154838464019843e-07, "loss": 0.1869, "step": 390 }, { "epoch": 2.0, "grad_norm": 14.685490608215332, "learning_rate": 0.0, "loss": 0.1768, "step": 400 }, { "epoch": 2.0, "eval_accuracy": 0.88, "eval_f1": 0.8032786885245902, "eval_loss": 0.3457476794719696, "eval_precision": 0.8099173553719008, "eval_recall": 0.7967479674796748, "eval_runtime": 1.5723, "eval_samples_per_second": 254.401, "eval_steps_per_second": 15.9, "step": 400 } ], "logging_steps": 10, "max_steps": 400, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 423630740901888.0, "train_batch_size": 8, "trial_name": null, "trial_params": { "_wandb": {}, "assignments": {}, "learning_rate": 3.661935385607937e-05, "metric": "eval/loss", "num_train_epochs": 2, "per_device_train_batch_size": 8, "seed": 36 } }