|
{ |
|
"best_metric": 0.8032786885245902, |
|
"best_model_checkpoint": "distilbert-base-multilingual-cased-hyper-matt/run-7dukmcwd/checkpoint-400", |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 400, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 2.6303067207336426, |
|
"learning_rate": 3.5703870009677385e-05, |
|
"loss": 0.6604, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.9712932109832764, |
|
"learning_rate": 3.47883861632754e-05, |
|
"loss": 0.5592, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 3.1559805870056152, |
|
"learning_rate": 3.387290231687342e-05, |
|
"loss": 0.4817, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 8.0116548538208, |
|
"learning_rate": 3.295741847047143e-05, |
|
"loss": 0.4318, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 19.13960075378418, |
|
"learning_rate": 3.204193462406945e-05, |
|
"loss": 0.5086, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 13.061105728149414, |
|
"learning_rate": 3.1126450777667465e-05, |
|
"loss": 0.527, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 12.58017635345459, |
|
"learning_rate": 3.0210966931265478e-05, |
|
"loss": 0.4828, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 5.202507495880127, |
|
"learning_rate": 2.9295483084863497e-05, |
|
"loss": 0.4613, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 5.813719749450684, |
|
"learning_rate": 2.8379999238461513e-05, |
|
"loss": 0.4274, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 8.482573509216309, |
|
"learning_rate": 2.746451539205953e-05, |
|
"loss": 0.4159, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 9.623395919799805, |
|
"learning_rate": 2.654903154565754e-05, |
|
"loss": 0.3306, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 5.012599468231201, |
|
"learning_rate": 2.5633547699255557e-05, |
|
"loss": 0.403, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 3.260857343673706, |
|
"learning_rate": 2.4718063852853577e-05, |
|
"loss": 0.3636, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 18.5455379486084, |
|
"learning_rate": 2.3802580006451593e-05, |
|
"loss": 0.3621, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 3.035172700881958, |
|
"learning_rate": 2.2887096160049605e-05, |
|
"loss": 0.376, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 6.068894386291504, |
|
"learning_rate": 2.197161231364762e-05, |
|
"loss": 0.3452, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 3.0021464824676514, |
|
"learning_rate": 2.1056128467245637e-05, |
|
"loss": 0.3577, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 3.3914709091186523, |
|
"learning_rate": 2.0140644620843656e-05, |
|
"loss": 0.4271, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 8.317371368408203, |
|
"learning_rate": 1.922516077444167e-05, |
|
"loss": 0.289, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.967580258846283, |
|
"learning_rate": 1.8309676928039685e-05, |
|
"loss": 0.3738, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.87, |
|
"eval_f1": 0.7657657657657657, |
|
"eval_loss": 0.32556435465812683, |
|
"eval_precision": 0.8585858585858586, |
|
"eval_recall": 0.6910569105691057, |
|
"eval_runtime": 1.5112, |
|
"eval_samples_per_second": 264.697, |
|
"eval_steps_per_second": 16.544, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 4.126100540161133, |
|
"learning_rate": 1.73941930816377e-05, |
|
"loss": 0.2486, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 5.108118057250977, |
|
"learning_rate": 1.6478709235235717e-05, |
|
"loss": 0.3034, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 11.375035285949707, |
|
"learning_rate": 1.5563225388833733e-05, |
|
"loss": 0.1486, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 8.199675559997559, |
|
"learning_rate": 1.4647741542431748e-05, |
|
"loss": 0.327, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 4.900712013244629, |
|
"learning_rate": 1.3732257696029764e-05, |
|
"loss": 0.2753, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 0.31448882818222046, |
|
"learning_rate": 1.2816773849627779e-05, |
|
"loss": 0.2028, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 0.3391319513320923, |
|
"learning_rate": 1.1901290003225796e-05, |
|
"loss": 0.2992, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 20.60084342956543, |
|
"learning_rate": 1.098580615682381e-05, |
|
"loss": 0.4703, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 7.974413871765137, |
|
"learning_rate": 1.0070322310421828e-05, |
|
"loss": 0.2649, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 11.488137245178223, |
|
"learning_rate": 9.154838464019842e-06, |
|
"loss": 0.438, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 0.5850751399993896, |
|
"learning_rate": 8.239354617617858e-06, |
|
"loss": 0.1704, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 3.258329391479492, |
|
"learning_rate": 7.323870771215874e-06, |
|
"loss": 0.226, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 6.117366790771484, |
|
"learning_rate": 6.408386924813889e-06, |
|
"loss": 0.2219, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 28.112499237060547, |
|
"learning_rate": 5.492903078411905e-06, |
|
"loss": 0.2595, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 15.969998359680176, |
|
"learning_rate": 4.577419232009921e-06, |
|
"loss": 0.3709, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 0.6372332572937012, |
|
"learning_rate": 3.661935385607937e-06, |
|
"loss": 0.2678, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 0.20131894946098328, |
|
"learning_rate": 2.7464515392059526e-06, |
|
"loss": 0.4027, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 12.212553024291992, |
|
"learning_rate": 1.8309676928039686e-06, |
|
"loss": 0.3156, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 0.19253325462341309, |
|
"learning_rate": 9.154838464019843e-07, |
|
"loss": 0.1869, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 14.685490608215332, |
|
"learning_rate": 0.0, |
|
"loss": 0.1768, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.88, |
|
"eval_f1": 0.8032786885245902, |
|
"eval_loss": 0.3457476794719696, |
|
"eval_precision": 0.8099173553719008, |
|
"eval_recall": 0.7967479674796748, |
|
"eval_runtime": 1.5723, |
|
"eval_samples_per_second": 254.401, |
|
"eval_steps_per_second": 15.9, |
|
"step": 400 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 400, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 423630740901888.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": { |
|
"_wandb": {}, |
|
"assignments": {}, |
|
"learning_rate": 3.661935385607937e-05, |
|
"metric": "eval/loss", |
|
"num_train_epochs": 2, |
|
"per_device_train_batch_size": 8, |
|
"seed": 36 |
|
} |
|
} |
|
|