{
"best_metric": 0.8032786885245902,
"best_model_checkpoint": "distilbert-base-multilingual-cased-hyper-matt/run-7dukmcwd/checkpoint-400",
"epoch": 2.0,
"eval_steps": 500,
"global_step": 400,
"is_hyper_param_search": true,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"grad_norm": 2.6303067207336426,
"learning_rate": 3.5703870009677385e-05,
"loss": 0.6604,
"step": 10
},
{
"epoch": 0.1,
"grad_norm": 2.9712932109832764,
"learning_rate": 3.47883861632754e-05,
"loss": 0.5592,
"step": 20
},
{
"epoch": 0.15,
"grad_norm": 3.1559805870056152,
"learning_rate": 3.387290231687342e-05,
"loss": 0.4817,
"step": 30
},
{
"epoch": 0.2,
"grad_norm": 8.0116548538208,
"learning_rate": 3.295741847047143e-05,
"loss": 0.4318,
"step": 40
},
{
"epoch": 0.25,
"grad_norm": 19.13960075378418,
"learning_rate": 3.204193462406945e-05,
"loss": 0.5086,
"step": 50
},
{
"epoch": 0.3,
"grad_norm": 13.061105728149414,
"learning_rate": 3.1126450777667465e-05,
"loss": 0.527,
"step": 60
},
{
"epoch": 0.35,
"grad_norm": 12.58017635345459,
"learning_rate": 3.0210966931265478e-05,
"loss": 0.4828,
"step": 70
},
{
"epoch": 0.4,
"grad_norm": 5.202507495880127,
"learning_rate": 2.9295483084863497e-05,
"loss": 0.4613,
"step": 80
},
{
"epoch": 0.45,
"grad_norm": 5.813719749450684,
"learning_rate": 2.8379999238461513e-05,
"loss": 0.4274,
"step": 90
},
{
"epoch": 0.5,
"grad_norm": 8.482573509216309,
"learning_rate": 2.746451539205953e-05,
"loss": 0.4159,
"step": 100
},
{
"epoch": 0.55,
"grad_norm": 9.623395919799805,
"learning_rate": 2.654903154565754e-05,
"loss": 0.3306,
"step": 110
},
{
"epoch": 0.6,
"grad_norm": 5.012599468231201,
"learning_rate": 2.5633547699255557e-05,
"loss": 0.403,
"step": 120
},
{
"epoch": 0.65,
"grad_norm": 3.260857343673706,
"learning_rate": 2.4718063852853577e-05,
"loss": 0.3636,
"step": 130
},
{
"epoch": 0.7,
"grad_norm": 18.5455379486084,
"learning_rate": 2.3802580006451593e-05,
"loss": 0.3621,
"step": 140
},
{
"epoch": 0.75,
"grad_norm": 3.035172700881958,
"learning_rate": 2.2887096160049605e-05,
"loss": 0.376,
"step": 150
},
{
"epoch": 0.8,
"grad_norm": 6.068894386291504,
"learning_rate": 2.197161231364762e-05,
"loss": 0.3452,
"step": 160
},
{
"epoch": 0.85,
"grad_norm": 3.0021464824676514,
"learning_rate": 2.1056128467245637e-05,
"loss": 0.3577,
"step": 170
},
{
"epoch": 0.9,
"grad_norm": 3.3914709091186523,
"learning_rate": 2.0140644620843656e-05,
"loss": 0.4271,
"step": 180
},
{
"epoch": 0.95,
"grad_norm": 8.317371368408203,
"learning_rate": 1.922516077444167e-05,
"loss": 0.289,
"step": 190
},
{
"epoch": 1.0,
"grad_norm": 0.967580258846283,
"learning_rate": 1.8309676928039685e-05,
"loss": 0.3738,
"step": 200
},
{
"epoch": 1.0,
"eval_accuracy": 0.87,
"eval_f1": 0.7657657657657657,
"eval_loss": 0.32556435465812683,
"eval_precision": 0.8585858585858586,
"eval_recall": 0.6910569105691057,
"eval_runtime": 1.5112,
"eval_samples_per_second": 264.697,
"eval_steps_per_second": 16.544,
"step": 200
},
{
"epoch": 1.05,
"grad_norm": 4.126100540161133,
"learning_rate": 1.73941930816377e-05,
"loss": 0.2486,
"step": 210
},
{
"epoch": 1.1,
"grad_norm": 5.108118057250977,
"learning_rate": 1.6478709235235717e-05,
"loss": 0.3034,
"step": 220
},
{
"epoch": 1.15,
"grad_norm": 11.375035285949707,
"learning_rate": 1.5563225388833733e-05,
"loss": 0.1486,
"step": 230
},
{
"epoch": 1.2,
"grad_norm": 8.199675559997559,
"learning_rate": 1.4647741542431748e-05,
"loss": 0.327,
"step": 240
},
{
"epoch": 1.25,
"grad_norm": 4.900712013244629,
"learning_rate": 1.3732257696029764e-05,
"loss": 0.2753,
"step": 250
},
{
"epoch": 1.3,
"grad_norm": 0.31448882818222046,
"learning_rate": 1.2816773849627779e-05,
"loss": 0.2028,
"step": 260
},
{
"epoch": 1.35,
"grad_norm": 0.3391319513320923,
"learning_rate": 1.1901290003225796e-05,
"loss": 0.2992,
"step": 270
},
{
"epoch": 1.4,
"grad_norm": 20.60084342956543,
"learning_rate": 1.098580615682381e-05,
"loss": 0.4703,
"step": 280
},
{
"epoch": 1.45,
"grad_norm": 7.974413871765137,
"learning_rate": 1.0070322310421828e-05,
"loss": 0.2649,
"step": 290
},
{
"epoch": 1.5,
"grad_norm": 11.488137245178223,
"learning_rate": 9.154838464019842e-06,
"loss": 0.438,
"step": 300
},
{
"epoch": 1.55,
"grad_norm": 0.5850751399993896,
"learning_rate": 8.239354617617858e-06,
"loss": 0.1704,
"step": 310
},
{
"epoch": 1.6,
"grad_norm": 3.258329391479492,
"learning_rate": 7.323870771215874e-06,
"loss": 0.226,
"step": 320
},
{
"epoch": 1.65,
"grad_norm": 6.117366790771484,
"learning_rate": 6.408386924813889e-06,
"loss": 0.2219,
"step": 330
},
{
"epoch": 1.7,
"grad_norm": 28.112499237060547,
"learning_rate": 5.492903078411905e-06,
"loss": 0.2595,
"step": 340
},
{
"epoch": 1.75,
"grad_norm": 15.969998359680176,
"learning_rate": 4.577419232009921e-06,
"loss": 0.3709,
"step": 350
},
{
"epoch": 1.8,
"grad_norm": 0.6372332572937012,
"learning_rate": 3.661935385607937e-06,
"loss": 0.2678,
"step": 360
},
{
"epoch": 1.85,
"grad_norm": 0.20131894946098328,
"learning_rate": 2.7464515392059526e-06,
"loss": 0.4027,
"step": 370
},
{
"epoch": 1.9,
"grad_norm": 12.212553024291992,
"learning_rate": 1.8309676928039686e-06,
"loss": 0.3156,
"step": 380
},
{
"epoch": 1.95,
"grad_norm": 0.19253325462341309,
"learning_rate": 9.154838464019843e-07,
"loss": 0.1869,
"step": 390
},
{
"epoch": 2.0,
"grad_norm": 14.685490608215332,
"learning_rate": 0.0,
"loss": 0.1768,
"step": 400
},
{
"epoch": 2.0,
"eval_accuracy": 0.88,
"eval_f1": 0.8032786885245902,
"eval_loss": 0.3457476794719696,
"eval_precision": 0.8099173553719008,
"eval_recall": 0.7967479674796748,
"eval_runtime": 1.5723,
"eval_samples_per_second": 254.401,
"eval_steps_per_second": 15.9,
"step": 400
}
],
"logging_steps": 10,
"max_steps": 400,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 423630740901888.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": {
"_wandb": {},
"assignments": {},
"learning_rate": 3.661935385607937e-05,
"metric": "eval/loss",
"num_train_epochs": 2,
"per_device_train_batch_size": 8,
"seed": 36
}
}