cvapict's picture
End of training
f52379d verified
raw
history blame
15.1 kB
{
"best_metric": 0.7983870967741935,
"best_model_checkpoint": "distilbert-base-multilingual-cased-hyper-matt/run-qvrnjpot/checkpoint-800",
"epoch": 4.0,
"eval_steps": 500,
"global_step": 800,
"is_hyper_param_search": true,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.05,
"grad_norm": 1.7631841897964478,
"learning_rate": 6.160315650515145e-05,
"loss": 0.617,
"step": 10
},
{
"epoch": 0.1,
"grad_norm": 2.8386940956115723,
"learning_rate": 6.082336971394699e-05,
"loss": 0.5112,
"step": 20
},
{
"epoch": 0.15,
"grad_norm": 2.7043747901916504,
"learning_rate": 6.004358292274255e-05,
"loss": 0.4761,
"step": 30
},
{
"epoch": 0.2,
"grad_norm": 2.6273417472839355,
"learning_rate": 5.9263796131538093e-05,
"loss": 0.5074,
"step": 40
},
{
"epoch": 0.25,
"grad_norm": 5.05076265335083,
"learning_rate": 5.8484009340333646e-05,
"loss": 0.3454,
"step": 50
},
{
"epoch": 0.3,
"grad_norm": 6.585201263427734,
"learning_rate": 5.77042225491292e-05,
"loss": 0.4747,
"step": 60
},
{
"epoch": 0.35,
"grad_norm": 11.389758110046387,
"learning_rate": 5.6924435757924745e-05,
"loss": 0.5088,
"step": 70
},
{
"epoch": 0.4,
"grad_norm": 17.25305938720703,
"learning_rate": 5.6144648966720304e-05,
"loss": 0.3702,
"step": 80
},
{
"epoch": 0.45,
"grad_norm": 4.934868335723877,
"learning_rate": 5.536486217551585e-05,
"loss": 0.2679,
"step": 90
},
{
"epoch": 0.5,
"grad_norm": 8.351325035095215,
"learning_rate": 5.45850753843114e-05,
"loss": 0.4069,
"step": 100
},
{
"epoch": 0.55,
"grad_norm": 8.08082389831543,
"learning_rate": 5.3805288593106956e-05,
"loss": 0.5033,
"step": 110
},
{
"epoch": 0.6,
"grad_norm": 1.6448326110839844,
"learning_rate": 5.30255018019025e-05,
"loss": 0.3679,
"step": 120
},
{
"epoch": 0.65,
"grad_norm": 2.436563014984131,
"learning_rate": 5.224571501069806e-05,
"loss": 0.506,
"step": 130
},
{
"epoch": 0.7,
"grad_norm": 1.7677550315856934,
"learning_rate": 5.146592821949361e-05,
"loss": 0.351,
"step": 140
},
{
"epoch": 0.75,
"grad_norm": 5.091623783111572,
"learning_rate": 5.068614142828916e-05,
"loss": 0.4919,
"step": 150
},
{
"epoch": 0.8,
"grad_norm": 12.155316352844238,
"learning_rate": 4.990635463708471e-05,
"loss": 0.4279,
"step": 160
},
{
"epoch": 0.85,
"grad_norm": 2.3314337730407715,
"learning_rate": 4.912656784588026e-05,
"loss": 0.4913,
"step": 170
},
{
"epoch": 0.9,
"grad_norm": 6.5636210441589355,
"learning_rate": 4.834678105467582e-05,
"loss": 0.3929,
"step": 180
},
{
"epoch": 0.95,
"grad_norm": 2.8381412029266357,
"learning_rate": 4.7566994263471364e-05,
"loss": 0.2571,
"step": 190
},
{
"epoch": 1.0,
"grad_norm": 0.629071056842804,
"learning_rate": 4.678720747226692e-05,
"loss": 0.2667,
"step": 200
},
{
"epoch": 1.0,
"eval_accuracy": 0.8525,
"eval_f1": 0.7704280155642024,
"eval_loss": 0.35932326316833496,
"eval_precision": 0.7388059701492538,
"eval_recall": 0.8048780487804879,
"eval_runtime": 1.5245,
"eval_samples_per_second": 262.373,
"eval_steps_per_second": 16.398,
"step": 200
},
{
"epoch": 1.05,
"grad_norm": 0.6173528432846069,
"learning_rate": 4.600742068106247e-05,
"loss": 0.1887,
"step": 210
},
{
"epoch": 1.1,
"grad_norm": 0.25395506620407104,
"learning_rate": 4.5227633889858016e-05,
"loss": 0.2535,
"step": 220
},
{
"epoch": 1.15,
"grad_norm": 25.887020111083984,
"learning_rate": 4.4447847098653575e-05,
"loss": 0.1868,
"step": 230
},
{
"epoch": 1.2,
"grad_norm": 0.13451418280601501,
"learning_rate": 4.366806030744912e-05,
"loss": 0.3065,
"step": 240
},
{
"epoch": 1.25,
"grad_norm": 0.36101168394088745,
"learning_rate": 4.2888273516244674e-05,
"loss": 0.249,
"step": 250
},
{
"epoch": 1.3,
"grad_norm": 7.472021579742432,
"learning_rate": 4.2108486725040227e-05,
"loss": 0.2548,
"step": 260
},
{
"epoch": 1.35,
"grad_norm": 8.727078437805176,
"learning_rate": 4.132869993383577e-05,
"loss": 0.2963,
"step": 270
},
{
"epoch": 1.4,
"grad_norm": 13.003727912902832,
"learning_rate": 4.054891314263133e-05,
"loss": 0.3822,
"step": 280
},
{
"epoch": 1.45,
"grad_norm": 0.511164665222168,
"learning_rate": 3.976912635142688e-05,
"loss": 0.2591,
"step": 290
},
{
"epoch": 1.5,
"grad_norm": 0.3373236656188965,
"learning_rate": 3.898933956022243e-05,
"loss": 0.2428,
"step": 300
},
{
"epoch": 1.55,
"grad_norm": 1.4264609813690186,
"learning_rate": 3.8209552769017983e-05,
"loss": 0.3269,
"step": 310
},
{
"epoch": 1.6,
"grad_norm": 0.13309431076049805,
"learning_rate": 3.742976597781353e-05,
"loss": 0.2393,
"step": 320
},
{
"epoch": 1.65,
"grad_norm": 10.90920639038086,
"learning_rate": 3.664997918660909e-05,
"loss": 0.306,
"step": 330
},
{
"epoch": 1.7,
"grad_norm": 13.362682342529297,
"learning_rate": 3.5870192395404635e-05,
"loss": 0.3569,
"step": 340
},
{
"epoch": 1.75,
"grad_norm": 4.543549537658691,
"learning_rate": 3.509040560420019e-05,
"loss": 0.3563,
"step": 350
},
{
"epoch": 1.8,
"grad_norm": 4.552388668060303,
"learning_rate": 3.431061881299574e-05,
"loss": 0.3519,
"step": 360
},
{
"epoch": 1.85,
"grad_norm": 6.557765960693359,
"learning_rate": 3.3530832021791286e-05,
"loss": 0.2307,
"step": 370
},
{
"epoch": 1.9,
"grad_norm": 8.15699577331543,
"learning_rate": 3.2751045230586846e-05,
"loss": 0.2675,
"step": 380
},
{
"epoch": 1.95,
"grad_norm": 0.4172225892543793,
"learning_rate": 3.197125843938239e-05,
"loss": 0.1495,
"step": 390
},
{
"epoch": 2.0,
"grad_norm": 3.5596890449523926,
"learning_rate": 3.1191471648177945e-05,
"loss": 0.3743,
"step": 400
},
{
"epoch": 2.0,
"eval_accuracy": 0.88,
"eval_f1": 0.7948717948717948,
"eval_loss": 0.3793269395828247,
"eval_precision": 0.8378378378378378,
"eval_recall": 0.7560975609756098,
"eval_runtime": 1.5273,
"eval_samples_per_second": 261.897,
"eval_steps_per_second": 16.369,
"step": 400
},
{
"epoch": 2.05,
"grad_norm": 0.26775601506233215,
"learning_rate": 3.0411684856973494e-05,
"loss": 0.1442,
"step": 410
},
{
"epoch": 2.1,
"grad_norm": 0.1751158982515335,
"learning_rate": 2.9631898065769047e-05,
"loss": 0.1926,
"step": 420
},
{
"epoch": 2.15,
"grad_norm": 0.27150827646255493,
"learning_rate": 2.88521112745646e-05,
"loss": 0.1173,
"step": 430
},
{
"epoch": 2.2,
"grad_norm": 11.543705940246582,
"learning_rate": 2.8072324483360152e-05,
"loss": 0.2252,
"step": 440
},
{
"epoch": 2.25,
"grad_norm": 1.7563936710357666,
"learning_rate": 2.72925376921557e-05,
"loss": 0.2043,
"step": 450
},
{
"epoch": 2.3,
"grad_norm": 0.22426572442054749,
"learning_rate": 2.651275090095125e-05,
"loss": 0.089,
"step": 460
},
{
"epoch": 2.35,
"grad_norm": 0.09884244948625565,
"learning_rate": 2.5732964109746804e-05,
"loss": 0.0607,
"step": 470
},
{
"epoch": 2.4,
"grad_norm": 0.5133824348449707,
"learning_rate": 2.4953177318542356e-05,
"loss": 0.2,
"step": 480
},
{
"epoch": 2.45,
"grad_norm": 1.0894460678100586,
"learning_rate": 2.417339052733791e-05,
"loss": 0.2041,
"step": 490
},
{
"epoch": 2.5,
"grad_norm": 20.391416549682617,
"learning_rate": 2.339360373613346e-05,
"loss": 0.3932,
"step": 500
},
{
"epoch": 2.55,
"grad_norm": 1.9286866188049316,
"learning_rate": 2.2613816944929008e-05,
"loss": 0.0993,
"step": 510
},
{
"epoch": 2.6,
"grad_norm": 0.09835375845432281,
"learning_rate": 2.183403015372456e-05,
"loss": 0.0856,
"step": 520
},
{
"epoch": 2.65,
"grad_norm": 0.2403247058391571,
"learning_rate": 2.1054243362520113e-05,
"loss": 0.1441,
"step": 530
},
{
"epoch": 2.7,
"grad_norm": 0.10543715208768845,
"learning_rate": 2.0274456571315666e-05,
"loss": 0.1147,
"step": 540
},
{
"epoch": 2.75,
"grad_norm": 10.961527824401855,
"learning_rate": 1.9494669780111215e-05,
"loss": 0.2312,
"step": 550
},
{
"epoch": 2.8,
"grad_norm": 29.004623413085938,
"learning_rate": 1.8714882988906765e-05,
"loss": 0.2722,
"step": 560
},
{
"epoch": 2.85,
"grad_norm": 0.12045635282993317,
"learning_rate": 1.7935096197702317e-05,
"loss": 0.336,
"step": 570
},
{
"epoch": 2.9,
"grad_norm": 20.202497482299805,
"learning_rate": 1.715530940649787e-05,
"loss": 0.0886,
"step": 580
},
{
"epoch": 2.95,
"grad_norm": 5.752193450927734,
"learning_rate": 1.6375522615293423e-05,
"loss": 0.1171,
"step": 590
},
{
"epoch": 3.0,
"grad_norm": 25.30577278137207,
"learning_rate": 1.5595735824088972e-05,
"loss": 0.1935,
"step": 600
},
{
"epoch": 3.0,
"eval_accuracy": 0.88,
"eval_f1": 0.7931034482758621,
"eval_loss": 0.4657374620437622,
"eval_precision": 0.8440366972477065,
"eval_recall": 0.7479674796747967,
"eval_runtime": 1.5234,
"eval_samples_per_second": 262.573,
"eval_steps_per_second": 16.411,
"step": 600
},
{
"epoch": 3.05,
"grad_norm": 0.058765921741724014,
"learning_rate": 1.4815949032884523e-05,
"loss": 0.0065,
"step": 610
},
{
"epoch": 3.1,
"grad_norm": 0.05543431267142296,
"learning_rate": 1.4036162241680076e-05,
"loss": 0.1243,
"step": 620
},
{
"epoch": 3.15,
"grad_norm": 4.4582014083862305,
"learning_rate": 1.3256375450475625e-05,
"loss": 0.0124,
"step": 630
},
{
"epoch": 3.2,
"grad_norm": 0.7271829843521118,
"learning_rate": 1.2476588659271178e-05,
"loss": 0.0167,
"step": 640
},
{
"epoch": 3.25,
"grad_norm": 0.07129786163568497,
"learning_rate": 1.169680186806673e-05,
"loss": 0.0047,
"step": 650
},
{
"epoch": 3.3,
"grad_norm": 0.040439434349536896,
"learning_rate": 1.091701507686228e-05,
"loss": 0.0031,
"step": 660
},
{
"epoch": 3.35,
"grad_norm": 0.032109107822179794,
"learning_rate": 1.0137228285657833e-05,
"loss": 0.0047,
"step": 670
},
{
"epoch": 3.4,
"grad_norm": 0.068085677921772,
"learning_rate": 9.357441494453382e-06,
"loss": 0.1646,
"step": 680
},
{
"epoch": 3.45,
"grad_norm": 0.031069407239556313,
"learning_rate": 8.577654703248935e-06,
"loss": 0.267,
"step": 690
},
{
"epoch": 3.5,
"grad_norm": 0.11077981442213058,
"learning_rate": 7.797867912044486e-06,
"loss": 0.0763,
"step": 700
},
{
"epoch": 3.55,
"grad_norm": 0.0862860158085823,
"learning_rate": 7.018081120840038e-06,
"loss": 0.0946,
"step": 710
},
{
"epoch": 3.6,
"grad_norm": 0.03846760839223862,
"learning_rate": 6.238294329635589e-06,
"loss": 0.1012,
"step": 720
},
{
"epoch": 3.65,
"grad_norm": 0.045732282102108,
"learning_rate": 5.45850753843114e-06,
"loss": 0.0695,
"step": 730
},
{
"epoch": 3.7,
"grad_norm": 0.044342800974845886,
"learning_rate": 4.678720747226691e-06,
"loss": 0.0064,
"step": 740
},
{
"epoch": 3.75,
"grad_norm": 0.06440162658691406,
"learning_rate": 3.898933956022243e-06,
"loss": 0.0971,
"step": 750
},
{
"epoch": 3.8,
"grad_norm": 0.04895680025219917,
"learning_rate": 3.1191471648177945e-06,
"loss": 0.1129,
"step": 760
},
{
"epoch": 3.85,
"grad_norm": 20.34134864807129,
"learning_rate": 2.3393603736133456e-06,
"loss": 0.1191,
"step": 770
},
{
"epoch": 3.9,
"grad_norm": 0.12530703842639923,
"learning_rate": 1.5595735824088973e-06,
"loss": 0.0551,
"step": 780
},
{
"epoch": 3.95,
"grad_norm": 0.05208105966448784,
"learning_rate": 7.797867912044486e-07,
"loss": 0.0498,
"step": 790
},
{
"epoch": 4.0,
"grad_norm": 0.04303634166717529,
"learning_rate": 0.0,
"loss": 0.0832,
"step": 800
},
{
"epoch": 4.0,
"eval_accuracy": 0.875,
"eval_f1": 0.7983870967741935,
"eval_loss": 0.5619988441467285,
"eval_precision": 0.792,
"eval_recall": 0.8048780487804879,
"eval_runtime": 1.5731,
"eval_samples_per_second": 254.276,
"eval_steps_per_second": 15.892,
"step": 800
}
],
"logging_steps": 10,
"max_steps": 800,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 847261481803776.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": {
"_wandb": {},
"assignments": {},
"learning_rate": 6.238294329635589e-05,
"metric": "eval/loss",
"num_train_epochs": 4,
"per_device_train_batch_size": 8,
"seed": 31
}
}