|
{ |
|
"best_metric": 0.7722007722007722, |
|
"best_model_checkpoint": "distilbert-base-multilingual-cased-hyper-matt/run-hq22ls8k/checkpoint-400", |
|
"epoch": 4.0, |
|
"eval_steps": 500, |
|
"global_step": 400, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.1560730934143066, |
|
"learning_rate": 9.426932053278163e-05, |
|
"loss": 0.595, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 5.519632339477539, |
|
"learning_rate": 9.267153543900568e-05, |
|
"loss": 0.5409, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 4.012811183929443, |
|
"learning_rate": 9.107375034522972e-05, |
|
"loss": 0.4944, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 9.410440444946289, |
|
"learning_rate": 8.947596525145376e-05, |
|
"loss": 0.416, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 2.913337230682373, |
|
"learning_rate": 8.78781801576778e-05, |
|
"loss": 0.4236, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.957322359085083, |
|
"learning_rate": 8.628039506390185e-05, |
|
"loss": 0.4094, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 2.4026949405670166, |
|
"learning_rate": 8.468260997012588e-05, |
|
"loss": 0.331, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.6281132698059082, |
|
"learning_rate": 8.308482487634992e-05, |
|
"loss": 0.3211, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 2.870759963989258, |
|
"learning_rate": 8.148703978257395e-05, |
|
"loss": 0.2933, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.9348390102386475, |
|
"learning_rate": 7.9889254688798e-05, |
|
"loss": 0.4921, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8375, |
|
"eval_f1": 0.7670250896057348, |
|
"eval_loss": 0.358794629573822, |
|
"eval_precision": 0.6858974358974359, |
|
"eval_recall": 0.8699186991869918, |
|
"eval_runtime": 1.5446, |
|
"eval_samples_per_second": 258.973, |
|
"eval_steps_per_second": 16.186, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 2.657993793487549, |
|
"learning_rate": 7.829146959502203e-05, |
|
"loss": 0.3661, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 9.199877738952637, |
|
"learning_rate": 7.669368450124608e-05, |
|
"loss": 0.3209, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 6.665186882019043, |
|
"learning_rate": 7.509589940747012e-05, |
|
"loss": 0.3953, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 5.048425197601318, |
|
"learning_rate": 7.349811431369416e-05, |
|
"loss": 0.2938, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 3.8985488414764404, |
|
"learning_rate": 7.19003292199182e-05, |
|
"loss": 0.2387, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 1.6217758655548096, |
|
"learning_rate": 7.030254412614223e-05, |
|
"loss": 0.2279, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 13.243719100952148, |
|
"learning_rate": 6.870475903236628e-05, |
|
"loss": 0.243, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 1.4423586130142212, |
|
"learning_rate": 6.710697393859032e-05, |
|
"loss": 0.3166, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 3.1215100288391113, |
|
"learning_rate": 6.550918884481436e-05, |
|
"loss": 0.3601, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.1280789375305176, |
|
"learning_rate": 6.39114037510384e-05, |
|
"loss": 0.2303, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.87, |
|
"eval_f1": 0.7699115044247787, |
|
"eval_loss": 0.30909162759780884, |
|
"eval_precision": 0.8446601941747572, |
|
"eval_recall": 0.7073170731707317, |
|
"eval_runtime": 1.5339, |
|
"eval_samples_per_second": 260.775, |
|
"eval_steps_per_second": 16.298, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 2.1738266944885254, |
|
"learning_rate": 6.231361865726245e-05, |
|
"loss": 0.1502, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 25.343351364135742, |
|
"learning_rate": 6.0715833563486475e-05, |
|
"loss": 0.1578, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 3.0342938899993896, |
|
"learning_rate": 5.9118048469710523e-05, |
|
"loss": 0.1892, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 15.658936500549316, |
|
"learning_rate": 5.752026337593456e-05, |
|
"loss": 0.2544, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 15.843140602111816, |
|
"learning_rate": 5.5922478282158606e-05, |
|
"loss": 0.2167, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"grad_norm": 5.072413444519043, |
|
"learning_rate": 5.432469318838264e-05, |
|
"loss": 0.2226, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"grad_norm": 5.797332763671875, |
|
"learning_rate": 5.272690809460668e-05, |
|
"loss": 0.1347, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 9.168190956115723, |
|
"learning_rate": 5.112912300083072e-05, |
|
"loss": 0.1976, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"grad_norm": 5.497272968292236, |
|
"learning_rate": 4.9531337907054765e-05, |
|
"loss": 0.2177, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.3119521141052246, |
|
"learning_rate": 4.79335528132788e-05, |
|
"loss": 0.1684, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.845, |
|
"eval_f1": 0.743801652892562, |
|
"eval_loss": 0.47961556911468506, |
|
"eval_precision": 0.7563025210084033, |
|
"eval_recall": 0.7317073170731707, |
|
"eval_runtime": 1.5168, |
|
"eval_samples_per_second": 263.704, |
|
"eval_steps_per_second": 16.482, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"grad_norm": 1.9762777090072632, |
|
"learning_rate": 4.633576771950284e-05, |
|
"loss": 0.1612, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"grad_norm": 1.3597666025161743, |
|
"learning_rate": 4.473798262572688e-05, |
|
"loss": 0.0888, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"grad_norm": 4.3360419273376465, |
|
"learning_rate": 4.3140197531950924e-05, |
|
"loss": 0.1549, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"grad_norm": 0.6792251467704773, |
|
"learning_rate": 4.154241243817496e-05, |
|
"loss": 0.0805, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"grad_norm": 4.299612998962402, |
|
"learning_rate": 3.9944627344399e-05, |
|
"loss": 0.1412, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"grad_norm": 1.4792366027832031, |
|
"learning_rate": 3.834684225062304e-05, |
|
"loss": 0.0666, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"grad_norm": 0.11450085788965225, |
|
"learning_rate": 3.674905715684708e-05, |
|
"loss": 0.1294, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"grad_norm": 0.6610074639320374, |
|
"learning_rate": 3.515127206307112e-05, |
|
"loss": 0.0168, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"grad_norm": 0.08138225972652435, |
|
"learning_rate": 3.355348696929516e-05, |
|
"loss": 0.1562, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 8.49350643157959, |
|
"learning_rate": 3.19557018755192e-05, |
|
"loss": 0.0895, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8525, |
|
"eval_f1": 0.7722007722007722, |
|
"eval_loss": 0.6465945243835449, |
|
"eval_precision": 0.7352941176470589, |
|
"eval_recall": 0.8130081300813008, |
|
"eval_runtime": 1.5173, |
|
"eval_samples_per_second": 263.626, |
|
"eval_steps_per_second": 16.477, |
|
"step": 400 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 600, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 6, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 847261481803776.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": { |
|
"_wandb": {}, |
|
"assignments": {}, |
|
"learning_rate": 9.58671056265576e-05, |
|
"metric": "eval/loss", |
|
"num_train_epochs": 6, |
|
"per_device_train_batch_size": 16, |
|
"seed": 14 |
|
} |
|
} |
|
|