|
{ |
|
"best_metric": 0.8127490039840638, |
|
"best_model_checkpoint": "distilbert-base-multilingual-cased-hyper-matt/run-xcydzmg4/checkpoint-300", |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 300, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 3.299391984939575, |
|
"learning_rate": 3.163832728034676e-05, |
|
"loss": 0.6312, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 2.2213969230651855, |
|
"learning_rate": 3.0547350477576185e-05, |
|
"loss": 0.5159, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 1.8511892557144165, |
|
"learning_rate": 2.9456373674805607e-05, |
|
"loss": 0.5112, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 3.693004608154297, |
|
"learning_rate": 2.836539687203503e-05, |
|
"loss": 0.3914, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 6.287681579589844, |
|
"learning_rate": 2.727442006926445e-05, |
|
"loss": 0.3439, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 3.235572576522827, |
|
"learning_rate": 2.6183443266493874e-05, |
|
"loss": 0.4263, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 9.508350372314453, |
|
"learning_rate": 2.5092466463723296e-05, |
|
"loss": 0.3324, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 3.4181947708129883, |
|
"learning_rate": 2.4001489660952715e-05, |
|
"loss": 0.4191, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 3.315913200378418, |
|
"learning_rate": 2.2910512858182137e-05, |
|
"loss": 0.3647, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 3.4039485454559326, |
|
"learning_rate": 2.181953605541156e-05, |
|
"loss": 0.2834, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.87, |
|
"eval_f1": 0.792, |
|
"eval_loss": 0.31944602727890015, |
|
"eval_precision": 0.7795275590551181, |
|
"eval_recall": 0.8048780487804879, |
|
"eval_runtime": 1.5401, |
|
"eval_samples_per_second": 259.717, |
|
"eval_steps_per_second": 16.232, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 8.336113929748535, |
|
"learning_rate": 2.072855925264098e-05, |
|
"loss": 0.2983, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 9.28201675415039, |
|
"learning_rate": 1.9637582449870403e-05, |
|
"loss": 0.3149, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 6.901111125946045, |
|
"learning_rate": 1.8546605647099826e-05, |
|
"loss": 0.3158, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 1.6393898725509644, |
|
"learning_rate": 1.7455628844329248e-05, |
|
"loss": 0.2477, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 3.3284659385681152, |
|
"learning_rate": 1.636465204155867e-05, |
|
"loss": 0.1975, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 12.852226257324219, |
|
"learning_rate": 1.5273675238788092e-05, |
|
"loss": 0.2251, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 10.670600891113281, |
|
"learning_rate": 1.4182698436017515e-05, |
|
"loss": 0.263, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 2.7927801609039307, |
|
"learning_rate": 1.3091721633246937e-05, |
|
"loss": 0.168, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 1.2662396430969238, |
|
"learning_rate": 1.2000744830476357e-05, |
|
"loss": 0.2702, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 7.965029239654541, |
|
"learning_rate": 1.090976802770578e-05, |
|
"loss": 0.2884, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.87, |
|
"eval_f1": 0.7815126050420168, |
|
"eval_loss": 0.3038490414619446, |
|
"eval_precision": 0.808695652173913, |
|
"eval_recall": 0.7560975609756098, |
|
"eval_runtime": 1.5371, |
|
"eval_samples_per_second": 260.227, |
|
"eval_steps_per_second": 16.264, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 6.486904144287109, |
|
"learning_rate": 9.818791224935202e-06, |
|
"loss": 0.1707, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 1.786522626876831, |
|
"learning_rate": 8.727814422164624e-06, |
|
"loss": 0.2129, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 8.180079460144043, |
|
"learning_rate": 7.636837619394046e-06, |
|
"loss": 0.1224, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 9.521204948425293, |
|
"learning_rate": 6.545860816623468e-06, |
|
"loss": 0.1735, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 4.741390705108643, |
|
"learning_rate": 5.45488401385289e-06, |
|
"loss": 0.1549, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"grad_norm": 2.954021453857422, |
|
"learning_rate": 4.363907211082312e-06, |
|
"loss": 0.1712, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"grad_norm": 14.713294982910156, |
|
"learning_rate": 3.272930408311734e-06, |
|
"loss": 0.2493, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 0.9680449366569519, |
|
"learning_rate": 2.181953605541156e-06, |
|
"loss": 0.1804, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"grad_norm": 14.325002670288086, |
|
"learning_rate": 1.090976802770578e-06, |
|
"loss": 0.2148, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 1.2194037437438965, |
|
"learning_rate": 0.0, |
|
"loss": 0.1031, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8825, |
|
"eval_f1": 0.8127490039840638, |
|
"eval_loss": 0.3291611075401306, |
|
"eval_precision": 0.796875, |
|
"eval_recall": 0.8292682926829268, |
|
"eval_runtime": 1.5817, |
|
"eval_samples_per_second": 252.886, |
|
"eval_steps_per_second": 15.805, |
|
"step": 300 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 300, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 635446111352832.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": { |
|
"_wandb": {}, |
|
"assignments": {}, |
|
"learning_rate": 3.272930408311734e-05, |
|
"metric": "eval/loss", |
|
"num_train_epochs": 3, |
|
"per_device_train_batch_size": 16, |
|
"seed": 40 |
|
} |
|
} |
|
|