|
{ |
|
"best_metric": 0.7951807228915663, |
|
"best_model_checkpoint": "distilbert-base-multilingual-cased-hyper-matt/run-jvfupcpb/checkpoint-600", |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 600, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 2.15091609954834, |
|
"learning_rate": 4.3626180253789615e-05, |
|
"loss": 0.5952, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 2.9856648445129395, |
|
"learning_rate": 4.288675346982709e-05, |
|
"loss": 0.5841, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 1.2374624013900757, |
|
"learning_rate": 4.2147326685864546e-05, |
|
"loss": 0.4749, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 2.3952836990356445, |
|
"learning_rate": 4.140789990190201e-05, |
|
"loss": 0.5499, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 6.6773223876953125, |
|
"learning_rate": 4.0668473117939477e-05, |
|
"loss": 0.4286, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 3.308351993560791, |
|
"learning_rate": 3.992904633397694e-05, |
|
"loss": 0.5564, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 5.943718910217285, |
|
"learning_rate": 3.918961955001441e-05, |
|
"loss": 0.6481, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 2.2899556159973145, |
|
"learning_rate": 3.845019276605187e-05, |
|
"loss": 0.3176, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 8.657326698303223, |
|
"learning_rate": 3.771076598208933e-05, |
|
"loss": 0.332, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 3.0114917755126953, |
|
"learning_rate": 3.6971339198126796e-05, |
|
"loss": 0.5015, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.8786035776138306, |
|
"learning_rate": 3.623191241416426e-05, |
|
"loss": 0.4642, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.8745856285095215, |
|
"learning_rate": 3.549248563020173e-05, |
|
"loss": 0.4042, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 7.431813716888428, |
|
"learning_rate": 3.475305884623919e-05, |
|
"loss": 0.278, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 2.6287291049957275, |
|
"learning_rate": 3.401363206227666e-05, |
|
"loss": 0.3648, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 8.826187133789062, |
|
"learning_rate": 3.3274205278314116e-05, |
|
"loss": 0.4816, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 3.6470208168029785, |
|
"learning_rate": 3.253477849435158e-05, |
|
"loss": 0.4655, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 3.792525053024292, |
|
"learning_rate": 3.1795351710389047e-05, |
|
"loss": 0.391, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 2.4055352210998535, |
|
"learning_rate": 3.1055924926426505e-05, |
|
"loss": 0.3063, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 2.7378880977630615, |
|
"learning_rate": 3.0316498142463974e-05, |
|
"loss": 0.4052, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 5.600724697113037, |
|
"learning_rate": 2.9577071358501436e-05, |
|
"loss": 0.2775, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8675, |
|
"eval_f1": 0.7854251012145749, |
|
"eval_loss": 0.3443775475025177, |
|
"eval_precision": 0.782258064516129, |
|
"eval_recall": 0.7886178861788617, |
|
"eval_runtime": 1.5164, |
|
"eval_samples_per_second": 263.79, |
|
"eval_steps_per_second": 16.487, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 0.5725231766700745, |
|
"learning_rate": 2.8837644574538904e-05, |
|
"loss": 0.2948, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 9.40677547454834, |
|
"learning_rate": 2.8098217790576366e-05, |
|
"loss": 0.2623, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 0.5530999898910522, |
|
"learning_rate": 2.735879100661383e-05, |
|
"loss": 0.2702, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 15.44442081451416, |
|
"learning_rate": 2.6619364222651293e-05, |
|
"loss": 0.5376, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 0.7881362438201904, |
|
"learning_rate": 2.587993743868876e-05, |
|
"loss": 0.3554, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 4.4421067237854, |
|
"learning_rate": 2.514051065472622e-05, |
|
"loss": 0.1891, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 4.921718120574951, |
|
"learning_rate": 2.440108387076369e-05, |
|
"loss": 0.2338, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 3.178603172302246, |
|
"learning_rate": 2.366165708680115e-05, |
|
"loss": 0.3782, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 0.419980525970459, |
|
"learning_rate": 2.2922230302838617e-05, |
|
"loss": 0.1908, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 8.025728225708008, |
|
"learning_rate": 2.218280351887608e-05, |
|
"loss": 0.2183, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 0.9326677918434143, |
|
"learning_rate": 2.1443376734913544e-05, |
|
"loss": 0.3055, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 5.646274566650391, |
|
"learning_rate": 2.0703949950951006e-05, |
|
"loss": 0.2639, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 14.51612663269043, |
|
"learning_rate": 1.996452316698847e-05, |
|
"loss": 0.2841, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 2.912120819091797, |
|
"learning_rate": 1.9225096383025936e-05, |
|
"loss": 0.1919, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 11.444270133972168, |
|
"learning_rate": 1.8485669599063398e-05, |
|
"loss": 0.1804, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 3.0050899982452393, |
|
"learning_rate": 1.7746242815100863e-05, |
|
"loss": 0.166, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 24.114974975585938, |
|
"learning_rate": 1.700681603113833e-05, |
|
"loss": 0.3688, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 0.28243792057037354, |
|
"learning_rate": 1.626738924717579e-05, |
|
"loss": 0.3356, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 0.6977348327636719, |
|
"learning_rate": 1.5527962463213253e-05, |
|
"loss": 0.4389, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 21.475341796875, |
|
"learning_rate": 1.4788535679250718e-05, |
|
"loss": 0.2895, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.87, |
|
"eval_f1": 0.792, |
|
"eval_loss": 0.33494508266448975, |
|
"eval_precision": 0.7795275590551181, |
|
"eval_recall": 0.8048780487804879, |
|
"eval_runtime": 1.5155, |
|
"eval_samples_per_second": 263.946, |
|
"eval_steps_per_second": 16.497, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 4.863823413848877, |
|
"learning_rate": 1.4049108895288183e-05, |
|
"loss": 0.1339, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 21.860219955444336, |
|
"learning_rate": 1.3309682111325647e-05, |
|
"loss": 0.2416, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"grad_norm": 0.2086387276649475, |
|
"learning_rate": 1.257025532736311e-05, |
|
"loss": 0.1266, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 0.49468812346458435, |
|
"learning_rate": 1.1830828543400576e-05, |
|
"loss": 0.2281, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 4.723079204559326, |
|
"learning_rate": 1.109140175943804e-05, |
|
"loss": 0.0827, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 14.231110572814941, |
|
"learning_rate": 1.0351974975475503e-05, |
|
"loss": 0.1488, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"grad_norm": 0.563101053237915, |
|
"learning_rate": 9.612548191512968e-06, |
|
"loss": 0.31, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 20.05416488647461, |
|
"learning_rate": 8.873121407550432e-06, |
|
"loss": 0.1491, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"grad_norm": 7.252449989318848, |
|
"learning_rate": 8.133694623587895e-06, |
|
"loss": 0.2171, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 1.7218003273010254, |
|
"learning_rate": 7.394267839625359e-06, |
|
"loss": 0.0633, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"grad_norm": 0.06682740151882172, |
|
"learning_rate": 6.654841055662823e-06, |
|
"loss": 0.2153, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"grad_norm": 0.25739797949790955, |
|
"learning_rate": 5.915414271700288e-06, |
|
"loss": 0.0267, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 0.05432148650288582, |
|
"learning_rate": 5.175987487737751e-06, |
|
"loss": 0.1115, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"grad_norm": 3.8702685832977295, |
|
"learning_rate": 4.436560703775216e-06, |
|
"loss": 0.249, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"grad_norm": 28.443191528320312, |
|
"learning_rate": 3.6971339198126795e-06, |
|
"loss": 0.3686, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 0.1164081022143364, |
|
"learning_rate": 2.957707135850144e-06, |
|
"loss": 0.0713, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"grad_norm": 0.35371047258377075, |
|
"learning_rate": 2.218280351887608e-06, |
|
"loss": 0.1125, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"grad_norm": 17.213769912719727, |
|
"learning_rate": 1.478853567925072e-06, |
|
"loss": 0.2296, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"grad_norm": 0.3524082899093628, |
|
"learning_rate": 7.39426783962536e-07, |
|
"loss": 0.1454, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 0.2137354463338852, |
|
"learning_rate": 0.0, |
|
"loss": 0.0681, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8725, |
|
"eval_f1": 0.7951807228915663, |
|
"eval_loss": 0.4910232424736023, |
|
"eval_precision": 0.7857142857142857, |
|
"eval_recall": 0.8048780487804879, |
|
"eval_runtime": 1.5682, |
|
"eval_samples_per_second": 255.073, |
|
"eval_steps_per_second": 15.942, |
|
"step": 600 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 600, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 635446111352832.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": { |
|
"_wandb": {}, |
|
"assignments": {}, |
|
"learning_rate": 4.436560703775216e-05, |
|
"metric": "eval/loss", |
|
"num_train_epochs": 3, |
|
"per_device_train_batch_size": 8, |
|
"seed": 40 |
|
} |
|
} |
|
|