{ "best_metric": 0.7951807228915663, "best_model_checkpoint": "distilbert-base-multilingual-cased-hyper-matt/run-jvfupcpb/checkpoint-600", "epoch": 3.0, "eval_steps": 500, "global_step": 600, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": 2.15091609954834, "learning_rate": 4.3626180253789615e-05, "loss": 0.5952, "step": 10 }, { "epoch": 0.1, "grad_norm": 2.9856648445129395, "learning_rate": 4.288675346982709e-05, "loss": 0.5841, "step": 20 }, { "epoch": 0.15, "grad_norm": 1.2374624013900757, "learning_rate": 4.2147326685864546e-05, "loss": 0.4749, "step": 30 }, { "epoch": 0.2, "grad_norm": 2.3952836990356445, "learning_rate": 4.140789990190201e-05, "loss": 0.5499, "step": 40 }, { "epoch": 0.25, "grad_norm": 6.6773223876953125, "learning_rate": 4.0668473117939477e-05, "loss": 0.4286, "step": 50 }, { "epoch": 0.3, "grad_norm": 3.308351993560791, "learning_rate": 3.992904633397694e-05, "loss": 0.5564, "step": 60 }, { "epoch": 0.35, "grad_norm": 5.943718910217285, "learning_rate": 3.918961955001441e-05, "loss": 0.6481, "step": 70 }, { "epoch": 0.4, "grad_norm": 2.2899556159973145, "learning_rate": 3.845019276605187e-05, "loss": 0.3176, "step": 80 }, { "epoch": 0.45, "grad_norm": 8.657326698303223, "learning_rate": 3.771076598208933e-05, "loss": 0.332, "step": 90 }, { "epoch": 0.5, "grad_norm": 3.0114917755126953, "learning_rate": 3.6971339198126796e-05, "loss": 0.5015, "step": 100 }, { "epoch": 0.55, "grad_norm": 1.8786035776138306, "learning_rate": 3.623191241416426e-05, "loss": 0.4642, "step": 110 }, { "epoch": 0.6, "grad_norm": 1.8745856285095215, "learning_rate": 3.549248563020173e-05, "loss": 0.4042, "step": 120 }, { "epoch": 0.65, "grad_norm": 7.431813716888428, "learning_rate": 3.475305884623919e-05, "loss": 0.278, "step": 130 }, { "epoch": 0.7, "grad_norm": 2.6287291049957275, "learning_rate": 3.401363206227666e-05, "loss": 0.3648, "step": 140 }, { "epoch": 0.75, "grad_norm": 8.826187133789062, "learning_rate": 3.3274205278314116e-05, "loss": 0.4816, "step": 150 }, { "epoch": 0.8, "grad_norm": 3.6470208168029785, "learning_rate": 3.253477849435158e-05, "loss": 0.4655, "step": 160 }, { "epoch": 0.85, "grad_norm": 3.792525053024292, "learning_rate": 3.1795351710389047e-05, "loss": 0.391, "step": 170 }, { "epoch": 0.9, "grad_norm": 2.4055352210998535, "learning_rate": 3.1055924926426505e-05, "loss": 0.3063, "step": 180 }, { "epoch": 0.95, "grad_norm": 2.7378880977630615, "learning_rate": 3.0316498142463974e-05, "loss": 0.4052, "step": 190 }, { "epoch": 1.0, "grad_norm": 5.600724697113037, "learning_rate": 2.9577071358501436e-05, "loss": 0.2775, "step": 200 }, { "epoch": 1.0, "eval_accuracy": 0.8675, "eval_f1": 0.7854251012145749, "eval_loss": 0.3443775475025177, "eval_precision": 0.782258064516129, "eval_recall": 0.7886178861788617, "eval_runtime": 1.5164, "eval_samples_per_second": 263.79, "eval_steps_per_second": 16.487, "step": 200 }, { "epoch": 1.05, "grad_norm": 0.5725231766700745, "learning_rate": 2.8837644574538904e-05, "loss": 0.2948, "step": 210 }, { "epoch": 1.1, "grad_norm": 9.40677547454834, "learning_rate": 2.8098217790576366e-05, "loss": 0.2623, "step": 220 }, { "epoch": 1.15, "grad_norm": 0.5530999898910522, "learning_rate": 2.735879100661383e-05, "loss": 0.2702, "step": 230 }, { "epoch": 1.2, "grad_norm": 15.44442081451416, "learning_rate": 2.6619364222651293e-05, "loss": 0.5376, "step": 240 }, { "epoch": 1.25, "grad_norm": 0.7881362438201904, "learning_rate": 2.587993743868876e-05, "loss": 0.3554, "step": 250 }, { "epoch": 1.3, "grad_norm": 4.4421067237854, "learning_rate": 2.514051065472622e-05, "loss": 0.1891, "step": 260 }, { "epoch": 1.35, "grad_norm": 4.921718120574951, "learning_rate": 2.440108387076369e-05, "loss": 0.2338, "step": 270 }, { "epoch": 1.4, "grad_norm": 3.178603172302246, "learning_rate": 2.366165708680115e-05, "loss": 0.3782, "step": 280 }, { "epoch": 1.45, "grad_norm": 0.419980525970459, "learning_rate": 2.2922230302838617e-05, "loss": 0.1908, "step": 290 }, { "epoch": 1.5, "grad_norm": 8.025728225708008, "learning_rate": 2.218280351887608e-05, "loss": 0.2183, "step": 300 }, { "epoch": 1.55, "grad_norm": 0.9326677918434143, "learning_rate": 2.1443376734913544e-05, "loss": 0.3055, "step": 310 }, { "epoch": 1.6, "grad_norm": 5.646274566650391, "learning_rate": 2.0703949950951006e-05, "loss": 0.2639, "step": 320 }, { "epoch": 1.65, "grad_norm": 14.51612663269043, "learning_rate": 1.996452316698847e-05, "loss": 0.2841, "step": 330 }, { "epoch": 1.7, "grad_norm": 2.912120819091797, "learning_rate": 1.9225096383025936e-05, "loss": 0.1919, "step": 340 }, { "epoch": 1.75, "grad_norm": 11.444270133972168, "learning_rate": 1.8485669599063398e-05, "loss": 0.1804, "step": 350 }, { "epoch": 1.8, "grad_norm": 3.0050899982452393, "learning_rate": 1.7746242815100863e-05, "loss": 0.166, "step": 360 }, { "epoch": 1.85, "grad_norm": 24.114974975585938, "learning_rate": 1.700681603113833e-05, "loss": 0.3688, "step": 370 }, { "epoch": 1.9, "grad_norm": 0.28243792057037354, "learning_rate": 1.626738924717579e-05, "loss": 0.3356, "step": 380 }, { "epoch": 1.95, "grad_norm": 0.6977348327636719, "learning_rate": 1.5527962463213253e-05, "loss": 0.4389, "step": 390 }, { "epoch": 2.0, "grad_norm": 21.475341796875, "learning_rate": 1.4788535679250718e-05, "loss": 0.2895, "step": 400 }, { "epoch": 2.0, "eval_accuracy": 0.87, "eval_f1": 0.792, "eval_loss": 0.33494508266448975, "eval_precision": 0.7795275590551181, "eval_recall": 0.8048780487804879, "eval_runtime": 1.5155, "eval_samples_per_second": 263.946, "eval_steps_per_second": 16.497, "step": 400 }, { "epoch": 2.05, "grad_norm": 4.863823413848877, "learning_rate": 1.4049108895288183e-05, "loss": 0.1339, "step": 410 }, { "epoch": 2.1, "grad_norm": 21.860219955444336, "learning_rate": 1.3309682111325647e-05, "loss": 0.2416, "step": 420 }, { "epoch": 2.15, "grad_norm": 0.2086387276649475, "learning_rate": 1.257025532736311e-05, "loss": 0.1266, "step": 430 }, { "epoch": 2.2, "grad_norm": 0.49468812346458435, "learning_rate": 1.1830828543400576e-05, "loss": 0.2281, "step": 440 }, { "epoch": 2.25, "grad_norm": 4.723079204559326, "learning_rate": 1.109140175943804e-05, "loss": 0.0827, "step": 450 }, { "epoch": 2.3, "grad_norm": 14.231110572814941, "learning_rate": 1.0351974975475503e-05, "loss": 0.1488, "step": 460 }, { "epoch": 2.35, "grad_norm": 0.563101053237915, "learning_rate": 9.612548191512968e-06, "loss": 0.31, "step": 470 }, { "epoch": 2.4, "grad_norm": 20.05416488647461, "learning_rate": 8.873121407550432e-06, "loss": 0.1491, "step": 480 }, { "epoch": 2.45, "grad_norm": 7.252449989318848, "learning_rate": 8.133694623587895e-06, "loss": 0.2171, "step": 490 }, { "epoch": 2.5, "grad_norm": 1.7218003273010254, "learning_rate": 7.394267839625359e-06, "loss": 0.0633, "step": 500 }, { "epoch": 2.55, "grad_norm": 0.06682740151882172, "learning_rate": 6.654841055662823e-06, "loss": 0.2153, "step": 510 }, { "epoch": 2.6, "grad_norm": 0.25739797949790955, "learning_rate": 5.915414271700288e-06, "loss": 0.0267, "step": 520 }, { "epoch": 2.65, "grad_norm": 0.05432148650288582, "learning_rate": 5.175987487737751e-06, "loss": 0.1115, "step": 530 }, { "epoch": 2.7, "grad_norm": 3.8702685832977295, "learning_rate": 4.436560703775216e-06, "loss": 0.249, "step": 540 }, { "epoch": 2.75, "grad_norm": 28.443191528320312, "learning_rate": 3.6971339198126795e-06, "loss": 0.3686, "step": 550 }, { "epoch": 2.8, "grad_norm": 0.1164081022143364, "learning_rate": 2.957707135850144e-06, "loss": 0.0713, "step": 560 }, { "epoch": 2.85, "grad_norm": 0.35371047258377075, "learning_rate": 2.218280351887608e-06, "loss": 0.1125, "step": 570 }, { "epoch": 2.9, "grad_norm": 17.213769912719727, "learning_rate": 1.478853567925072e-06, "loss": 0.2296, "step": 580 }, { "epoch": 2.95, "grad_norm": 0.3524082899093628, "learning_rate": 7.39426783962536e-07, "loss": 0.1454, "step": 590 }, { "epoch": 3.0, "grad_norm": 0.2137354463338852, "learning_rate": 0.0, "loss": 0.0681, "step": 600 }, { "epoch": 3.0, "eval_accuracy": 0.8725, "eval_f1": 0.7951807228915663, "eval_loss": 0.4910232424736023, "eval_precision": 0.7857142857142857, "eval_recall": 0.8048780487804879, "eval_runtime": 1.5682, "eval_samples_per_second": 255.073, "eval_steps_per_second": 15.942, "step": 600 } ], "logging_steps": 10, "max_steps": 600, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 635446111352832.0, "train_batch_size": 8, "trial_name": null, "trial_params": { "_wandb": {}, "assignments": {}, "learning_rate": 4.436560703775216e-05, "metric": "eval/loss", "num_train_epochs": 3, "per_device_train_batch_size": 8, "seed": 40 } }