{ "best_metric": 0.8722264728385616, "best_model_checkpoint": "distilbert-base-multilingual-cased-aoe-hyper/run-3/checkpoint-328", "epoch": 4.0, "eval_steps": 500, "global_step": 328, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12195121951219512, "grad_norm": 1.4140416383743286, "learning_rate": 9.720486892282597e-06, "loss": 0.5947, "step": 10 }, { "epoch": 0.24390243902439024, "grad_norm": 0.7245877981185913, "learning_rate": 9.477474719975531e-06, "loss": 0.5154, "step": 20 }, { "epoch": 0.36585365853658536, "grad_norm": 0.9261638522148132, "learning_rate": 9.234462547668467e-06, "loss": 0.479, "step": 30 }, { "epoch": 0.4878048780487805, "grad_norm": 1.6241005659103394, "learning_rate": 8.991450375361402e-06, "loss": 0.4377, "step": 40 }, { "epoch": 0.6097560975609756, "grad_norm": 1.7590631246566772, "learning_rate": 8.748438203054338e-06, "loss": 0.4112, "step": 50 }, { "epoch": 0.7317073170731707, "grad_norm": 2.0623185634613037, "learning_rate": 8.505426030747272e-06, "loss": 0.3754, "step": 60 }, { "epoch": 0.8536585365853658, "grad_norm": 1.2114999294281006, "learning_rate": 8.262413858440207e-06, "loss": 0.3731, "step": 70 }, { "epoch": 0.975609756097561, "grad_norm": 1.9598636627197266, "learning_rate": 8.019401686133143e-06, "loss": 0.3791, "step": 80 }, { "epoch": 1.0, "eval_accuracy": 0.8607498087222647, "eval_f1": 0.7225609756097561, "eval_loss": 0.33913305401802063, "eval_precision": 0.688953488372093, "eval_recall": 0.7596153846153846, "eval_runtime": 4.8898, "eval_samples_per_second": 267.29, "eval_steps_per_second": 16.77, "step": 82 }, { "epoch": 1.0975609756097562, "grad_norm": 1.7537786960601807, "learning_rate": 7.776389513826078e-06, "loss": 0.3125, "step": 90 }, { "epoch": 1.2195121951219512, "grad_norm": 4.089900493621826, "learning_rate": 7.533377341519012e-06, "loss": 0.332, "step": 100 }, { "epoch": 1.3414634146341464, "grad_norm": 1.488580346107483, "learning_rate": 7.290365169211947e-06, "loss": 0.286, "step": 110 }, { "epoch": 1.4634146341463414, "grad_norm": 1.6245107650756836, "learning_rate": 7.0473529969048826e-06, "loss": 0.2952, "step": 120 }, { "epoch": 1.5853658536585367, "grad_norm": 2.5228676795959473, "learning_rate": 6.804340824597818e-06, "loss": 0.2956, "step": 130 }, { "epoch": 1.7073170731707317, "grad_norm": 5.439332008361816, "learning_rate": 6.561328652290753e-06, "loss": 0.2699, "step": 140 }, { "epoch": 1.8292682926829267, "grad_norm": 6.190291404724121, "learning_rate": 6.318316479983688e-06, "loss": 0.2851, "step": 150 }, { "epoch": 1.951219512195122, "grad_norm": 2.851015567779541, "learning_rate": 6.075304307676622e-06, "loss": 0.2729, "step": 160 }, { "epoch": 2.0, "eval_accuracy": 0.8684009181331293, "eval_f1": 0.7329192546583851, "eval_loss": 0.29830601811408997, "eval_precision": 0.7108433734939759, "eval_recall": 0.7564102564102564, "eval_runtime": 4.8964, "eval_samples_per_second": 266.931, "eval_steps_per_second": 16.747, "step": 164 }, { "epoch": 2.073170731707317, "grad_norm": 1.8465652465820312, "learning_rate": 5.832292135369558e-06, "loss": 0.2559, "step": 170 }, { "epoch": 2.1951219512195124, "grad_norm": 1.781294822692871, "learning_rate": 5.5892799630624935e-06, "loss": 0.2523, "step": 180 }, { "epoch": 2.317073170731707, "grad_norm": 4.9645256996154785, "learning_rate": 5.346267790755428e-06, "loss": 0.2737, "step": 190 }, { "epoch": 2.4390243902439024, "grad_norm": 2.3662397861480713, "learning_rate": 5.103255618448363e-06, "loss": 0.2607, "step": 200 }, { "epoch": 2.5609756097560976, "grad_norm": 3.443523645401001, "learning_rate": 4.8602434461412986e-06, "loss": 0.2301, "step": 210 }, { "epoch": 2.682926829268293, "grad_norm": 4.988992691040039, "learning_rate": 4.617231273834233e-06, "loss": 0.2451, "step": 220 }, { "epoch": 2.8048780487804876, "grad_norm": 6.013027667999268, "learning_rate": 4.374219101527169e-06, "loss": 0.2386, "step": 230 }, { "epoch": 2.926829268292683, "grad_norm": 4.056751728057861, "learning_rate": 4.131206929220104e-06, "loss": 0.236, "step": 240 }, { "epoch": 3.0, "eval_accuracy": 0.8661055853098699, "eval_f1": 0.7422680412371134, "eval_loss": 0.30293241143226624, "eval_precision": 0.6866485013623979, "eval_recall": 0.8076923076923077, "eval_runtime": 4.9202, "eval_samples_per_second": 265.637, "eval_steps_per_second": 16.666, "step": 246 }, { "epoch": 3.048780487804878, "grad_norm": 5.498621940612793, "learning_rate": 3.888194756913039e-06, "loss": 0.2761, "step": 250 }, { "epoch": 3.1707317073170733, "grad_norm": 2.0462112426757812, "learning_rate": 3.6451825846059735e-06, "loss": 0.2074, "step": 260 }, { "epoch": 3.292682926829268, "grad_norm": 4.663333892822266, "learning_rate": 3.402170412298909e-06, "loss": 0.2357, "step": 270 }, { "epoch": 3.4146341463414633, "grad_norm": 4.013532638549805, "learning_rate": 3.159158239991844e-06, "loss": 0.2385, "step": 280 }, { "epoch": 3.5365853658536586, "grad_norm": 4.440213680267334, "learning_rate": 2.916146067684779e-06, "loss": 0.226, "step": 290 }, { "epoch": 3.658536585365854, "grad_norm": 4.662286758422852, "learning_rate": 2.673133895377714e-06, "loss": 0.2129, "step": 300 }, { "epoch": 3.7804878048780486, "grad_norm": 4.345000267028809, "learning_rate": 2.4301217230706493e-06, "loss": 0.2366, "step": 310 }, { "epoch": 3.902439024390244, "grad_norm": 2.3199422359466553, "learning_rate": 2.1871095507635844e-06, "loss": 0.2462, "step": 320 }, { "epoch": 4.0, "eval_accuracy": 0.8722264728385616, "eval_f1": 0.7481146304675717, "eval_loss": 0.3029697835445404, "eval_precision": 0.7065527065527065, "eval_recall": 0.7948717948717948, "eval_runtime": 4.8827, "eval_samples_per_second": 267.68, "eval_steps_per_second": 16.794, "step": 328 } ], "logging_steps": 10, "max_steps": 410, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2704984280555520.0, "train_batch_size": 64, "trial_name": null, "trial_params": { "learning_rate": 9.963499064589662e-06, "num_train_epochs": 5, "per_device_train_batch_size": 64, "seed": 9 } }