{ "best_metric": 0.823045267489712, "best_model_checkpoint": "distilbert-base-multilingual-cased-hyper-matt/run-ud8wy1bz/checkpoint-200", "epoch": 2.0, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "grad_norm": 2.0627684593200684, "learning_rate": 5.8944331187986266e-05, "loss": 0.5729, "step": 10 }, { "epoch": 0.2, "grad_norm": 3.5791327953338623, "learning_rate": 5.7945274727172945e-05, "loss": 0.505, "step": 20 }, { "epoch": 0.3, "grad_norm": 6.052925109863281, "learning_rate": 5.694621826635961e-05, "loss": 0.507, "step": 30 }, { "epoch": 0.4, "grad_norm": 1.8351855278015137, "learning_rate": 5.594716180554629e-05, "loss": 0.4524, "step": 40 }, { "epoch": 0.5, "grad_norm": 6.351097583770752, "learning_rate": 5.494810534473296e-05, "loss": 0.3062, "step": 50 }, { "epoch": 0.6, "grad_norm": 1.3773590326309204, "learning_rate": 5.394904888391964e-05, "loss": 0.3473, "step": 60 }, { "epoch": 0.7, "grad_norm": 3.009101629257202, "learning_rate": 5.2949992423106304e-05, "loss": 0.3905, "step": 70 }, { "epoch": 0.8, "grad_norm": 3.680708646774292, "learning_rate": 5.195093596229298e-05, "loss": 0.3812, "step": 80 }, { "epoch": 0.9, "grad_norm": 3.7381820678710938, "learning_rate": 5.0951879501479655e-05, "loss": 0.3667, "step": 90 }, { "epoch": 1.0, "grad_norm": 4.209010124206543, "learning_rate": 4.995282304066633e-05, "loss": 0.2821, "step": 100 }, { "epoch": 1.0, "eval_accuracy": 0.845, "eval_f1": 0.693069306930693, "eval_loss": 0.41840919852256775, "eval_precision": 0.8860759493670886, "eval_recall": 0.5691056910569106, "eval_runtime": 1.5344, "eval_samples_per_second": 260.687, "eval_steps_per_second": 16.293, "step": 100 }, { "epoch": 1.1, "grad_norm": 12.662829399108887, "learning_rate": 4.8953766579853e-05, "loss": 0.2514, "step": 110 }, { "epoch": 1.2, "grad_norm": 4.311421871185303, "learning_rate": 4.795471011903968e-05, "loss": 0.2339, "step": 120 }, { "epoch": 1.3, "grad_norm": 7.38905668258667, "learning_rate": 4.695565365822635e-05, "loss": 0.2277, "step": 130 }, { "epoch": 1.4, "grad_norm": 2.4141809940338135, "learning_rate": 4.595659719741303e-05, "loss": 0.306, "step": 140 }, { "epoch": 1.5, "grad_norm": 2.5219364166259766, "learning_rate": 4.49575407365997e-05, "loss": 0.2245, "step": 150 }, { "epoch": 1.6, "grad_norm": 2.283900499343872, "learning_rate": 4.3958484275786364e-05, "loss": 0.2222, "step": 160 }, { "epoch": 1.7, "grad_norm": 7.031118392944336, "learning_rate": 4.295942781497304e-05, "loss": 0.3507, "step": 170 }, { "epoch": 1.8, "grad_norm": 3.3153152465820312, "learning_rate": 4.1960371354159715e-05, "loss": 0.305, "step": 180 }, { "epoch": 1.9, "grad_norm": 5.0271315574646, "learning_rate": 4.096131489334639e-05, "loss": 0.2648, "step": 190 }, { "epoch": 2.0, "grad_norm": 2.9085686206817627, "learning_rate": 3.996225843253306e-05, "loss": 0.1931, "step": 200 }, { "epoch": 2.0, "eval_accuracy": 0.8925, "eval_f1": 0.823045267489712, "eval_loss": 0.30675894021987915, "eval_precision": 0.8333333333333334, "eval_recall": 0.8130081300813008, "eval_runtime": 1.5679, "eval_samples_per_second": 255.111, "eval_steps_per_second": 15.944, "step": 200 } ], "logging_steps": 10, "max_steps": 600, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 423630740901888.0, "train_batch_size": 16, "trial_name": null, "trial_params": { "_wandb": {}, "assignments": {}, "learning_rate": 5.9943387648799594e-05, "metric": "eval/loss", "num_train_epochs": 6, "per_device_train_batch_size": 16, "seed": 31 } }