{ "best_metric": 80.53869377255532, "best_model_checkpoint": "/root/turkic_qa/en_uzn_models/en_uzn_xlm_roberta_large_squad_model/checkpoint-2760", "epoch": 5.0, "eval_steps": 500, "global_step": 3450, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "step": 690, "train_exact_match": 69.83016983016984, "train_f1": 83.62002651699788, "train_runtime": 28.9765, "train_samples_per_second": 43.553, "train_steps_per_second": 1.587 }, { "epoch": 1.0, "grad_norm": 4.982762813568115, "learning_rate": 1e-05, "loss": 1.164, "step": 690 }, { "epoch": 1.0, "eval_exact_match": 64.75, "eval_f1": 78.91171401645128, "eval_runtime": 92.299, "eval_samples_per_second": 43.901, "eval_steps_per_second": 1.571, "step": 690 }, { "epoch": 2.0, "step": 1380, "train_exact_match": 77.22277722277722, "train_f1": 89.40203728575936, "train_runtime": 29.1697, "train_samples_per_second": 43.641, "train_steps_per_second": 1.577 }, { "epoch": 2.0, "grad_norm": 21.06043243408203, "learning_rate": 7.500000000000001e-06, "loss": 0.7976, "step": 1380 }, { "epoch": 2.0, "eval_exact_match": 66.375, "eval_f1": 79.72828200772302, "eval_runtime": 92.6779, "eval_samples_per_second": 43.721, "eval_steps_per_second": 1.565, "step": 1380 }, { "epoch": 3.0, "step": 2070, "train_exact_match": 82.71728271728271, "train_f1": 92.4650831936441, "train_runtime": 29.7118, "train_samples_per_second": 43.585, "train_steps_per_second": 1.582 }, { "epoch": 3.0, "grad_norm": 33.353965759277344, "learning_rate": 5e-06, "loss": 0.5401, "step": 2070 }, { "epoch": 3.0, "eval_exact_match": 66.46875, "eval_f1": 80.21293859314422, "eval_runtime": 92.4601, "eval_samples_per_second": 43.824, "eval_steps_per_second": 1.568, "step": 2070 }, { "epoch": 4.0, "step": 2760, "train_exact_match": 87.61238761238761, "train_f1": 95.04445785242362, "train_runtime": 29.8676, "train_samples_per_second": 43.358, "train_steps_per_second": 1.574 }, { "epoch": 4.0, "grad_norm": 337.98419189453125, "learning_rate": 2.5e-06, "loss": 0.38, "step": 2760 }, { "epoch": 4.0, "eval_exact_match": 67.125, "eval_f1": 80.53869377255532, "eval_runtime": 92.9481, "eval_samples_per_second": 43.594, "eval_steps_per_second": 1.56, "step": 2760 }, { "epoch": 5.0, "step": 3450, "train_exact_match": 89.01098901098901, "train_f1": 95.70481727554154, "train_runtime": 28.4706, "train_samples_per_second": 43.519, "train_steps_per_second": 1.581 }, { "epoch": 5.0, "grad_norm": 27.529285430908203, "learning_rate": 0.0, "loss": 0.2769, "step": 3450 }, { "epoch": 5.0, "eval_exact_match": 66.5625, "eval_f1": 80.1141164891758, "eval_runtime": 92.4845, "eval_samples_per_second": 43.813, "eval_steps_per_second": 1.568, "step": 3450 }, { "epoch": 5.0, "step": 3450, "total_flos": 6.719078999672064e+16, "train_loss": 0.6317114832781363, "train_runtime": 6178.5968, "train_samples_per_second": 15.613, "train_steps_per_second": 0.558 } ], "logging_steps": 500, "max_steps": 3450, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 6.719078999672064e+16, "train_batch_size": 28, "trial_name": null, "trial_params": null }