{ "best_metric": 0.7773279352226721, "best_model_checkpoint": "distilbert-base-multilingual-cased-hyper-matt/run-vrcn3dxi/checkpoint-300", "epoch": 3.0, "eval_steps": 500, "global_step": 300, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "grad_norm": 2.9803097248077393, "learning_rate": 5.0385074972904444e-05, "loss": 0.6147, "step": 10 }, { "epoch": 0.2, "grad_norm": 2.1125147342681885, "learning_rate": 4.9356808136722715e-05, "loss": 0.4868, "step": 20 }, { "epoch": 0.3, "grad_norm": 2.646878719329834, "learning_rate": 4.832854130054099e-05, "loss": 0.4208, "step": 30 }, { "epoch": 0.4, "grad_norm": 2.7291347980499268, "learning_rate": 4.730027446435928e-05, "loss": 0.4988, "step": 40 }, { "epoch": 0.5, "grad_norm": 4.168182849884033, "learning_rate": 4.627200762817755e-05, "loss": 0.3829, "step": 50 }, { "epoch": 0.6, "grad_norm": 5.242916584014893, "learning_rate": 4.524374079199583e-05, "loss": 0.371, "step": 60 }, { "epoch": 0.7, "grad_norm": 6.568434715270996, "learning_rate": 4.42154739558141e-05, "loss": 0.3587, "step": 70 }, { "epoch": 0.8, "grad_norm": 6.341504096984863, "learning_rate": 4.318720711963238e-05, "loss": 0.2841, "step": 80 }, { "epoch": 0.9, "grad_norm": 6.951382160186768, "learning_rate": 4.2158940283450655e-05, "loss": 0.5014, "step": 90 }, { "epoch": 1.0, "grad_norm": 2.481485605239868, "learning_rate": 4.113067344726893e-05, "loss": 0.3564, "step": 100 }, { "epoch": 1.0, "eval_accuracy": 0.825, "eval_f1": 0.75177304964539, "eval_loss": 0.38982895016670227, "eval_precision": 0.6666666666666666, "eval_recall": 0.8617886178861789, "eval_runtime": 1.5252, "eval_samples_per_second": 262.256, "eval_steps_per_second": 16.391, "step": 100 }, { "epoch": 1.1, "grad_norm": 3.624406099319458, "learning_rate": 4.010240661108721e-05, "loss": 0.3017, "step": 110 }, { "epoch": 1.2, "grad_norm": 11.477983474731445, "learning_rate": 3.907413977490549e-05, "loss": 0.414, "step": 120 }, { "epoch": 1.3, "grad_norm": 3.667001724243164, "learning_rate": 3.804587293872376e-05, "loss": 0.4467, "step": 130 }, { "epoch": 1.4, "grad_norm": 2.0119214057922363, "learning_rate": 3.701760610254204e-05, "loss": 0.2403, "step": 140 }, { "epoch": 1.5, "grad_norm": 4.618969440460205, "learning_rate": 3.5989339266360316e-05, "loss": 0.2895, "step": 150 }, { "epoch": 1.6, "grad_norm": 6.767045497894287, "learning_rate": 3.4961072430178594e-05, "loss": 0.2201, "step": 160 }, { "epoch": 1.7, "grad_norm": 4.485604286193848, "learning_rate": 3.393280559399687e-05, "loss": 0.3093, "step": 170 }, { "epoch": 1.8, "grad_norm": 4.330203056335449, "learning_rate": 3.290453875781515e-05, "loss": 0.2316, "step": 180 }, { "epoch": 1.9, "grad_norm": 1.8137776851654053, "learning_rate": 3.187627192163342e-05, "loss": 0.2012, "step": 190 }, { "epoch": 2.0, "grad_norm": 0.9346457719802856, "learning_rate": 3.08480050854517e-05, "loss": 0.2902, "step": 200 }, { "epoch": 2.0, "eval_accuracy": 0.8525, "eval_f1": 0.7686274509803922, "eval_loss": 0.34935688972473145, "eval_precision": 0.7424242424242424, "eval_recall": 0.7967479674796748, "eval_runtime": 1.5077, "eval_samples_per_second": 265.312, "eval_steps_per_second": 16.582, "step": 200 }, { "epoch": 2.1, "grad_norm": 2.6348159313201904, "learning_rate": 2.9819738249269974e-05, "loss": 0.144, "step": 210 }, { "epoch": 2.2, "grad_norm": 6.430622100830078, "learning_rate": 2.8791471413088256e-05, "loss": 0.1821, "step": 220 }, { "epoch": 2.3, "grad_norm": 4.538208484649658, "learning_rate": 2.776320457690653e-05, "loss": 0.3038, "step": 230 }, { "epoch": 2.4, "grad_norm": 3.8831608295440674, "learning_rate": 2.673493774072481e-05, "loss": 0.1824, "step": 240 }, { "epoch": 2.5, "grad_norm": 1.0167384147644043, "learning_rate": 2.5706670904543083e-05, "loss": 0.1242, "step": 250 }, { "epoch": 2.6, "grad_norm": 1.3187135457992554, "learning_rate": 2.4678404068361358e-05, "loss": 0.145, "step": 260 }, { "epoch": 2.7, "grad_norm": 6.20335054397583, "learning_rate": 2.365013723217964e-05, "loss": 0.2031, "step": 270 }, { "epoch": 2.8, "grad_norm": 15.89792251586914, "learning_rate": 2.2621870395997914e-05, "loss": 0.1694, "step": 280 }, { "epoch": 2.9, "grad_norm": 8.923992156982422, "learning_rate": 2.159360355981619e-05, "loss": 0.1949, "step": 290 }, { "epoch": 3.0, "grad_norm": 0.10263983905315399, "learning_rate": 2.0565336723634466e-05, "loss": 0.0461, "step": 300 }, { "epoch": 3.0, "eval_accuracy": 0.8625, "eval_f1": 0.7773279352226721, "eval_loss": 0.46338924765586853, "eval_precision": 0.7741935483870968, "eval_recall": 0.7804878048780488, "eval_runtime": 1.5254, "eval_samples_per_second": 262.232, "eval_steps_per_second": 16.39, "step": 300 } ], "logging_steps": 10, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 635446111352832.0, "train_batch_size": 16, "trial_name": null, "trial_params": { "_wandb": {}, "assignments": {}, "learning_rate": 5.1413341809086166e-05, "metric": "eval/loss", "num_train_epochs": 5, "per_device_train_batch_size": 16, "seed": 1 } }