{ "best_metric": 0.6307053941908713, "best_model_checkpoint": "distilbert-base-multilingual-cased-hyper-matt/run-qtr0udgj/checkpoint-200", "epoch": 2.0, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "grad_norm": 2.049558639526367, "learning_rate": 8.243172962606281e-06, "loss": 0.663, "step": 10 }, { "epoch": 0.2, "grad_norm": 1.8758213520050049, "learning_rate": 7.809321754048056e-06, "loss": 0.5872, "step": 20 }, { "epoch": 0.3, "grad_norm": 1.6676493883132935, "learning_rate": 7.375470545489831e-06, "loss": 0.5806, "step": 30 }, { "epoch": 0.4, "grad_norm": 2.160999298095703, "learning_rate": 6.941619336931606e-06, "loss": 0.5375, "step": 40 }, { "epoch": 0.5, "grad_norm": 3.5581603050231934, "learning_rate": 6.507768128373381e-06, "loss": 0.5219, "step": 50 }, { "epoch": 0.6, "grad_norm": 3.43050479888916, "learning_rate": 6.0739169198151545e-06, "loss": 0.4933, "step": 60 }, { "epoch": 0.7, "grad_norm": 3.4380669593811035, "learning_rate": 5.64006571125693e-06, "loss": 0.4417, "step": 70 }, { "epoch": 0.8, "grad_norm": 3.310192823410034, "learning_rate": 5.2062145026987045e-06, "loss": 0.484, "step": 80 }, { "epoch": 0.9, "grad_norm": 5.5063323974609375, "learning_rate": 4.772363294140479e-06, "loss": 0.5157, "step": 90 }, { "epoch": 1.0, "grad_norm": 2.589657783508301, "learning_rate": 4.3385120855822536e-06, "loss": 0.4096, "step": 100 }, { "epoch": 1.0, "eval_accuracy": 0.755, "eval_f1": 0.5882352941176471, "eval_loss": 0.4866395890712738, "eval_precision": 0.6086956521739131, "eval_recall": 0.5691056910569106, "eval_runtime": 1.5211, "eval_samples_per_second": 262.961, "eval_steps_per_second": 16.435, "step": 100 }, { "epoch": 1.1, "grad_norm": 3.2699267864227295, "learning_rate": 3.904660877024028e-06, "loss": 0.4491, "step": 110 }, { "epoch": 1.2, "grad_norm": 3.8382904529571533, "learning_rate": 3.470809668465803e-06, "loss": 0.4236, "step": 120 }, { "epoch": 1.3, "grad_norm": 3.1422176361083984, "learning_rate": 3.0369584599075772e-06, "loss": 0.4383, "step": 130 }, { "epoch": 1.4, "grad_norm": 2.356064558029175, "learning_rate": 2.6031072513493522e-06, "loss": 0.4434, "step": 140 }, { "epoch": 1.5, "grad_norm": 3.1500184535980225, "learning_rate": 2.1692560427911268e-06, "loss": 0.3509, "step": 150 }, { "epoch": 1.6, "grad_norm": 2.9784765243530273, "learning_rate": 1.7354048342329016e-06, "loss": 0.4451, "step": 160 }, { "epoch": 1.7, "grad_norm": 8.244532585144043, "learning_rate": 1.3015536256746761e-06, "loss": 0.381, "step": 170 }, { "epoch": 1.8, "grad_norm": 8.15170669555664, "learning_rate": 8.677024171164508e-07, "loss": 0.4256, "step": 180 }, { "epoch": 1.9, "grad_norm": 3.7399942874908447, "learning_rate": 4.338512085582254e-07, "loss": 0.3909, "step": 190 }, { "epoch": 2.0, "grad_norm": 3.0128397941589355, "learning_rate": 0.0, "loss": 0.3897, "step": 200 }, { "epoch": 2.0, "eval_accuracy": 0.7775, "eval_f1": 0.6307053941908713, "eval_loss": 0.4450126588344574, "eval_precision": 0.6440677966101694, "eval_recall": 0.6178861788617886, "eval_runtime": 1.5626, "eval_samples_per_second": 255.985, "eval_steps_per_second": 15.999, "step": 200 } ], "logging_steps": 10, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 423630740901888.0, "train_batch_size": 16, "trial_name": null, "trial_params": { "_wandb": {}, "assignments": {}, "learning_rate": 8.677024171164507e-06, "metric": "eval/loss", "num_train_epochs": 2, "per_device_train_batch_size": 16, "seed": 37 } }