{"per_device_train_batch_size": 8, "per_device_eval_batch_size": 8, "gradient_accumulation_steps": 4, "learning_rate": 0.001, "num_train_epochs": 12, "max_steps": -1} |
{"per_device_train_batch_size": 8, "per_device_eval_batch_size": 8, "gradient_accumulation_steps": 4, "learning_rate": 0.001, "num_train_epochs": 12, "max_steps": -1} |