{ "model_name": "unsloth/Llama-3.2-1B-Instruct", "max_seq_length": 2048, "dtype": null, "load_in_4bit": false, "lora_params": { "r": 16, "target_modules": [ "q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj" ], "lora_alpha": 16, "lora_dropout": 0, "bias": "none", "use_gradient_checkpointing": "unsloth", "random_state": 3407, "use_rslora": false }, "training_args": { "per_device_train_batch_size": 4, "gradient_accumulation_steps": 4, "warmup_steps": 5, "num_train_epochs": 1, "learning_rate": 0.0002, "fp16": false, "bf16": false, "logging_steps": 3, "optim": "adamw_8bit", "weight_decay": 0.01, "lr_scheduler_type": "linear", "seed": 3407, "output_dir": "outputs", "report_to": "none" }, "data_config": { "dataset_name": "mlabonne/FineTome-100k", "dataset_split": "train", "dataset_rows": 15000, "conversation_format": "llama-3.1", "max_seq_length": 2048, "train_on_responses_only": true }, "inference_config": { "temperature": 1.5, "min_p": 0.1, "max_new_tokens": 600 } }