adapter_name_or_path: saves\LLaMA3.1-8B-Chat\lora\Luminia-8B-RP
bf16: true
cutoff_len: 4000
dataset: qa-unc-dpo
dataset_dir: data
ddp_timeout: 180000000
do_train: true
finetuning_type: lora
flash_attn: fa2
gradient_accumulation_steps: 1
include_num_input_tokens_seen: true
learning_rate: 5.0e-05
logging_steps: 10
lora_alpha: 64
lora_dropout: 0.35
lora_rank: 32
lora_target: all
lr_scheduler_type: cosine
max_grad_norm: 1.0
max_samples: 100000
model_name_or_path: NousResearch/Meta-Llama-3.1-8B-Instruct
neftune_noise_alpha: 5
num_train_epochs: 1.0
optim: adamw_8bit
output_dir: saves\LLaMA3.1-8B-Chat\lora\Luminia-8B-RP-DPO
packing: true
per_device_train_batch_size: 1
plot_loss: true
pref_beta: 0.1
pref_ftx: 0
pref_loss: orpo
preprocessing_num_workers: 16
quantization_bit: 4
quantization_method: bitsandbytes
report_to: none
rope_scaling: linear
save_steps: 1000
stage: dpo
template: alpaca
warmup_steps: 0
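
These keys (finetuning_type, stage, pref_loss, template, and so on) follow the LLaMA-Factory argument names, so the config appears to be a LLaMA-Factory training file. Assuming that is the case and that the block above is saved as a standalone YAML file (the filename below is hypothetical), a run could be launched with the LLaMA-Factory CLI:

# sketch: save the config above as luminia-8b-rp-dpo.yaml (hypothetical name), then run
llamafactory-cli train luminia-8b-rp-dpo.yaml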