dataset:
  name: alpaca_clean
  dataset_config:
    name: default
    path: yahma/alpaca-cleaned
    chunk_size: 1024
    concat_data: true
    cache_dir: "data/alpaca"
  pretrained_model_config:
    pretrained_model_name_or_path: "mistralai/Mistral-7B-v0.1"  # will be updated based on model_config
    cache_dir: "/data_persistent2/sim_data/"
  preprocess_config: null

dataloader:
  batch_size: 1
  num_workers: 2
  drop_last: false
  pin_memory: true

optimizer:
  optim: adamw_torch_fused
  lr: 1e-4
  weight_decay: 0.0

lr_scheduler:
  lr_scheduler_type: reduce_lr_on_plateau
  mode: min
  factor: 0.1
  patience: 10
  min_lr: 0.00001

trainer:  # HuggingFace Trainer-like arguments
  name: default_lm
  bf16: true
  train_split: train
  val_split: validation
  num_train_epochs: 2
  gradient_accumulation_steps: 8
  seed: 42
  batch_size: 1
  load_best_model_at_end: true
  greater_is_better: false
  metric_for_best_model: eval/loss  # eval/rouge/geometric_mean
  logging_steps: 100
  evaluation_strategy: steps
  max_steps: -1
  eval_steps: 100
  max_eval_batches: null
  num_save_ckpt_steps: 200

finetune:
  method: lora
  kwargs:
    r: 8
    lora_alpha: 16
    lora_dropout: 0  # 0.05
    target_modules: ["q_proj", "k_proj", "v_proj", "o_proj"]
    trainable_weights: ['feature_map_q.mlp.layer', 'feature_map_k.mlp.layer', 'window_factors']
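
# --- Usage note (kept as comments so this file remains valid YAML) ---
# A minimal, hypothetical sketch of how a config like this could be loaded in
# Python. The file path and the use of OmegaConf are assumptions for
# illustration only; they are not specified by this file:
#
#   from omegaconf import OmegaConf
#   cfg = OmegaConf.load("configs/finetune_lora_alpaca_clean.yaml")  # hypothetical path
#   assert cfg.finetune.method == "lora"
#   print(cfg.optimizer.lr, cfg.finetune.kwargs.target_modules)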