Iker committed
Commit 3786308
1 Parent(s): d2c5c92

Upload openchat-3.5-0106_LoRA.yaml

Files changed (1):
  1. openchat-3.5-0106_LoRA.yaml +90 -0
openchat-3.5-0106_LoRA.yaml ADDED
@@ -0,0 +1,90 @@
+ # Training args
+ model_name_or_path: openchat/openchat-3.5-0106
+ torch_dtype: bfloat16
+ use_lora: true
+ quantization: 4
+ quantization_inference: null
+ gradient_checkpointing: true
+ force_auto_device_map: false
+ use_flash_attention: true
+ generation_config: generation_config.json
+ stop_words:
+ - "<|end_of_turn|>"
+ - "GPT4 Correct User:"
+ - "GPT4 Correct Assistant:"
+ - "</s>"
+ - "<s>"
+ - "\\n"
+
+ # dataset arguments
+ train_datasets:
+ - train
+ validation_datasets:
+ - validation
+ test_datasets:
+ - test
+
+ max_seq_length: 8192
+ generation_max_length: 8192
+ prompt_loss_weight: 0.0
+
+ # checkpoint settings
+ output_dir: results/finetune/openchat-3.5-0106_Lora
+ overwrite_output_dir: true
+ load_best_model_at_end: false
+ metric_for_best_model: eval_validation_predictions_validation/rouge
+ greater_is_better: true
+ save_strategy: "epoch"
+ save_only_model: true
+ save_total_limit: 1
+
+ # evaluation
+ do_train: true
+ do_eval: true
+ do_predict: true
+ evaluation_strategy: "epoch"
+ predict_with_generate: true
+ evaluate_all_checkpoints: true
+
+ # effective batch size: 8 per-device batch * 8 gradient accumulation steps = 64 per GPU
+ per_device_train_batch_size: 8
+ per_device_eval_batch_size: 4
+ gradient_accumulation_steps: 8
+ generation_num_beams: 1
+
+ # optimizer settings
+
+ optim: adamw_torch_fused
+ learning_rate: 0.0003
+ weight_decay: 0.001
+ num_train_epochs: 3
+ lr_scheduler_type: cosine
+ warmup_ratio: 0.1
+ adam_beta1: 0.9
+ adam_beta2: 0.95
+ adam_epsilon: 1e-12
+
+ # lora settings
+ lora_r: 128
+ lora_alpha: 256
+ lora_dropout: 0.05
+ lora_target_modules:
+ - all
+
+ # reporting
+ logging_strategy: steps
+ logging_first_step: true
+ logging_steps: 5
+ report_to: wandb
+ run_name: "openchat-3.5-0106_Lora"
+ disable_tqdm: false
+
+ # hub settings
+ push_to_hub: false
+ resume_from_checkpoint: false
+
+ # performance
+ bf16: true
+ fp16: false
+ torch_compile: false
+ ddp_find_unused_parameters: false
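
Note: the sketch below is not the training script from this repository; it only illustrates how the quantization and LoRA fields above could map onto a standard transformers + peft + bitsandbytes setup. The NF4 quantization type and the reading of `lora_target_modules: all` as PEFT's "all-linear" are assumptions.

    # Minimal sketch (assumptions noted inline), not the repository's trainer.
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
    from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

    model_name = "openchat/openchat-3.5-0106"           # model_name_or_path

    bnb_config = BitsAndBytesConfig(                     # quantization: 4
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",                       # assumed quant type
        bnb_4bit_compute_dtype=torch.bfloat16,           # torch_dtype: bfloat16
    )

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        torch_dtype=torch.bfloat16,
        attn_implementation="flash_attention_2",         # use_flash_attention: true
    )
    model = prepare_model_for_kbit_training(model)       # prep for 4-bit training
    model.gradient_checkpointing_enable()                # gradient_checkpointing: true

    lora_config = LoraConfig(                            # lora settings
        r=128,                                           # lora_r
        lora_alpha=256,                                  # lora_alpha
        lora_dropout=0.05,                               # lora_dropout
        target_modules="all-linear",                     # assumed reading of "all"
        task_type="CAUSAL_LM",
    )
    model = get_peft_model(model, lora_config)
    model.print_trainable_parameters()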