arg v0.3
README.md CHANGED
@@ -1,10 +1,6 @@
 ---
 license: apache-2.0
-
-
----
-license: apache-2.0
-base_model: NousResearch/Meta-Llama-3.1-8B-Instruct
+base_model: meta-llama/Meta-Llama-3.1-8B-Instruct
 library_name: peft
 tags:
 - llama-factory
@@ -20,14 +16,14 @@ Fine-tuning of ‘Llama-3.1-8B’ with a focus on RP and uncensored.
 <details>
 <summary>This training can be replicated using LLaMA-Factory. </summary>
 
-Stage A SFT
+Stage A: SFT
 ```
-set CUDA_VISIBLE_DEVICES=0 && llamafactory-cli train --stage sft --do_train True --model_name_or_path
+set CUDA_VISIBLE_DEVICES=0 && llamafactory-cli train --stage sft --do_train True --model_name_or_path meta-llama/Meta-Llama-3.1-8B-Instruct --preprocessing_num_workers 16 --finetuning_type lora --template alpaca --rope_scaling linear --flash_attn fa2 --dataset_dir data --dataset psy_mental_health,faproulette_co-OCR-fixer,ascii_art,Uncensored_DAN,Lumimaid-v2,Degrees_of_Lewdity,qa-unc-sft --cutoff_len 8192 --learning_rate 5e-05 --num_train_epochs 1.0 --max_samples 100000 --per_device_train_batch_size 1 --gradient_accumulation_steps 1 --lr_scheduler_type cosine --max_grad_norm 1.0 --logging_steps 10 --save_steps 1000 --warmup_steps 1000 --neftune_noise_alpha 5 --optim adamw_8bit --packing True --neat_packing True --report_to none --output_dir saves\LLaMA3.1-8B-Chat\lora\Luminia-8B-RP --bf16 True --plot_loss True --ddp_timeout 180000000 --include_num_input_tokens_seen True --quantization_bit 4 --quantization_method bitsandbytes --lora_rank 32 --lora_alpha 64 --lora_dropout 0.15 --lora_target all --use_adam_mini True --create_new_adapter True
 ```
 
-Stage B:
+Stage B: Continued, `orpo`
 ```
-set CUDA_VISIBLE_DEVICES=0 && llamafactory-cli train --stage dpo --do_train True --model_name_or_path
+set CUDA_VISIBLE_DEVICES=0 && llamafactory-cli train --stage dpo --do_train True --model_name_or_path meta-llama/Meta-Llama-3.1-8B-Instruct --preprocessing_num_workers 16 --finetuning_type lora --template alpaca --rope_scaling linear --flash_attn fa2 --dataset_dir data --dataset qa-unc-dpo --cutoff_len 4000 --learning_rate 5e-05 --num_train_epochs 1.0 --max_samples 100000 --per_device_train_batch_size 1 --gradient_accumulation_steps 1 --lr_scheduler_type cosine --max_grad_norm 1.0 --logging_steps 10 --save_steps 1000 --warmup_steps 0 --neftune_noise_alpha 5 --optim adamw_8bit --packing True --report_to none --output_dir saves\LLaMA3.1-8B-Chat\lora\Luminia-8B-RP-DPO --bf16 True --plot_loss True --ddp_timeout 180000000 --include_num_input_tokens_seen True --quantization_bit 4 --quantization_method bitsandbytes --lora_rank 32 --lora_alpha 64 --lora_dropout 0.35 --lora_target all --pref_beta 0.1 --pref_ftx 0 --pref_loss orpo --adapter_name_or_path saves\LLaMA3.1-8B-Chat\lora\Luminia-8B-RP
 ```
 
 
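The Stage A flags added above can also be expressed as a LLaMA-Factory YAML config passed to `llamafactory-cli train`. The sketch below is a minimal, assumption-laden convenience rather than part of the commit: the file name `luminia_sft.yaml` is hypothetical, forward-slash paths assume a Linux shell instead of the Windows-style `set ... &&` invocation in the diff, and support for some keys (e.g. `neat_packing`, `use_adam_mini`) depends on the installed LLaMA-Factory version.

```yaml
# Hypothetical config file: luminia_sft.yaml (name is illustrative).
# Keys mirror the Stage A CLI flags from the diff above; path separators
# are changed from Windows backslashes to forward slashes.
stage: sft
do_train: true
model_name_or_path: meta-llama/Meta-Llama-3.1-8B-Instruct
finetuning_type: lora
lora_rank: 32
lora_alpha: 64
lora_dropout: 0.15
lora_target: all
create_new_adapter: true
quantization_bit: 4
quantization_method: bitsandbytes
template: alpaca
rope_scaling: linear
flash_attn: fa2
dataset_dir: data
dataset: psy_mental_health,faproulette_co-OCR-fixer,ascii_art,Uncensored_DAN,Lumimaid-v2,Degrees_of_Lewdity,qa-unc-sft
cutoff_len: 8192
max_samples: 100000
preprocessing_num_workers: 16
packing: true
neat_packing: true
learning_rate: 5.0e-5
num_train_epochs: 1.0
per_device_train_batch_size: 1
gradient_accumulation_steps: 1
lr_scheduler_type: cosine
max_grad_norm: 1.0
warmup_steps: 1000
neftune_noise_alpha: 5
optim: adamw_8bit
use_adam_mini: true
bf16: true
logging_steps: 10
save_steps: 1000
plot_loss: true
report_to: none
output_dir: saves/LLaMA3.1-8B-Chat/lora/Luminia-8B-RP
```

It would then be launched with `llamafactory-cli train luminia_sft.yaml`, optionally prefixed with `CUDA_VISIBLE_DEVICES=0`. The Stage B run follows the same pattern with `stage: dpo`, `pref_loss: orpo`, `pref_beta: 0.1`, `pref_ftx: 0`, dataset `qa-unc-dpo`, `cutoff_len: 4000`, `lora_dropout: 0.35`, `warmup_steps: 0`, and `adapter_name_or_path` pointing at the Stage A output adapter.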