eibeel committed on
Commit
09e20c2
1 Parent(s): 33a00b6

Upload 3 files

LLAMA_DatosEntrenamiento.txt ADDED
The diff for this file is too large to render. See raw diff
 
LLAMA_DatosValidacion.txt ADDED
The diff for this file is too large to render. See raw diff
 
LLAMA_Fine-Tuning.py ADDED
@@ -0,0 +1,234 @@
# -*- coding: utf-8 -*-
"""LLAMA_Fine-Tuning.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1C-kNPOgPiCC9ybxVKhOkWB9ts53APbOb

# Fine-tune Llama 2 in Google Colab
"""

!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7

!pip install datasets

!pip install --upgrade accelerate peft bitsandbytes transformers trl

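# Note (editor): the unpinned "--upgrade" install above overrides the pinned
# versions from the first install, so the library versions actually used are
# whatever pip resolves at run time.
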
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

# The model that you want to train from the Hugging Face hub
model_name = "unsloth/llama-3-8b-bnb-4bit"

# The Hugging Face token
token_name = "XXXX"

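# Editor's sketch (not in the original script): "XXXX" is a placeholder; the
# token is usually read from an environment variable instead of being hard-coded.
token_name = os.environ.get("HF_TOKEN", token_name)
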
# Fine-tuned model name
new_model = "llama-2-7b-minipython"

################################################################################
# QLoRA parameters
################################################################################

# LoRA attention dimension
lora_r = 64

# Alpha parameter for LoRA scaling
lora_alpha = 16

# Dropout probability for LoRA layers
lora_dropout = 0.1

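# Editor's sketch (not part of the original commit): the three values above are
# defined but never used further down; with peft they would normally be
# collected into a LoraConfig like this one.
peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    bias="none",
    task_type="CAUSAL_LM",
)
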
################################################################################
# bitsandbytes parameters
################################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

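# Editor's sketch (not part of the original commit): these flags are typically
# turned into a BitsAndBytesConfig and passed to from_pretrained(...,
# quantization_config=bnb_config) when loading the base model in 4-bit; the
# script below does not do this.
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)
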
################################################################################
# TrainingArguments parameters
################################################################################

# Output directory where the model predictions and checkpoints will be stored
output_dir = "./results"

# Number of training epochs
num_train_epochs = 1

# Enable fp16/bf16 training (set bf16 to True with an A100)
fp16 = False
bf16 = False

# Batch size per GPU for training
per_device_train_batch_size = 4

# Batch size per GPU for evaluation
per_device_eval_batch_size = 4

# Number of update steps to accumulate the gradients for
gradient_accumulation_steps = 1

# Enable gradient checkpointing
gradient_checkpointing = True

# Maximum gradient norm (gradient clipping)
max_grad_norm = 0.3

# Initial learning rate (AdamW optimizer)
learning_rate = 2e-4

# Weight decay to apply to all layers except bias/LayerNorm weights
weight_decay = 0.001

# Optimizer to use
optim = "paged_adamw_32bit"

# Learning rate schedule
lr_scheduler_type = "cosine"

# Number of training steps (overrides num_train_epochs; -1 disables the limit)
max_steps = -1

# Ratio of steps for a linear warmup (from 0 to the learning rate)
warmup_ratio = 0.03

# Group sequences into batches with the same length
# Saves memory and speeds up training considerably
group_by_length = True

# Save a checkpoint every X update steps
save_steps = 0

# Log every X update steps
logging_steps = 25

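# Note (editor): only part of the configuration above is actually passed to the
# TrainingArguments constructed later in this script; values such as
# per_device_eval_batch_size, gradient_accumulation_steps, gradient_checkpointing,
# max_grad_norm, optim, max_steps, group_by_length and bf16 are defined here but
# never forwarded.
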
################################################################################
# SFT parameters
################################################################################

# Maximum sequence length to use
max_seq_length = None

# Pack multiple short examples into the same input sequence to increase efficiency
packing = False

# Load the entire model on GPU 0
device_map = {"": 0}

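# Note (editor): max_seq_length and packing are options for trl's SFTTrainer;
# the plain transformers Trainer used below does not read them.
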
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer, DataCollatorWithPadding
from datasets import Dataset

def load_text_file(file_path):
    with open(file_path, 'r', encoding='utf-8') as f:
        return [line.strip() for line in f if line.strip()]

train_texts = load_text_file('LLAMA_DatosEntrenamiento.txt')
val_texts = load_text_file('LLAMA_DatosValidacion.txt')

tokenizer = AutoTokenizer.from_pretrained(model_name, token=token_name)

# Llama-family tokenizers often ship without a pad token; fall back to the EOS
# token so that padding="longest" below does not fail.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

def tokenize_and_encode(texts):
    encodings = tokenizer(texts, truncation=True, padding="longest", max_length=512, return_tensors="pt")
    encodings['labels'] = encodings['input_ids'].clone()  # Duplicate input_ids to use as labels (causal LM)
    return encodings

train_encodings = tokenize_and_encode(train_texts)
val_encodings = tokenize_and_encode(val_texts)

train_dataset = Dataset.from_dict({key: val.numpy() for key, val in train_encodings.items()})
val_dataset = Dataset.from_dict({key: val.numpy() for key, val in val_encodings.items()})

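# Note (editor): every example is already padded to the longest sequence in its
# split at this point, so the DataCollatorWithPadding used below has little left
# to pad; it mainly converts the stored arrays back into batched tensors.
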
training_arguments = TrainingArguments(
    output_dir=output_dir,
    evaluation_strategy="steps",  # Evaluate based on the number of steps
    eval_steps=500,               # Evaluate every 500 steps
    num_train_epochs=1,
    per_device_train_batch_size=2,
    logging_steps=logging_steps,
    save_steps=1000,              # Save the model every 1000 steps to reduce how often checkpoints are written to disk
    learning_rate=2e-4,
    weight_decay=0.001,
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    report_to="tensorboard",
    fp16=False                    # Disable mixed precision to keep the training setup simple
)

model = AutoModelForCausalLM.from_pretrained(model_name, token=token_name)

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

trainer = Trainer(
    model=model,
    args=training_arguments,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=data_collator
)

trainer.train()

model.save_pretrained(new_model)

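# Editor's sketch, not part of the original commit: the QLoRA, bitsandbytes and
# SFT parameters defined earlier are never used by the plain Trainer above,
# which performs a full fine-tune. The helper below (hypothetical, and never
# called in this script) shows roughly how those parameters would instead be
# combined with trl's SFTTrainer to train a LoRA adapter; bnb_config and
# peft_config refer to the objects sketched earlier.
def run_qlora_sft():
    # Load the 4-bit quantized base model on GPU 0.
    base = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map=device_map,
        token=token_name,
    )
    # SFTTrainer tokenizes raw text itself, so feed it the untokenized lines.
    text_train = Dataset.from_dict({"text": train_texts})
    text_val = Dataset.from_dict({"text": val_texts})
    sft_trainer = SFTTrainer(
        model=base,
        train_dataset=text_train,
        eval_dataset=text_val,
        peft_config=peft_config,
        dataset_text_field="text",
        max_seq_length=max_seq_length,
        tokenizer=tokenizer,
        args=training_arguments,
        packing=packing,
    )
    sft_trainer.train()
    # Saving here writes a LoRA adapter, which is what the PeftModel merge step
    # further down expects to find under new_model.
    sft_trainer.model.save_pretrained(new_model)
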
# Commented out IPython magic to ensure Python compatibility.
# %load_ext tensorboard
# %tensorboard --logdir results/runs

train_texts[2]

# Ignore warnings
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our new model
prompt = "Como puedo encontrar trabajo de ingeniero?"
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer, max_length=200)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

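# Note (editor): "[INST] ... [/INST]" is the Llama 2 chat format; the base
# checkpoint loaded here (unsloth/llama-3-8b-bnb-4bit) does not use that
# template by default, so the fine-tuning data would need to follow the same
# format for this prompt style to be meaningful.
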
# Empty VRAM
del model
del pipe
del trainer
import gc
gc.collect()
gc.collect()

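# Editor's addition (not in the original commit): releasing cached CUDA memory
# as well usually frees more VRAM before the model is reloaded in FP16.
torch.cuda.empty_cache()
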
# Reload model in FP16 and merge it with LoRA weights
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map=device_map,
)
model = PeftModel.from_pretrained(base_model, new_model)
model = model.merge_and_unload()

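# Note (editor): PeftModel.from_pretrained expects new_model to contain a LoRA
# adapter. The plain Trainer path above saved a full model instead, so this
# merge step only works if an adapter-based (LoRA/SFTTrainer) run, like the one
# sketched earlier, produced new_model.
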
# Reload tokenizer to save it
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
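
# Editor's sketch: the comment above says the tokenizer is reloaded "to save it",
# but the original script stops here. Persisting the merged model and tokenizer
# would look roughly like this (the output directory name is hypothetical).
merged_dir = new_model + "-merged"
model.save_pretrained(merged_dir)
tokenizer.save_pretrained(merged_dir)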