Upload 3 files
- MIXTRAL_DatosEntrenamiento.txt +0 -0
- MIXTRAL_DatosValidacion.txt +0 -0
- MIXTRAL_Fine-Tuning.py +156 -0
MIXTRAL_DatosEntrenamiento.txt
ADDED
The diff for this file is too large to render.
MIXTRAL_DatosValidacion.txt
ADDED
The diff for this file is too large to render.
MIXTRAL_Fine-Tuning.py
ADDED
@@ -0,0 +1,156 @@
# -*- coding: utf-8 -*-
"""MIXTRAL_Mixtral-8x7B (QLoRA)

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1GFRi-ND2WTbqCfPuhLkXvB6D3LieCpjK

This notebook shows how to fine-tune Mixtral-8x7b on a sample of ultrachat with QLoRA.

It requires at least 32 GB of VRAM (at least 2*16 GB GPUs if you want to use consumer hardware). On Google Colab, you can use the A100.

First, we need all these dependencies:
"""

!pip install -q bitsandbytes
!pip install -q transformers
!pip install -q peft
!pip install -q accelerate
!pip install -q datasets
!pip install -q trl
!pip install -q huggingface_hub
!pip install -q diffusers

import torch
from datasets import Dataset
from peft import LoraConfig, PeftModel, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer
)
from trl import SFTTrainer

"""Load the tokenizer and configure padding"""

import os
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
from peft import PeftModel, prepare_model_for_kbit_training, LoraConfig

# Assuming you have the correct token set as an environment variable or directly in your script
os.environ['HF_TOKEN'] = 'XXXX'

# Name of the model you want to load
model_id = "mistralai/Mistral-7B-Instruct-v0.1"

try:
    # Attempt to load the tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_id, force_download=True)
    tokenizer.pad_token = tokenizer.unk_token
    tokenizer.pad_token_id = tokenizer.unk_token_id
    tokenizer.padding_side = 'right'
    print("Tokenizer loaded successfully.")

    # Attempt to load the model (full precision); the 4-bit model used for training is reloaded below
    model = AutoModelForCausalLM.from_pretrained(model_id, force_download=True)
    print("Model loaded successfully.")
except Exception as e:
    print(f"Error loading the tokenizer or model: {e}")

"""Load the training and validation data: each non-empty line of the plain-text files below is treated as a single training example."""

# Load the data
def load_custom_dataset(file_path):
    with open(file_path, "r", encoding="utf-8") as file:
        lines = file.readlines()
    texts = [line.strip() for line in lines if line.strip()]
    return Dataset.from_dict({"text": texts})

# Update the paths to the correct files
dataset_train_sft = load_custom_dataset("MIXTRAL_DatosEntrenamiento.txt")
dataset_test_sft = load_custom_dataset("MIXTRAL_DatosValidacion.txt")

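# Optional sanity check: confirm both datasets loaded and peek at the start of
# the first training example (uses only the datasets created above).
print(f"Train examples: {len(dataset_train_sft)}, validation examples: {len(dataset_test_sft)}")
print(dataset_train_sft[0]["text"][:200])
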
"""Load the model and prepare it to be fine-tuned with QLoRA."""

compute_dtype = getattr(torch, "float16")
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=True,
)
model = AutoModelForCausalLM.from_pretrained(
    model_id, quantization_config=bnb_config, device_map={"": 0}
)
model = prepare_model_for_kbit_training(model)
model.config.pad_token_id = tokenizer.pad_token_id
model.config.use_cache = False  # Gradient checkpointing is used by default but not compatible with caching

"""The following cell only prints the architecture of the model."""

print(model)

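# Optional: report the memory footprint of the 4-bit model. get_memory_footprint()
# is a transformers utility on PreTrainedModel; the exact figure depends on the setup.
print(f"Model memory footprint: {model.get_memory_footprint() / 1e9:.2f} GB")
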
"""Define the configuration of LoRA."""

peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
)

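# With no target_modules set above, PEFT tries to infer default target modules for
# the model architecture. A more explicit variant is sketched below; it is purely
# illustrative (the trainer further down still uses `peft_config`), and the name
# `peft_config_explicit` is not from the original script. Targeting the attention
# and MLP projections is a common choice for QLoRA on Mistral-style models.
peft_config_explicit = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
)
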
"""For this demonstration, I trained for only 300 steps. You should train for at least 3000 steps. One epoch would be ideal.

import numpy as np
from sklearn.metrics import accuracy_score, f1_score

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return {
        "accuracy": accuracy_score(labels, predictions),
        "f1": f1_score(labels, predictions, average='macro')
    }
"""

training_arguments = TrainingArguments(
    output_dir="./results_mixtral_sft/",
    evaluation_strategy="steps",
    do_eval=True,
    optim="paged_adamw_8bit",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,
    per_device_eval_batch_size=4,
    log_level="debug",
    save_steps=1000,
    logging_steps=100,
    learning_rate=2e-4,
    eval_steps=500,
    max_steps=-1,
    lr_scheduler_type="linear",
    report_to="tensorboard"  # Ensure TensorBoard is enabled
)

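# For reference, the effective batch size per optimizer step and per GPU is
# per_device_train_batch_size * gradient_accumulation_steps = 4 * 2 = 8.
effective_batch_size = (training_arguments.per_device_train_batch_size
                        * training_arguments.gradient_accumulation_steps)
print(f"Effective train batch size per device: {effective_batch_size}")
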
"""Start training:"""

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset_train_sft,
    eval_dataset=dataset_test_sft,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=512,
    tokenizer=tokenizer,
    args=training_arguments,
)

trainer.train()

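# Save the trained LoRA adapter and the tokenizer so the run can be reloaded later.
# This is a minimal sketch: the output path below is arbitrary, and trainer.save_model()
# stores only the adapter weights, not the full base model.
adapter_dir = "./results_mixtral_sft/final_adapter"
trainer.save_model(adapter_dir)
tokenizer.save_pretrained(adapter_dir)
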
# Commented out IPython magic to ensure Python compatibility.
# Enable TensorBoard to visualize the training curves
# %load_ext tensorboard
# %tensorboard --logdir results_mixtral_sft/runs