# -*- coding: utf-8 -*-
"""LLAMA_Fine-Tuning.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1C-kNPOgPiCC9ybxVKhOkWB9ts53APbOb

# Fine-tune Llama 3 in Google Colab

The script fine-tunes a 4-bit Llama 3 checkpoint on two plain-text files
(one training example per non-empty line) using LoRA adapters, runs a sample
generation, and finally reloads the base model and merges the adapter into it.
"""

# Commented out IPython magic to ensure Python compatibility.
# !pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7
# !pip install datasets
# !pip install --upgrade accelerate peft bitsandbytes transformers trl

import gc
import os

import torch
from datasets import Dataset, load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    DataCollatorWithPadding,
    HfArgumentParser,
    Trainer,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import (
    LoraConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training,
)
from trl import SFTTrainer

# The model that you want to train from the Hugging Face hub
model_name = "unsloth/llama-3-8b-bnb-4bit"

# The Hugging Face token. Read from the environment so that no secret lives in
# the source; ``None`` lets the hub client fall back to its own cached login.
token_name = os.environ.get("HF_TOKEN") or None

# Fine-tuned model name
new_model = "llama3_python_TFG"

################################################################################
# QLoRA parameters
################################################################################

# LoRA attention dimension
lora_r = 64

# Alpha parameter for LoRA scaling
lora_alpha = 16

# Dropout probability for LoRA layers
lora_dropout = 0.1

################################################################################
# bitsandbytes parameters
################################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

################################################################################
# TrainingArguments parameters
################################################################################

# Output directory where the model predictions and checkpoints will be stored
output_dir = "./results"

# Number of training epochs
num_train_epochs = 1

# Enable fp16/bf16 training (set bf16 to True with an A100)
fp16 = False
bf16 = False

# Batch size per GPU for training
per_device_train_batch_size = 4

# Batch size per GPU for evaluation
per_device_eval_batch_size = 4

# Number of update steps to accumulate the gradients for
gradient_accumulation_steps = 1

# Enable gradient checkpointing
gradient_checkpointing = True

# Maximum gradient normal (gradient clipping)
max_grad_norm = 0.3

# Initial learning rate (AdamW optimizer)
learning_rate = 2e-4

# Weight decay to apply to all layers except bias/LayerNorm weights
weight_decay = 0.001

# Optimizer to use
optim = "paged_adamw_32bit"

# Learning rate schedule
lr_scheduler_type = "cosine"

# Number of training steps (overrides num_train_epochs)
max_steps = -1

# Ratio of steps for a linear warmup (from 0 to learning rate)
warmup_ratio = 0.03

# Group sequences into batches with same length
# Saves memory and speeds up training considerably
group_by_length = True

# Save checkpoint every X updates steps
save_steps = 0

# Log every X updates steps
logging_steps = 25

################################################################################
# SFT parameters
################################################################################

# Maximum sequence length to use
max_seq_length = None

# Pack multiple short examples in the same input sequence to increase efficiency
packing = False

# Load the entire model on the GPU 0
device_map = {"": 0}

################################################################################
# Data loading and tokenization
################################################################################


def load_text_file(file_path):
    """Return the non-empty, whitespace-stripped lines of a UTF-8 text file.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A list of strings, one per non-blank line, in file order.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        return [line.strip() for line in f if line.strip()]


train_texts = load_text_file('LLAMA_DatosEntrenamiento.txt')
val_texts = load_text_file('LLAMA_DatosValidacion.txt')

tokenizer = AutoTokenizer.from_pretrained(model_name, token=token_name)
# Padding below requires a pad token; fall back to EOS when the checkpoint
# does not define one (the same choice the script makes for the reloaded
# tokenizer at the end).
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token


def tokenize_and_encode(texts):
    """Tokenize ``texts`` and attach causal-LM labels.

    Sequences are truncated to 512 tokens and padded to the longest sequence
    in ``texts``. Labels are a copy of ``input_ids`` in which every padding
    position (attention mask == 0) is replaced by -100 so that the loss
    ignores padding.

    Args:
        texts: List of strings to encode.

    Returns:
        The tokenizer's batch encoding with an extra ``labels`` tensor.
    """
    encodings = tokenizer(
        texts,
        truncation=True,
        padding="longest",
        max_length=512,
        return_tensors="pt",
    )
    labels = encodings['input_ids'].clone()
    # Mask via the attention mask rather than the pad id: the pad token may be
    # the EOS token, and genuine EOS tokens must still contribute to the loss.
    labels[encodings['attention_mask'] == 0] = -100
    encodings['labels'] = labels
    return encodings


train_encodings = tokenize_and_encode(train_texts)
val_encodings = tokenize_and_encode(val_texts)

train_dataset = Dataset.from_dict(
    {key: val.numpy() for key, val in train_encodings.items()}
)
val_dataset = Dataset.from_dict(
    {key: val.numpy() for key, val in val_encodings.items()}
)

################################################################################
# Training
################################################################################

# NOTE(review): `evaluation_strategy` is the argument name used by the pinned
# transformers release; newer releases rename it to `eval_strategy` — confirm
# against the version actually installed by the `--upgrade` step above.
training_arguments = TrainingArguments(
    output_dir=output_dir,
    evaluation_strategy="steps",  # Evaluate based on the number of steps
    eval_steps=500,  # Evaluate every 500 steps
    num_train_epochs=num_train_epochs,
    # NOTE(review): the run uses 2 here although the configuration section
    # declares per_device_train_batch_size = 4; kept at the effective value.
    per_device_train_batch_size=2,
    logging_steps=logging_steps,
    # Save every 1000 steps to reduce how often checkpoints hit the disk.
    # NOTE(review): differs from save_steps = 0 in the configuration section.
    save_steps=1000,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    lr_scheduler_type=lr_scheduler_type,
    warmup_ratio=warmup_ratio,
    report_to="tensorboard",
    fp16=fp16,  # Mixed precision disabled to keep training simple
)

model = AutoModelForCausalLM.from_pretrained(model_name, token=token_name)

# The checkpoint is 4-bit quantized, so its base weights cannot be trained
# directly. Attach LoRA adapters (using the QLoRA parameters declared above)
# so that only the adapter weights are optimised; this is also what makes
# `new_model` loadable with `PeftModel.from_pretrained` further down.
model = prepare_model_for_kbit_training(
    model, use_gradient_checkpointing=gradient_checkpointing
)
peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, peft_config)

# Every example is already padded to the same length by tokenize_and_encode,
# so the collator only has to stack the columns (including `labels`).
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

trainer = Trainer(
    model=model,
    args=training_arguments,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=data_collator,
)

trainer.train()

# On a PEFT-wrapped model these persist / upload the adapter weights.
model.save_pretrained(new_model)

model.push_to_hub("eibeel/llama3-python-TFG")

# Commented out IPython magic to ensure Python compatibility.
# %load_ext tensorboard
# %tensorboard --logdir results/runs

# Notebook-style preview of one training example (no effect when run as a script).
train_texts[2]

################################################################################
# Sample generation
################################################################################

# Ignore warnings
logging.set_verbosity(logging.CRITICAL)

# Run text generation pipeline with our next model
prompt = "Como puedo encontrar trabajo de ingeniero?"
pipe = pipeline(
    task="text-generation", model=model, tokenizer=tokenizer, max_length=200
)
result = pipe(f"[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

################################################################################
# Merge the adapter into the base model
################################################################################

# Empty VRAM
del model
del pipe
del trainer
gc.collect()
gc.collect()
# Dropping the Python references alone does not hand cached CUDA blocks back
# to the driver; release them before loading the base model again.
if torch.cuda.is_available():
    torch.cuda.empty_cache()

# Reload model in FP16 and merge it with LoRA weights
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map=device_map,
    token=token_name,
)
model = PeftModel.from_pretrained(base_model, new_model)
model = model.merge_and_unload()

# Reload tokenizer to save it
tokenizer = AutoTokenizer.from_pretrained(
    model_name, trust_remote_code=True, token=token_name
)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"