error while downloading model

#102
by heikhama1982 - opened

raise ValueError(
ValueError: Unknown quantization type, got fp8 - supported types are: ['awq', 'bitsandbytes_4bit', 'bitsandbytes_8bit', 'gptq', 'aqlm', 'quanto', 'eetq', 'higgs', 'hqq', 'compressed-tensors', 'fbgemm_fp8', 'torchao', 'bitnet', 'vptq']

Same issue here. I was using Colab.
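In most cases this error means the installed transformers release does not yet recognize the fp8 entry in the model repo's quantization_config (note that fp8 is missing from the list of supported types in the traceback). Upgrading transformers is worth trying first, since recent releases add fp8 support. If upgrading is not an option, the other route is to drop the quantization entry from the config and re-quantize with bitsandbytes instead, which is what the fallback in the full script below does. A condensed sketch of that idea, assuming bitsandbytes is installed and with "your/model-id" as a placeholder:

# Condensed sketch of the fallback used in the full script below;
# assumes bitsandbytes is installed, and "your/model-id" is a placeholder.
import torch
from transformers import AutoConfig, AutoModelForCausalLM, BitsAndBytesConfig

model_name = "your/model-id"

# Drop the fp8 quantization entry that ships with the checkpoint so the loader
# does not try to build a quantizer it does not know about.
config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
if hasattr(config, "quantization_config"):
    delattr(config, "quantization_config")

# Quantize on the fly with bitsandbytes 4-bit instead.
bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    config=config,
    quantization_config=bnb_config,
    trust_remote_code=True,
    device_map="auto",
)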

I modified the Python code to download and train. Use this code:
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, Trainer, TrainerCallback, TrainingArguments, BitsAndBytesConfig
from datasets import DatasetDict, load_dataset, Dataset
import os
import logging
from typing import Optional, Union, Dict, Any, Tuple
from huggingface_hub import upload_file
import torch
from sklearn.model_selection import train_test_split

# Set up logging for better tracking of the process

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class ModelLoader:
    def __init__(
        self,
        model_name: str,
        trust_remote_code: bool = True,
        device_map: Optional[Union[str, Dict[str, Any]]] = None,
        quantization_type: str = "bitsandbytes_8bit",
        cache_dir: str = "D:/cache/ollama_model",
        output_dir: str = "./fine_tuned_deepseek",
    ):
        self.model_name = model_name
        self.trust_remote_code = trust_remote_code
        # Default to keeping everything on the CPU; pass device_map="auto" to use available GPUs
        self.device_map = device_map if device_map is not None else {"": "cpu"}
        self.quantization_type = quantization_type
        self.cache_dir = cache_dir
        self.output_dir = output_dir
        self.tokenizer = None
        self.model = None

    def _load_quantization_config(self):
        """
        Load the appropriate quantization configuration based on the desired quantization type.
        """
        try:
            if self.quantization_type == "bitsandbytes_8bit":
                return BitsAndBytesConfig(load_in_8bit=True)
            else:
                logger.warning(f"Quantization type '{self.quantization_type}' is not directly supported. Proceeding without quantization.")
                return None
        except Exception as e:
            logger.error(f"Error in loading quantization configuration: {str(e)}")
            return None

    def _load_model_with_fallback(self):
        """
        Load the model with a fallback mechanism in case quantization is not supported.
        """
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,  # Enable 4-bit quantization
            bnb_4bit_compute_dtype=torch.float16,  # Use float16 for computations
            bnb_4bit_use_double_quant=True,  # Enable double quantization
        )
        try:
            # Try loading the model with the specified quantization config
            # quantization_config = self._load_quantization_config()
            model = AutoModelForCausalLM.from_pretrained(
                self.model_name,
                quantization_config=quantization_config,
                trust_remote_code=self.trust_remote_code,
                device_map=self.device_map,
                offload_state_dict=False,  # Disable offloading
                cache_dir=self.cache_dir,
            )
            logger.info("Model loaded successfully with quantization.")
            return model
        except ValueError as e:
            if "Unknown quantization type" in str(e):
                logger.warning("Quantization type not supported directly. Attempting to load without quantization.")

                # Fallback: remove the unsupported quantization entry from the config
                config = AutoConfig.from_pretrained(self.model_name, trust_remote_code=self.trust_remote_code)
                if hasattr(config, "quantization_config"):
                    delattr(config, "quantization_config")

                try:
                    # Try loading the model without quantization
                    model = AutoModelForCausalLM.from_pretrained(
                        self.model_name,
                        config=config,
                        trust_remote_code=self.trust_remote_code,
                        offload_state_dict=False,  # Disable offloading
                        device_map=self.device_map,
                        cache_dir=self.cache_dir,
                    )
                    logger.info("Model loaded successfully without quantization.")
                    return model
                except Exception as inner_e:
                    logger.error(f"Failed to load model without quantization: {str(inner_e)}")
                    raise
            else:
                logger.error(f"Unexpected error during model loading: {str(e)}")
                raise

    def _load_tokenizer(self):
        """
        Load the tokenizer for the model.
        """
        try:
            tokenizer = AutoTokenizer.from_pretrained(
                self.model_name,
                trust_remote_code=self.trust_remote_code,
                cache_dir=self.cache_dir,
            )
            logger.info("Tokenizer loaded successfully.")
            return tokenizer
        except Exception as e:
            logger.error(f"Error loading tokenizer: {str(e)}")
            raise

    def load(self) -> Tuple:
        """
        Load both model and tokenizer with fallbacks and quantization handling.
        """
        logger.info("Starting model and tokenizer loading...")

        # Load the model with fallback if needed; device_map handles placement
        self.model = self._load_model_with_fallback()

        # Load the tokenizer
        self.tokenizer = self._load_tokenizer()

        return self.model, self.tokenizer

    def fine_tune(self, dataset_name: str, output_dir: str = None):
        """
        Fine-tune the model on the specified dataset.
        """
        if output_dir is None:
            output_dir = self.output_dir

        logger.info("Loading dataset...")
        dataset = load_dataset(dataset_name)

        # If the dataset has no validation split, split the train set
        if "validation" not in dataset:
            train_data = dataset["train"]
            train_data = [example for example in train_data if example["source"] != "..." and example["target"] != "..."]

            train_data, val_data = train_test_split(train_data, test_size=0.1)

            # Convert train_data and val_data to the correct dictionary format
            train_data_dict = {
                "source": [item["source"] for item in train_data],
                "target": [item["target"] for item in train_data],
            }

            val_data_dict = {
                "source": [item["source"] for item in val_data],
                "target": [item["target"] for item in val_data],
            }

            # Create DatasetDict with the correct format
            dataset = DatasetDict({
                'train': Dataset.from_dict(train_data_dict),
                'validation': Dataset.from_dict(val_data_dict),
            })

        # Preprocess the data
        dataset = self.preprocess_data(dataset)

        # Set up training arguments
        training_args = TrainingArguments(
            evaluation_strategy="epoch",
            num_train_epochs=3,
            output_dir=output_dir,
            per_device_train_batch_size=2,
            per_device_eval_batch_size=2,
            gradient_accumulation_steps=4,
            dataloader_num_workers=4,
            dataloader_pin_memory=True,
            fsdp="full_shard auto_wrap",  # Enables Fully Sharded Data Parallelism
            fsdp_config={"offload_params": True},  # Offloads parameters to CPU
            no_cuda=True  # Forces CPU execution
        )

        # Initialize Trainer
        trainer = Trainer(
            model=self.model,
            args=training_args,
            train_dataset=dataset["train"],
            eval_dataset=dataset["validation"],
            tokenizer=self.tokenizer,
        )

        # Start training
        trainer.train()

        # Save the fine-tuned model
        logger.info("Saving fine-tuned model...")
        self.model.save_pretrained(output_dir)
        self.tokenizer.save_pretrained(output_dir)

    def upload_to_hub(self, repo_id: str, token: str):
        """
        Upload the model to the Hugging Face Hub after ensuring it's on a single device.
        """
        logger.info("Ensuring the model is fully loaded on the device...")

        # Ensure the model is on a single device (either GPU or CPU)
        if torch.cuda.is_available():
            self.model = self.model.to("cuda")
        else:
            self.model = self.model.to("cpu")

        logger.info("Uploading model to the Hugging Face Hub...")
        self.model.push_to_hub(repo_id=repo_id, token=token)
        self.tokenizer.push_to_hub(repo_id=repo_id, token=token)
        logger.info("Model uploaded successfully to the Hugging Face Hub.")


    def evaluate(self, dataset_name: str):
        """Evaluate the model on a test dataset."""
        logger.info("Evaluating model...")
        dataset = load_dataset(dataset_name)
        # Tokenize before evaluation; assumes the dataset has a 'test' split
        dataset = self.preprocess_data(dataset)
        eval_dataset = dataset["test"]

        # Use the Trainer to evaluate the model
        trainer = Trainer(
            model=self.model,
            tokenizer=self.tokenizer,
        )

        # Evaluate on the test dataset
        results = trainer.evaluate(eval_dataset=eval_dataset)

        # For classification tasks, accuracy might be available directly
        logger.info(f"Evaluation results: {results}")
        return results

    def test_model_with_prompt(self, prompt: str):
        """Generate a response using the fine-tuned model based on the given prompt."""
        logger.info(f"Generating response for prompt: {prompt}")

        # Tokenize the prompt and move it to the same device as the model
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        output = self.model.generate(**inputs, max_length=50, num_return_sequences=1)

        # Decode the output
        response = self.tokenizer.decode(output[0], skip_special_tokens=True)
        logger.info(f"Response: {response}")
        return response

    def preprocess_data(self, dataset):
        """Preprocess the dataset for training and evaluation."""
        # Map the "source" and "target" columns into inputs and labels for training
        def preprocess_function(examples):
            # Ensure that "source" is the input and "target" is the label
            inputs = examples["source"]
            targets = examples["target"]
            model_inputs = self.tokenizer(inputs, padding="max_length", truncation=True, max_length=512)
            labels = self.tokenizer(targets, padding="max_length", truncation=True, max_length=512)
            model_inputs["labels"] = labels["input_ids"]
            return model_inputs

        # Apply preprocessing
        dataset = dataset.map(preprocess_function, batched=True)
        return dataset
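
For completeness, here is a hypothetical usage of the ModelLoader class above. The model id, dataset name, Hub repo id and token are placeholders, and the dataset is assumed to have "source" and "target" columns as the preprocessing step expects:

# Hypothetical usage; the model id, dataset name, repo id and token are placeholders.
loader = ModelLoader(
    model_name="deepseek-ai/your-model-id",
    cache_dir="./cache",
    output_dir="./fine_tuned_deepseek",
)
model, tokenizer = loader.load()

# Fine-tune on a dataset that has "source" and "target" columns.
loader.fine_tune(dataset_name="your/dataset-id")

# Quick sanity check with a prompt.
print(loader.test_model_with_prompt("Hello, what can you do?"))

# Optionally push the fine-tuned model to the Hub.
# loader.upload_to_hub(repo_id="your-username/your-repo", token="hf_xxx")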
