import os

import torch
from huggingface_hub import HfApi, login
from transformers import AutoModelForCausalLM, AutoTokenizer


class MultimodalAI:
    def __init__(self):
        # Read the Hugging Face token from the environment (e.g. loaded from a .env file).
        self.HUGGINGFACE_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")

        # Check that the token was retrieved successfully
        if self.HUGGINGFACE_TOKEN is None:
            raise ValueError("HUGGINGFACE_TOKEN environment variable is not set.")

        # Authenticate with Hugging Face
        self.api = HfApi()
        login(token=self.HUGGINGFACE_TOKEN)

        # Model selection
        self.model_name = "meta-llama/Llama-2-7b-hf"

        # Check if a CUDA-enabled GPU is available.
        # If available, move the model to the GPU (cuda:0) for faster computation;
        # otherwise, keep the model on the CPU.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

        # Load the model and tokenizer
        self._load_model_and_tokenizer()

    def _load_model_and_tokenizer(self):
        # Load the Llama model and tokenizer
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_name, token=self.HUGGINGFACE_TOKEN
        ).to(self.device)
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.model_name, token=self.HUGGINGFACE_TOKEN
        )

        # Llama-2 ships without a pad token; fall back to the EOS token so that
        # generate() receives a valid pad_token_id.
        if self.tokenizer.pad_token_id is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

    def generate_response(self, text_input, max_new_tokens=50):
        # Tokenize the input text and move it to the same device as the model
        inputs = self.tokenizer(text_input, return_tensors="pt").to(self.device)

        # Generate a response without tracking gradients
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                pad_token_id=self.tokenizer.pad_token_id,
            )

        # Decode and return the response
        response_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        return response_text
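

# Minimal usage sketch, not part of the original listing: it assumes
# HUGGINGFACE_TOKEN is set in the environment and that the account has been
# granted access to the gated meta-llama/Llama-2-7b-hf weights. The variable
# names and the prompt below are illustrative only.
if __name__ == "__main__":
    assistant = MultimodalAI()
    reply = assistant.generate_response(
        "Explain what a tokenizer does in one sentence.", max_new_tokens=50
    )
    print(reply)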