DeepCoder / Model.py
JVictor-CC's picture
Add Model Class and requirements
e3edd55
raw
history blame
2.14 kB
import os
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
class Model:
    """Thin wrapper around a Hugging Face causal language model.

    Typical flow: ``download_model()`` saves the tokenizer and weights into a
    local directory, ``load_local_model()`` loads them from that directory,
    and ``inference()`` generates a completion for each prompt.
    """

    # Directory that download_model() saves into. load_local_model() falls
    # back to it when download_model() was not called on this instance.
    _DEFAULT_DIR = "model"

    def __init__(self, model_url) -> None:
        """Store the model identifier; nothing is downloaded or loaded yet.

        Args:
            model_url: Identifier passed to ``from_pretrained`` when
                downloading the model.
        """
        self.model_url = model_url
        self.tokenizer = None
        self.model = None
        # Device preference, applied when the model is loaded.
        self.device = "cpu"
        self.dir_name = None

    def download_model(self) -> bool:
        """Download the tokenizer and model into the local directory.

        Returns:
            True if the model was downloaded and saved, False if the local
            directory already existed and was non-empty.
        """
        self.dir_name = self._DEFAULT_DIR
        if os.path.exists(self.dir_name) and os.listdir(self.dir_name):
            print("Model is already downloaded and ready to use.")
            return False

        # exist_ok: the directory may already exist but be empty (e.g. after
        # an interrupted download); a plain makedirs would raise then.
        os.makedirs(self.dir_name, exist_ok=True)
        tokenizer = AutoTokenizer.from_pretrained(self.model_url)
        model = AutoModelForCausalLM.from_pretrained(self.model_url)
        model.save_pretrained(self.dir_name)
        tokenizer.save_pretrained(self.dir_name)
        print(f"Model saved on '{self.dir_name}' directory.")
        return True

    def load_local_model(self) -> None:
        """Load the tokenizer and model from the local directory.

        The model is moved to CUDA only when the device preference is
        ``"cuda"`` and CUDA is actually available; otherwise it stays where
        ``from_pretrained`` put it.
        """
        if self.dir_name is None:
            # download_model() was not called on this instance; use the
            # directory it would have written to.
            self.dir_name = self._DEFAULT_DIR
        tokenizer = AutoTokenizer.from_pretrained(self.dir_name)
        model = AutoModelForCausalLM.from_pretrained(self.dir_name)
        if self.device == "cuda" and torch.cuda.is_available():
            model.to("cuda")
        self.model = model
        self.tokenizer = tokenizer

    def inference(self, prompt_list) -> list:
        """Generate one completion per prompt.

        Args:
            prompt_list: Iterable of prompts, each passed to the tokenizer.

        Returns:
            A list with the decoded output for each prompt, in order. If the
            model or tokenizer has not been loaded, a message is printed and
            ``None`` is returned.
        """
        if self.model is None or self.tokenizer is None:
            print("Model was not able to make inference, make sure you've loaded the model.")
            return None

        self.model.eval()
        # Follow the device the model actually lives on. self.device is only
        # a preference and can differ from it (CUDA requested but not
        # available, or the preference changed after loading), which would
        # otherwise put inputs and weights on different devices.
        target_device = self.model.device
        model_inferences = []
        with torch.no_grad():
            for prompt in prompt_list:
                inputs = self.tokenizer(prompt, return_tensors="pt").to(target_device)
                outputs = self.model.generate(
                    input_ids=inputs["input_ids"],
                    # Forward the tokenizer's mask when it provides one so
                    # generate() does not have to infer it.
                    attention_mask=inputs.get("attention_mask"),
                    max_new_tokens=512,
                )
                response = self.tokenizer.batch_decode(
                    outputs.detach().cpu().numpy(), skip_special_tokens=True
                )[0]
                model_inferences.append(response)
        return model_inferences

    def set_cuda(self) -> str:
        """Set the device preference to CUDA and return it.

        The preference is applied by ``load_local_model()``; an already
        loaded model is not moved.
        """
        self.device = "cuda"
        return self.device

    def set_cpu(self) -> str:
        """Set the device preference to CPU and return it.

        The preference is applied by ``load_local_model()``; an already
        loaded model is not moved.
        """
        self.device = "cpu"
        return self.device