Spaces: Sleeping
Commit · e3edd55
Parent(s): e8afcbe
Add Model Class and requirements
The Model class has three main methods (a usage sketch follows the diff below):
- `download_model`, which takes a Hugging Face model URL and downloads the model into a directory called 'model'.
- `load_local_model`, which loads the local model from the 'model' directory.
- `inference`, which takes a list of prompts and generates a response from the model for each one.
- Model.py +61 -0
- requirements.txt +5 -0
Model.py
ADDED
@@ -0,0 +1,61 @@
import os

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM


class Model:
    def __init__(self, model_url) -> None:
        self.model_url = model_url
        self.tokenizer = None
        self.model = None
        self.device = "cpu"
        self.dir_name = "model"  # local directory for the downloaded checkpoint

    def download_model(self) -> bool:
        # Download only when the directory is missing or empty.
        if not os.path.exists(self.dir_name) or not os.listdir(self.dir_name):
            # exist_ok=True avoids a FileExistsError when the directory
            # already exists but is empty.
            os.makedirs(self.dir_name, exist_ok=True)

            tokenizer = AutoTokenizer.from_pretrained(self.model_url)
            model = AutoModelForCausalLM.from_pretrained(self.model_url)

            model.save_pretrained(self.dir_name)
            tokenizer.save_pretrained(self.dir_name)

            print(f"Model saved in the '{self.dir_name}' directory.")
            return True
        else:
            print("Model is already downloaded and ready to use.")
            return False

    def load_local_model(self) -> None:
        tokenizer = AutoTokenizer.from_pretrained(self.dir_name)
        model = AutoModelForCausalLM.from_pretrained(self.dir_name)

        # Move the model to whichever device is currently selected.
        model.to(self.device)

        self.model = model
        self.tokenizer = tokenizer

    def inference(self, prompt_list) -> list:
        if self.model is not None and self.tokenizer is not None:
            self.model.eval()
            model_inferences = []

            for prompt in prompt_list:
                # Tokenize on the same device as the model.
                inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)

                with torch.no_grad():
                    outputs = self.model.generate(input_ids=inputs["input_ids"], max_new_tokens=512)
                response = self.tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)[0]

                model_inferences.append(response)
            return model_inferences
        else:
            print("Model was not able to make inference; make sure you've loaded the model.")
            return []

    def set_cuda(self) -> None:
        # Fall back to CPU when CUDA is not available.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        if self.model is not None:
            self.model.to(self.device)

    def set_cpu(self) -> None:
        self.device = "cpu"
        if self.model is not None:
            self.model.to(self.device)
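For reference, a minimal usage sketch of the class above. The model id 'gpt2' is only an illustrative stand-in; substitute whichever Hugging Face model URL the Space actually uses.

from Model import Model

# 'gpt2' is a placeholder model id for this example.
model = Model("gpt2")

model.download_model()    # fetches the checkpoint into ./model (skipped if already present)
model.set_cuda()          # optional; falls back to CPU if CUDA is unavailable
model.load_local_model()

responses = model.inference(["Hello, how are you?"])
print(responses[0])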
requirements.txt
ADDED
@@ -0,0 +1,5 @@
torch
gradio
transformers
huggingface-hub