Med Tiouti committed on
Commit
c22681b
1 Parent(s): ca90067

Set weights cache directory

Browse files
Files changed (1) hide show
  1. app.py +14 -25
app.py CHANGED
@@ -21,27 +21,21 @@ from langchain import PromptTemplate, LLMChain
21
  # vector stores
22
  from langchain.vectorstores import FAISS
23
 
 
 
24
 
25
- def get_model():
26
- model_repo = 'daryl149/llama-2-13b-chat-hf'
27
-
28
- tokenizer = AutoTokenizer.from_pretrained(model_repo, use_fast=True)
29
-
30
- model = AutoModelForCausalLM.from_pretrained(
31
- model_repo,
32
- device_map='auto',
33
- load_in_4bit=True,
34
- torch_dtype=torch.float16,
35
- low_cpu_mem_usage=True,
36
- trust_remote_code=True
37
- )
38
- max_len = 8192
39
-
40
- return tokenizer,model,max_len
41
-
42
-
43
- tokenizer, model, max_len = get_model()
44
 
 
 
 
 
 
 
 
 
 
 
45
 
46
 
47
  temperature = 0,
@@ -50,7 +44,7 @@ repetition_penalty = 1.15
50
 
51
  pipe = pipeline(
52
  task = "text-generation",
53
- model = "daryl149/llama-2-13b-chat-hf",
54
  tokenizer = tokenizer,
55
  pad_token_id = tokenizer.eos_token_id,
56
  max_length = max_len,
@@ -61,14 +55,9 @@ pipe = pipeline(
61
 
62
  llm = HuggingFacePipeline(pipeline = pipe)
63
 
64
-
65
-
66
-
67
  # similar passages
68
  k = 3
69
 
70
-
71
-
72
  embeddings_shl_path ="faiss_index_shl"
73
  embeddings_model_repo = 'sentence-transformers/all-MiniLM-L6-v2'
74
  ### download embeddings model
 
21
  # vector stores
22
  from langchain.vectorstores import FAISS
23
 
24
+ cache_path = "./runpod-volume"
25
+ model_repo = 'daryl149/llama-2-13b-chat-hf'
26
 
27
+ tokenizer = AutoTokenizer.from_pretrained(model_repo, use_fast=True, cache_dir=cache_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
+ model = AutoModelForCausalLM.from_pretrained(
30
+ model_repo,
31
+ device_map='auto',
32
+ load_in_4bit=True,
33
+ torch_dtype=torch.float16,
34
+ low_cpu_mem_usage=True,
35
+ trust_remote_code=True,
36
+ cache_dir=cache_path
37
+ )
38
+ max_len = 8192
39
 
40
 
41
  temperature = 0,
 
44
 
45
  pipe = pipeline(
46
  task = "text-generation",
47
+ model = model,
48
  tokenizer = tokenizer,
49
  pad_token_id = tokenizer.eos_token_id,
50
  max_length = max_len,
 
55
 
56
  llm = HuggingFacePipeline(pipeline = pipe)
57
 
 
 
 
58
  # similar passages
59
  k = 3
60
 
 
 
61
  embeddings_shl_path ="faiss_index_shl"
62
  embeddings_model_repo = 'sentence-transformers/all-MiniLM-L6-v2'
63
  ### download embeddings model