Med Tiouti committed on
Commit
c22681b
1 Parent(s): ca90067

Set weights cache directory

Browse files
Files changed (1) hide show
  1. app.py +14 -25
app.py CHANGED
@@ -21,27 +21,21 @@ from langchain import PromptTemplate, LLMChain
21
  # vector stores
22
  from langchain.vectorstores import FAISS
23
 
 
 
24
 
25
- def get_model():
26
- model_repo = 'daryl149/llama-2-13b-chat-hf'
27
-
28
- tokenizer = AutoTokenizer.from_pretrained(model_repo, use_fast=True)
29
-
30
- model = AutoModelForCausalLM.from_pretrained(
31
- model_repo,
32
- device_map='auto',
33
- load_in_4bit=True,
34
- torch_dtype=torch.float16,
35
- low_cpu_mem_usage=True,
36
- trust_remote_code=True
37
- )
38
- max_len = 8192
39
-
40
- return tokenizer,model,max_len
41
-
42
-
43
- tokenizer, model, max_len = get_model()
44
 
 
 
 
 
 
 
 
 
 
 
45
 
46
 
47
  temperature = 0,
@@ -50,7 +44,7 @@ repetition_penalty = 1.15
50
 
51
  pipe = pipeline(
52
  task = "text-generation",
53
- model = "daryl149/llama-2-13b-chat-hf",
54
  tokenizer = tokenizer,
55
  pad_token_id = tokenizer.eos_token_id,
56
  max_length = max_len,
@@ -61,14 +55,9 @@ pipe = pipeline(
61
 
62
  llm = HuggingFacePipeline(pipeline = pipe)
63
 
64
-
65
-
66
-
67
  # similar passages
68
  k = 3
69
 
70
-
71
-
72
  embeddings_shl_path ="faiss_index_shl"
73
  embeddings_model_repo = 'sentence-transformers/all-MiniLM-L6-v2'
74
  ### download embeddings model
 
21
  # vector stores
22
  from langchain.vectorstores import FAISS
23
 
24
+ cache_path = "./runpod-volume"
25
+ model_repo = 'daryl149/llama-2-13b-chat-hf'
26
 
27
+ tokenizer = AutoTokenizer.from_pretrained(model_repo, use_fast=True, cache_dir=cache_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
+ model = AutoModelForCausalLM.from_pretrained(
30
+ model_repo,
31
+ device_map='auto',
32
+ load_in_4bit=True,
33
+ torch_dtype=torch.float16,
34
+ low_cpu_mem_usage=True,
35
+ trust_remote_code=True,
36
+ cache_dir=cache_path
37
+ )
38
+ max_len = 8192
39
 
40
 
41
  temperature = 0,
 
44
 
45
  pipe = pipeline(
46
  task = "text-generation",
47
+ model = model,
48
  tokenizer = tokenizer,
49
  pad_token_id = tokenizer.eos_token_id,
50
  max_length = max_len,
 
55
 
56
  llm = HuggingFacePipeline(pipeline = pipe)
57
 
 
 
 
58
  # similar passages
59
  k = 3
60
 
 
 
61
  embeddings_shl_path ="faiss_index_shl"
62
  embeddings_model_repo = 'sentence-transformers/all-MiniLM-L6-v2'
63
  ### download embeddings model