Spaces:
Paused
Paused
Daniel Marques
committed on
Commit
·
2084d31
1
Parent(s):
d7147ea
feat: add websocket
Browse files
- constants.py +2 -2
- load_models.py +2 -0
- prompt_template_utils.py +6 -7
constants.py
CHANGED
@@ -32,13 +32,13 @@ CHROMA_SETTINGS = Settings(
|
|
32 |
)
|
33 |
|
34 |
# Context Window and Max New Tokens
|
35 |
-
CONTEXT_WINDOW_SIZE =
|
36 |
MAX_NEW_TOKENS = CONTEXT_WINDOW_SIZE # int(CONTEXT_WINDOW_SIZE/4)
|
37 |
|
38 |
#### If you get a "not enough space in the buffer" error, you should reduce the values below, start with half of the original values and keep halving the value until the error stops appearing
|
39 |
|
40 |
N_GPU_LAYERS = 40 # Llama-2-70B has 83 layers
|
41 |
-
N_BATCH =
|
42 |
|
43 |
### From experimenting with the Llama-2-7B-Chat-GGML model on 8GB VRAM, these values work:
|
44 |
# N_GPU_LAYERS = 20
|
|
|
32 |
)
|
33 |
|
34 |
# Context Window and Max New Tokens
|
35 |
+
CONTEXT_WINDOW_SIZE = 2048
|
36 |
MAX_NEW_TOKENS = CONTEXT_WINDOW_SIZE # int(CONTEXT_WINDOW_SIZE/4)
|
37 |
|
38 |
#### If you get a "not enough space in the buffer" error, you should reduce the values below, start with half of the original values and keep halving the value until the error stops appearing
|
39 |
|
40 |
N_GPU_LAYERS = 40 # Llama-2-70B has 83 layers
|
41 |
+
N_BATCH = 1024
|
42 |
|
43 |
### From experimenting with the Llama-2-7B-Chat-GGML model on 8GB VRAM, these values work:
|
44 |
# N_GPU_LAYERS = 20
|
load_models.py
CHANGED
@@ -58,6 +58,8 @@ def load_quantized_model_gguf_ggml(model_id, model_basename, device_type, loggin
|
|
58 |
"model_path": model_path,
|
59 |
"n_ctx": CONTEXT_WINDOW_SIZE,
|
60 |
"max_tokens": MAX_NEW_TOKENS,
|
|
|
|
|
61 |
# set this based on your GPU & CPU RAM
|
62 |
}
|
63 |
if device_type.lower() == "mps":
|
|
|
58 |
"model_path": model_path,
|
59 |
"n_ctx": CONTEXT_WINDOW_SIZE,
|
60 |
"max_tokens": MAX_NEW_TOKENS,
|
61 |
+
"n_batch": MAX_NEW_TOKENS,
|
62 |
+
|
63 |
# set this based on your GPU & CPU RAM
|
64 |
}
|
65 |
if device_type.lower() == "mps":
|
prompt_template_utils.py
CHANGED
@@ -9,15 +9,14 @@ from langchain.prompts import PromptTemplate
|
|
9 |
|
10 |
# this is specific to Llama-2.
|
11 |
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
|
16 |
# system_prompt = """You are a helpful assistant, and you will use the context and documents provided in the training to answer users' questions. Please read the context provided carefully before responding to questions and follow a step-by-step thought process. If you cannot answer a user's question based on the provided context, please inform the user. Do not use any other information to answer the user. Provide a detailed response based on the content of locally trained documents."""
|
17 |
|
18 |
-
system_prompt = """It's a useful assistant that will use the context and documents provided in the training to answer users' questions.
|
19 |
-
Read the context provided before answering the questions and think step by step.
|
20 |
-
If you can't answer, just say "I don't know" and don't try to work out an answer to respond to the user."""
|
21 |
|
22 |
def get_prompt_template(system_prompt=system_prompt, promptTemplate_type=None, history=False):
|
23 |
if promptTemplate_type == "llama":
|
@@ -85,7 +84,7 @@ def get_prompt_template(system_prompt=system_prompt, promptTemplate_type=None, h
|
|
85 |
)
|
86 |
prompt = PromptTemplate(input_variables=["context", "question"], template=prompt_template)
|
87 |
|
88 |
-
memory = ConversationBufferMemory(input_key="question", memory_key="history"
|
89 |
|
90 |
return (
|
91 |
prompt,
|
|
|
9 |
|
10 |
# this is specific to Llama-2.
|
11 |
|
12 |
+
system_prompt = """You are a helpful assistant, you will use the provided context to answer user questions.
|
13 |
+
Read the given context before answering questions and think step by step. If you can not answer a user question based on
|
14 |
+
the provided context, inform the user. Do not use any other information for answering user. Provide a detailed answer to the question."""
|
15 |
|
16 |
# system_prompt = """You are a helpful assistant, and you will use the context and documents provided in the training to answer users' questions. Please read the context provided carefully before responding to questions and follow a step-by-step thought process. If you cannot answer a user's question based on the provided context, please inform the user. Do not use any other information to answer the user. Provide a detailed response based on the content of locally trained documents."""
|
17 |
|
18 |
+
# system_prompt = """It's a useful assistant that will use the context and documents provided in the training to answer users' questions.
|
19 |
+
# Read the context provided before answering the questions and think step by step. If you can't answer, just say "I don't know" and don't try to work out an answer to respond to the user."""
|
|
|
20 |
|
21 |
def get_prompt_template(system_prompt=system_prompt, promptTemplate_type=None, history=False):
|
22 |
if promptTemplate_type == "llama":
|
|
|
84 |
)
|
85 |
prompt = PromptTemplate(input_variables=["context", "question"], template=prompt_template)
|
86 |
|
87 |
+
memory = ConversationBufferMemory(input_key="question", memory_key="history")
|
88 |
|
89 |
return (
|
90 |
prompt,
|