Spaces:

dkdaniz
/

katara

Paused

App Files Community

Daniel Marques committed on Oct 19, 2023

Commit

2084d31

1 Parent(s): d7147ea

feat: add websocket

Browse files

Files changed (3) hide show

constants.py +2 -2
load_models.py +2 -0
prompt_template_utils.py +6 -7

constants.py CHANGED Viewed

@@ -32,13 +32,13 @@ CHROMA_SETTINGS = Settings(
 )
 # Context Window and Max New Tokens
-CONTEXT_WINDOW_SIZE = 4096
 MAX_NEW_TOKENS = CONTEXT_WINDOW_SIZE  # int(CONTEXT_WINDOW_SIZE/4)
 #### If you get a "not enough space in the buffer" error, you should reduce the values below, start with half of the original values and keep halving the value until the error stops appearing
 N_GPU_LAYERS = 40  # Llama-2-70B has 83 layers
-N_BATCH = 512
 ### From experimenting with the Llama-2-7B-Chat-GGML model on 8GB VRAM, these values work:
 # N_GPU_LAYERS = 20

 )
 # Context Window and Max New Tokens
+CONTEXT_WINDOW_SIZE = 2048
 MAX_NEW_TOKENS = CONTEXT_WINDOW_SIZE  # int(CONTEXT_WINDOW_SIZE/4)
 #### If you get a "not enough space in the buffer" error, you should reduce the values below, start with half of the original values and keep halving the value until the error stops appearing
 N_GPU_LAYERS = 40  # Llama-2-70B has 83 layers
+N_BATCH = 1024
 ### From experimenting with the Llama-2-7B-Chat-GGML model on 8GB VRAM, these values work:
 # N_GPU_LAYERS = 20

load_models.py CHANGED Viewed

@@ -58,6 +58,8 @@ def load_quantized_model_gguf_ggml(model_id, model_basename, device_type, loggin
             "model_path": model_path,
             "n_ctx": CONTEXT_WINDOW_SIZE,
             "max_tokens": MAX_NEW_TOKENS,
              # set this based on your GPU & CPU RAM
         }
         if device_type.lower() == "mps":

             "model_path": model_path,
             "n_ctx": CONTEXT_WINDOW_SIZE,
             "max_tokens": MAX_NEW_TOKENS,
+            "n_batch": MAX_NEW_TOKENS,
              # set this based on your GPU & CPU RAM
         }
         if device_type.lower() == "mps":

prompt_template_utils.py CHANGED Viewed

@@ -9,15 +9,14 @@ from langchain.prompts import PromptTemplate
 # this is specific to Llama-2.
-# system_prompt = """You are a helpful assistant, you will use the context and documents provided in the training to answer users questions.
-# Read the context provided before answering questions and think step by step. If you can't answer a user's question based on the
-# context provided, inform the user. Don't use any other information to answer the user."""
 # system_prompt = """You are a helpful assistant, and you will use the context and documents provided in the training to answer users' questions. Please read the context provided carefully before responding to questions and follow a step-by-step thought process. If you cannot answer a user's question based on the provided context, please inform the user. Do not use any other information to answer the user. Provide a detailed response based on the content of locally trained documents."""
-system_prompt = """It's a useful assistant that will use the context and documents provided in the training to answer users' questions.
-Read the context provided before answering the questions and think step by step. Your answer cannot be more than 2000 words long.
-If you can't answer, just say "I don't know" and don't try to work out an answer to respond to the user."""
 def get_prompt_template(system_prompt=system_prompt, promptTemplate_type=None, history=False):
     if promptTemplate_type == "llama":
@@ -85,7 +84,7 @@ def get_prompt_template(system_prompt=system_prompt, promptTemplate_type=None, h
             )
             prompt = PromptTemplate(input_variables=["context", "question"], template=prompt_template)
-    memory = ConversationBufferMemory(input_key="question", memory_key="history", max_token_limit=10)
     return (
         prompt,

 # this is specific to Llama-2.
+system_prompt = """You are a helpful assistant, you will use the provided context to answer user questions.
+Read the given context before answering questions and think step by step. If you can not answer a user question based on
+the provided context, inform the user. Do not use any other information for answering user. Provide a detailed answer to the question."""
 # system_prompt = """You are a helpful assistant, and you will use the context and documents provided in the training to answer users' questions. Please read the context provided carefully before responding to questions and follow a step-by-step thought process. If you cannot answer a user's question based on the provided context, please inform the user. Do not use any other information to answer the user. Provide a detailed response based on the content of locally trained documents."""
+# system_prompt = """It's a useful assistant that will use the context and documents provided in the training to answer users' questions.
+# Read the context provided before answering the questions and think step by step. If you can't answer, just say "I don't know" and don't try to work out an answer to respond to the user."""
 def get_prompt_template(system_prompt=system_prompt, promptTemplate_type=None, history=False):
     if promptTemplate_type == "llama":
             )
             prompt = PromptTemplate(input_variables=["context", "question"], template=prompt_template)
+    memory = ConversationBufferMemory(input_key="question", memory_key="history")
     return (
         prompt,