Daniel Marques committed on
Commit
0b74b4d
·
1 Parent(s): dca490a

fix: add websocket in handlerToken

Browse files
Files changed (4) hide show
  1. Dockerfile +7 -1
  2. main.py +10 -10
  3. prompt_template_utils.py +7 -8
  4. run.sh +1 -1
Dockerfile CHANGED
@@ -4,7 +4,6 @@
4
  FROM nvidia/cuda:12.1.1-devel-ubuntu22.04
5
 
6
 
7
-
8
  RUN apt-get update && apt-get upgrade -y \
9
  && apt-get install -y git build-essential libpq-dev gcc \
10
  wget ocl-icd-opencl-dev opencl-headers clinfo \
@@ -14,11 +13,14 @@ RUN apt-get update && apt-get upgrade -y \
14
 
15
  WORKDIR /app
16
 
 
 
17
  COPY . .
18
 
19
  # setting build related env vars
20
  ENV CUDA_DOCKER_ARCH=all
21
  ENV LLAMA_CUBLAS=1
 
22
 
23
  # Install depencencies
24
  RUN python -m pip install --upgrade pip pytest cmake \
@@ -29,6 +31,10 @@ RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python
29
  RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 PIP_ROOT_USER_ACTION=ignore pip install --ignore-installed --timeout 100 -r requirements.txt
30
  RUN pip install uvicorn
31
 
 
 
 
 
32
  # RUN useradd -m -u 1000 user
33
  # USER user
34
 
 
4
  FROM nvidia/cuda:12.1.1-devel-ubuntu22.04
5
 
6
 
 
7
  RUN apt-get update && apt-get upgrade -y \
8
  && apt-get install -y git build-essential libpq-dev gcc \
9
  wget ocl-icd-opencl-dev opencl-headers clinfo \
 
13
 
14
  WORKDIR /app
15
 
16
+ RUN mkdir "./cache"
17
+
18
  COPY . .
19
 
20
  # setting build related env vars
21
  ENV CUDA_DOCKER_ARCH=all
22
  ENV LLAMA_CUBLAS=1
23
+ ENV TRANSFORMERS_CACHE="./cache"
24
 
25
  # Install depencencies
26
  RUN python -m pip install --upgrade pip pytest cmake \
 
31
  RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 PIP_ROOT_USER_ACTION=ignore pip install --ignore-installed --timeout 100 -r requirements.txt
32
  RUN pip install uvicorn
33
 
34
+
35
+
36
+
37
+
38
  # RUN useradd -m -u 1000 user
39
  # USER user
40
 
main.py CHANGED
@@ -4,7 +4,6 @@ import shutil
4
  import subprocess
5
  import asyncio
6
 
7
-
8
  from typing import Any, Dict, List
9
 
10
  from fastapi import FastAPI, HTTPException, UploadFile, WebSocket, WebSocketDisconnect
@@ -20,6 +19,8 @@ from langchain.memory import ConversationBufferMemory
20
  from langchain.callbacks.base import BaseCallbackHandler
21
  from langchain.schema import LLMResult
22
 
 
 
23
  # from langchain.embeddings import HuggingFaceEmbeddings
24
  from load_models import load_model
25
 
@@ -77,15 +78,13 @@ handlerToken = MyCustomSyncHandler()
77
 
78
  LLM = load_model(device_type=DEVICE_TYPE, model_id=MODEL_ID, model_basename=MODEL_BASENAME, stream=True, callbacks=[handlerToken])
79
 
80
- template = """You are a helpful, respectful and honest assistant.
81
- Always answer in the most helpful and safe way possible without trying to make up an answer, if you don't know the answer just say "I don't know" and don't share false information or topics that were not provided in your training. Use a maximum of 15 sentences. Your answer should be as concise and clear as possible. Always say "thank you for asking!" at the end of your answer.
82
- Context: {context}
83
- Question: {question}
84
- """
85
-
86
- memory = ConversationBufferMemory(input_key="question", memory_key="history")
87
 
88
- QA_CHAIN_PROMPT = PromptTemplate(input_variables=["context", "question"], template=template)
89
 
90
  QA = RetrievalQA.from_chain_type(
91
  llm=LLM,
@@ -93,7 +92,8 @@ QA = RetrievalQA.from_chain_type(
93
  retriever=RETRIEVER,
94
  return_source_documents=SHOW_SOURCES,
95
  chain_type_kwargs={
96
- "prompt": QA_CHAIN_PROMPT,
 
97
  },
98
  )
99
 
 
4
  import subprocess
5
  import asyncio
6
 
 
7
  from typing import Any, Dict, List
8
 
9
  from fastapi import FastAPI, HTTPException, UploadFile, WebSocket, WebSocketDisconnect
 
19
  from langchain.callbacks.base import BaseCallbackHandler
20
  from langchain.schema import LLMResult
21
 
22
+ from prompt_template_utils import get_prompt_template
23
+
24
  # from langchain.embeddings import HuggingFaceEmbeddings
25
  from load_models import load_model
26
 
 
78
 
79
  LLM = load_model(device_type=DEVICE_TYPE, model_id=MODEL_ID, model_basename=MODEL_BASENAME, stream=True, callbacks=[handlerToken])
80
 
81
+ # template = """You are a helpful, respectful and honest assistant.
82
+ # Always answer in the most helpful and safe way possible without trying to make up an answer, if you don't know the answer just say "I don't know" and don't share false information or topics that were not provided in your training. Use a maximum of 15 sentences. Your answer should be as concise and clear as possible. Always say "thank you for asking!" at the end of your answer.
83
+ # Context: {context}
84
+ # Question: {question}
85
+ # """
 
 
86
 
87
+ prompt, memory = get_prompt_template(promptTemplate_type="llama", history=True)
88
 
89
  QA = RetrievalQA.from_chain_type(
90
  llm=LLM,
 
92
  retriever=RETRIEVER,
93
  return_source_documents=SHOW_SOURCES,
94
  chain_type_kwargs={
95
+ "prompt": prompt,
96
+ "memory": memory
97
  },
98
  )
99
 
prompt_template_utils.py CHANGED
@@ -1,6 +1,6 @@
1
  """
2
- This file implements prompt template for llama based models.
3
- Modify the prompt template based on the model you select.
4
  This seems to have significant impact on the output of the LLM.
5
  """
6
 
@@ -10,10 +10,9 @@ from langchain.prompts import PromptTemplate
10
  # this is specific to Llama-2.
11
 
12
  system_prompt = """You are a helpful assistant, you will use the provided context to answer user questions.
13
- Read the given context before answering questions and think step by step. If you can not answer a user question based on
14
  the provided context, inform the user. Do not use any other information for answering user. Provide a detailed answer to the question."""
15
 
16
-
17
  def get_prompt_template(system_prompt=system_prompt, promptTemplate_type=None, history=False):
18
  if promptTemplate_type == "llama":
19
  B_INST, E_INST = "[INST]", "[/INST]"
@@ -40,7 +39,7 @@ def get_prompt_template(system_prompt=system_prompt, promptTemplate_type=None, h
40
  B_INST
41
  + system_prompt
42
  + """
43
-
44
  Context: {history} \n {context}
45
  User: {question}"""
46
  + E_INST
@@ -51,7 +50,7 @@ def get_prompt_template(system_prompt=system_prompt, promptTemplate_type=None, h
51
  B_INST
52
  + system_prompt
53
  + """
54
-
55
  Context: {context}
56
  User: {question}"""
57
  + E_INST
@@ -63,7 +62,7 @@ def get_prompt_template(system_prompt=system_prompt, promptTemplate_type=None, h
63
  prompt_template = (
64
  system_prompt
65
  + """
66
-
67
  Context: {history} \n {context}
68
  User: {question}
69
  Answer:"""
@@ -73,7 +72,7 @@ def get_prompt_template(system_prompt=system_prompt, promptTemplate_type=None, h
73
  prompt_template = (
74
  system_prompt
75
  + """
76
-
77
  Context: {context}
78
  User: {question}
79
  Answer:"""
 
1
  """
2
+ This file implements prompt template for llama based models.
3
+ Modify the prompt template based on the model you select.
4
  This seems to have significant impact on the output of the LLM.
5
  """
6
 
 
10
  # this is specific to Llama-2.
11
 
12
  system_prompt = """You are a helpful assistant, you will use the provided context to answer user questions.
13
+ Read the given context before answering questions and think step by step. If you can not answer a user question based on
14
  the provided context, inform the user. Do not use any other information for answering user. Provide a detailed answer to the question."""
15
 
 
16
  def get_prompt_template(system_prompt=system_prompt, promptTemplate_type=None, history=False):
17
  if promptTemplate_type == "llama":
18
  B_INST, E_INST = "[INST]", "[/INST]"
 
39
  B_INST
40
  + system_prompt
41
  + """
42
+
43
  Context: {history} \n {context}
44
  User: {question}"""
45
  + E_INST
 
50
  B_INST
51
  + system_prompt
52
  + """
53
+
54
  Context: {context}
55
  User: {question}"""
56
  + E_INST
 
62
  prompt_template = (
63
  system_prompt
64
  + """
65
+
66
  Context: {history} \n {context}
67
  User: {question}
68
  Answer:"""
 
72
  prompt_template = (
73
  system_prompt
74
  + """
75
+
76
  Context: {context}
77
  User: {question}
78
  Answer:"""
run.sh CHANGED
@@ -1,5 +1,5 @@
1
  redis-cli --version
2
 
3
- sudo service redis-server start
4
 
5
  uvicorn "main:app" --port 7860 --host 0.0.0.0
 
1
  redis-cli --version
2
 
3
+ service redis-server start
4
 
5
  uvicorn "main:app" --port 7860 --host 0.0.0.0