Commit 0b74b4d · Daniel Marques committed · Parent(s): dca490a

fix: add websocket in handlerToken

Files changed:
- Dockerfile +7 -1
- main.py +10 -10
- prompt_template_utils.py +7 -8
- run.sh +1 -1
Dockerfile
CHANGED

@@ -4,7 +4,6 @@
 FROM nvidia/cuda:12.1.1-devel-ubuntu22.04


-
 RUN apt-get update && apt-get upgrade -y \
     && apt-get install -y git build-essential libpq-dev gcc \
     wget ocl-icd-opencl-dev opencl-headers clinfo \
@@ -14,11 +13,14 @@ RUN apt-get update && apt-get upgrade -y \

 WORKDIR /app

+RUN mkdir "./cache"
+
 COPY . .

 # setting build related env vars
 ENV CUDA_DOCKER_ARCH=all
 ENV LLAMA_CUBLAS=1
+ENV TRANSFORMERS_CACHE="./cache"

 # Install depencencies
 RUN python -m pip install --upgrade pip pytest cmake \
@@ -29,6 +31,10 @@ RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python
 RUN CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 PIP_ROOT_USER_ACTION=ignore pip install --ignore-installed --timeout 100 -r requirements.txt
 RUN pip install uvicorn

+
+
+
+
 # RUN useradd -m -u 1000 user
 # USER user

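The new RUN mkdir "./cache" and ENV TRANSFORMERS_CACHE="./cache" lines give the Hugging Face transformers library a writable download cache inside the image, presumably so model downloads land in a known location under /app. A minimal sketch of how that environment variable is consumed at runtime; the model id below is a placeholder, not something used by this repository:

import os

# Mirror what the Dockerfile does with ENV TRANSFORMERS_CACHE="./cache".
os.environ.setdefault("TRANSFORMERS_CACHE", "./cache")

from transformers import AutoTokenizer  # reads TRANSFORMERS_CACHE when resolving downloads

# Files fetched by from_pretrained now land under ./cache instead of the default user cache.
tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-bert")  # placeholder model id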
main.py
CHANGED

@@ -4,7 +4,6 @@ import shutil
 import subprocess
 import asyncio

-
 from typing import Any, Dict, List

 from fastapi import FastAPI, HTTPException, UploadFile, WebSocket, WebSocketDisconnect
@@ -20,6 +19,8 @@ from langchain.memory import ConversationBufferMemory
 from langchain.callbacks.base import BaseCallbackHandler
 from langchain.schema import LLMResult

+from prompt_template_utils import get_prompt_template
+
 # from langchain.embeddings import HuggingFaceEmbeddings
 from load_models import load_model

@@ -77,15 +78,13 @@ handlerToken = MyCustomSyncHandler()

 LLM = load_model(device_type=DEVICE_TYPE, model_id=MODEL_ID, model_basename=MODEL_BASENAME, stream=True, callbacks=[handlerToken])

-template = """You are a helpful, respectful and honest assistant.
-Always answer in the most helpful and safe way possible without trying to make up an answer, if you don't know the answer just say "I don't know" and don't share false information or topics that were not provided in your training. Use a maximum of 15 sentences. Your answer should be as concise and clear as possible. Always say "thank you for asking!" at the end of your answer.
-Context: {context}
-Question: {question}
-"""
-
-memory = ConversationBufferMemory(input_key="question", memory_key="history")
+# template = """You are a helpful, respectful and honest assistant.
+# Always answer in the most helpful and safe way possible without trying to make up an answer, if you don't know the answer just say "I don't know" and don't share false information or topics that were not provided in your training. Use a maximum of 15 sentences. Your answer should be as concise and clear as possible. Always say "thank you for asking!" at the end of your answer.
+# Context: {context}
+# Question: {question}
+# """

-
+prompt, memory = get_prompt_template(promptTemplate_type="llama", history=True)

 QA = RetrievalQA.from_chain_type(
     llm=LLM,
@@ -93,7 +92,8 @@ QA = RetrievalQA.from_chain_type(
     retriever=RETRIEVER,
     return_source_documents=SHOW_SOURCES,
     chain_type_kwargs={
-        "prompt":
+        "prompt": prompt,
+        "memory": memory
     },
 )

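The commit title, "add websocket in handlerToken", refers to the MyCustomSyncHandler instance passed to load_model(..., callbacks=[handlerToken]); the handler's implementation sits outside the hunks shown here. A minimal sketch, assuming the handler forwards each streamed token to a connected WebSocket client via LangChain's on_llm_new_token callback (the class name and attributes below are illustrative, not taken from the repository):

import asyncio
from typing import Any, Optional

from fastapi import WebSocket
from langchain.callbacks.base import BaseCallbackHandler


class TokenWebSocketHandler(BaseCallbackHandler):
    """Illustrative stand-in for MyCustomSyncHandler: pushes streamed tokens to a WebSocket."""

    def __init__(self) -> None:
        self.websocket: Optional[WebSocket] = None              # attached by the WebSocket endpoint
        self.loop: Optional[asyncio.AbstractEventLoop] = None   # event loop running that endpoint

    def on_llm_new_token(self, token: str, **kwargs: Any) -> None:
        # LangChain invokes this for every token when the LLM is loaded with stream=True.
        if self.websocket is not None and self.loop is not None:
            asyncio.run_coroutine_threadsafe(self.websocket.send_text(token), self.loop)

A WebSocket endpoint would then set handler.websocket and handler.loop (via asyncio.get_running_loop()) after accepting the connection, so tokens produced by the QA chain stream back to the client as they are generated.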
prompt_template_utils.py
CHANGED

@@ -1,6 +1,6 @@
 """
-This file implements prompt template for llama based models.
-Modify the prompt template based on the model you select.
+This file implements prompt template for llama based models.
+Modify the prompt template based on the model you select.
 This seems to have significant impact on the output of the LLM.
 """

@@ -10,10 +10,9 @@ from langchain.prompts import PromptTemplate
 # this is specific to Llama-2.

 system_prompt = """You are a helpful assistant, you will use the provided context to answer user questions.
-Read the given context before answering questions and think step by step. If you can not answer a user question based on
+Read the given context before answering questions and think step by step. If you can not answer a user question based on
 the provided context, inform the user. Do not use any other information for answering user. Provide a detailed answer to the question."""

-
 def get_prompt_template(system_prompt=system_prompt, promptTemplate_type=None, history=False):
     if promptTemplate_type == "llama":
         B_INST, E_INST = "[INST]", "[/INST]"
@@ -40,7 +39,7 @@ def get_prompt_template(system_prompt=system_prompt, promptTemplate_type=None, h
             B_INST
             + system_prompt
             + """
-
+
             Context: {history} \n {context}
             User: {question}"""
             + E_INST
@@ -51,7 +50,7 @@ def get_prompt_template(system_prompt=system_prompt, promptTemplate_type=None, h
             B_INST
             + system_prompt
             + """
-
+
             Context: {context}
             User: {question}"""
             + E_INST
@@ -63,7 +62,7 @@ def get_prompt_template(system_prompt=system_prompt, promptTemplate_type=None, h
         prompt_template = (
             system_prompt
             + """
-
+
             Context: {history} \n {context}
             User: {question}
             Answer:"""
@@ -73,7 +72,7 @@ def get_prompt_template(system_prompt=system_prompt, promptTemplate_type=None, h
         prompt_template = (
             system_prompt
             + """
-
+
             Context: {context}
             User: {question}
             Answer:"""
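main.py now unpacks prompt, memory = get_prompt_template(promptTemplate_type="llama", history=True) and forwards both through chain_type_kwargs. The return statement of get_prompt_template is not part of the hunks above; a minimal sketch of how that function presumably finishes, assuming it wraps the assembled string in a PromptTemplate and pairs it with the same ConversationBufferMemory that main.py previously created inline (the helper name below is hypothetical):

from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate


def _build_prompt_and_memory(prompt_template: str, history: bool):
    # Hypothetical tail of get_prompt_template: the input variables declared by the
    # template depend on whether conversation history is included.
    if history:
        prompt = PromptTemplate(input_variables=["history", "context", "question"], template=prompt_template)
    else:
        prompt = PromptTemplate(input_variables=["context", "question"], template=prompt_template)
    memory = ConversationBufferMemory(input_key="question", memory_key="history")
    return prompt, memory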
run.sh
CHANGED

@@ -1,5 +1,5 @@
 redis-cli --version

-
+service redis-server start

 uvicorn "main:app" --port 7860 --host 0.0.0.0