Update app.py
Browse files
app.py
CHANGED
@@ -3,6 +3,7 @@ from fastapi.responses import HTMLResponse
|
|
3 |
from transformers import AutoTokenizer
|
4 |
from pydantic import BaseModel
|
5 |
from llama_cpp import Llama
|
|
|
6 |
|
7 |
class Message(BaseModel):
|
8 |
content: str
|
@@ -193,6 +194,7 @@ messages = []
|
|
193 |
|
194 |
@app.post("/chat")
|
195 |
def chat(req: Message):
|
|
|
196 |
messages.append({"role": "user", "content": req.content})
|
197 |
text = tokenizer.apply_chat_template(
|
198 |
messages, tokenize=False, add_generation_prompt=True
|
@@ -200,8 +202,8 @@ def chat(req: Message):
|
|
200 |
output = llm(text,max_tokens=req.token,echo=False)
|
201 |
response = output['choices'][0]['text']
|
202 |
messages.append({"role": "assistant", "content": response})
|
203 |
-
|
204 |
-
return {"response": response}
|
205 |
|
206 |
|
207 |
@app.post("/setSystemPrompt")
|
|
|
3 |
from transformers import AutoTokenizer
|
4 |
from pydantic import BaseModel
|
5 |
from llama_cpp import Llama
|
6 |
+
import time
|
7 |
|
8 |
class Message(BaseModel):
|
9 |
content: str
|
|
|
194 |
|
195 |
@app.post("/chat")
|
196 |
def chat(req: Message):
|
197 |
+
a = time.time()
|
198 |
messages.append({"role": "user", "content": req.content})
|
199 |
text = tokenizer.apply_chat_template(
|
200 |
messages, tokenize=False, add_generation_prompt=True
|
|
|
202 |
output = llm(text,max_tokens=req.token,echo=False)
|
203 |
response = output['choices'][0]['text']
|
204 |
messages.append({"role": "assistant", "content": response})
|
205 |
+
b = time.time()
|
206 |
+
return {"response": response, "time": b-a}
|
207 |
|
208 |
|
209 |
@app.post("/setSystemPrompt")
|