Update app.py
Browse files
app.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
import fastapi
|
2 |
from fastapi.responses import JSONResponse
|
3 |
from time import time
|
4 |
-
from fastapi.middleware.cors import CORSMiddleware
|
5 |
#MODEL_PATH = "./qwen1_5-0_5b-chat-q4_0.gguf" #"./qwen1_5-0_5b-chat-q4_0.gguf"
|
6 |
import logging
|
7 |
import llama_cpp
|
@@ -25,7 +25,7 @@ llm_chat = llama_cpp.Llama.from_pretrained(
|
|
25 |
verbose=False,
|
26 |
n_ctx=1024,
|
27 |
n_gpu_layers=0,
|
28 |
-
|
29 |
)
|
30 |
llm_generate = llama_cpp.Llama.from_pretrained(
|
31 |
repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
|
@@ -37,7 +37,7 @@ llm_generate = llama_cpp.Llama.from_pretrained(
|
|
37 |
mirostat_mode=2,
|
38 |
mirostat_tau=4.0,
|
39 |
mirostat_eta=1.1
|
40 |
-
|
41 |
)
|
42 |
# Logger setup
|
43 |
logging.basicConfig(level=logging.INFO)
|
@@ -83,7 +83,7 @@ async def chat(gen:GenModel):
|
|
83 |
et = time()
|
84 |
output["time"] = et - st
|
85 |
messages.append({'role': "assistant", "content": output['choices'][0]['message']['content']})
|
86 |
-
print(messages)
|
87 |
return output
|
88 |
except Exception as e:
|
89 |
logger.error(f"Error in /complete endpoint: {e}")
|
@@ -131,5 +131,4 @@ async def generate(gen:GenModel):
|
|
131 |
|
132 |
if __name__ == "__main__":
|
133 |
import uvicorn
|
134 |
-
|
135 |
uvicorn.run(app, host="0.0.0.0", port=7860)
|
|
|
1 |
import fastapi
|
2 |
from fastapi.responses import JSONResponse
|
3 |
from time import time
|
4 |
+
#from fastapi.middleware.cors import CORSMiddleware
|
5 |
#MODEL_PATH = "./qwen1_5-0_5b-chat-q4_0.gguf" #"./qwen1_5-0_5b-chat-q4_0.gguf"
|
6 |
import logging
|
7 |
import llama_cpp
|
|
|
25 |
verbose=False,
|
26 |
n_ctx=1024,
|
27 |
n_gpu_layers=0,
|
28 |
+
chat_format="llama-2"
|
29 |
)
|
30 |
llm_generate = llama_cpp.Llama.from_pretrained(
|
31 |
repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
|
|
|
37 |
mirostat_mode=2,
|
38 |
mirostat_tau=4.0,
|
39 |
mirostat_eta=1.1,
|
40 |
+
chat_format="llama-2"
|
41 |
)
|
42 |
# Logger setup
|
43 |
logging.basicConfig(level=logging.INFO)
|
|
|
83 |
et = time()
|
84 |
output["time"] = et - st
|
85 |
messages.append({'role': "assistant", "content": output['choices'][0]['message']['content']})
|
86 |
+
#print(messages)
|
87 |
return output
|
88 |
except Exception as e:
|
89 |
logger.error(f"Error in /complete endpoint: {e}")
|
|
|
131 |
|
132 |
if __name__ == "__main__":
|
133 |
import uvicorn
|
|
|
134 |
uvicorn.run(app, host="0.0.0.0", port=7860)
|