Update app.py
app.py CHANGED
@@ -12,8 +12,8 @@ llama = llama_cpp.Llama.from_pretrained(
     tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B"),
     verbose=False,
     n_ctx=4096,
-    n_threads=4,
     n_gpu_layers=0,
+    chat_format="llama-2"
 )
 # Logger setup
 logging.basicConfig(level=logging.INFO)
@@ -74,7 +74,11 @@ async def complete(
         ],
         temperature=temperature,
         seed=seed,
+        echo=True,
+        stream=True
     )
+    for item in output:
+        print(item['choices'][0]['text'], end='')
     et = time()
     output["time"] = et - st
     return output
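A note on the second hunk: with stream=True, llama-cpp-python returns a generator of chunks rather than a completion dict, so the unchanged lines after the loop (output["time"] = et - st) would raise a TypeError once the stream is exhausted. And if the call here is create_chat_completion, as the new chat_format setting suggests, each streamed chunk carries its text under choices[0]['delta']['content'] rather than choices[0]['text']. Below is a minimal sketch of one way to consume the stream and still return a timed response; the function name and parameters are hypothetical, not the app's actual endpoint:

from time import time

async def complete_streaming(llama, messages, temperature=0.8, seed=None):
    st = time()
    stream = llama.create_chat_completion(
        messages=messages,
        temperature=temperature,
        seed=seed,
        stream=True,  # yields chunk dicts instead of returning one dict
    )
    text = ""
    for chunk in stream:
        # streamed chat chunks put text under "delta"; the first chunk may
        # hold only {"role": "assistant"}, hence the defensive .get() calls
        piece = chunk["choices"][0].get("delta", {}).get("content") or ""
        print(piece, end="", flush=True)
        text += piece
    et = time()
    # the exhausted generator is not subscriptable, so build a fresh dict
    return {"text": text, "time": et - st}

Accumulating the pieces into text keeps the endpoint's original contract (a dict with the generated text and elapsed time) while still echoing tokens as they arrive.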