Update app.py
app.py
CHANGED
@@ -9,21 +9,10 @@ from llama_cpp import Llama
 SYSTEM_PROMPT = "Ты — Сайга, русскоязычный автоматический ассистент. Ты разговариваешь с людьми и помогаешь им."
 
 
-def get_message_tokens(model, role, content):
-    content = f"{role}\n{content}\n</s>"
-    content = content.encode("utf-8")
-    return model.tokenize(content, special=True)
-
-
-def get_system_tokens(model):
-    system_message = {"role": "system", "content": SYSTEM_PROMPT}
-    return get_message_tokens(model, **system_message)
-
-
 def load_model(
     directory: str = ".",
-    model_name: str = "
-    model_url: str = "https://huggingface.co/IlyaGusev/
+    model_name: str = "saiga_nemo_12b.Q4_K_M.gguf",
+    model_url: str = "https://huggingface.co/IlyaGusev/saiga_nemo_12b_gguf/resolve/main/saiga_nemo_12b.Q4_K_M.gguf"
 ):
     final_model_path = os.path.join(directory, model_name)
 
@@ -36,7 +25,7 @@ def load_model(
 
     model = Llama(
         model_path=final_model_path,
-        n_ctx=
+        n_ctx=8192
     )
 
     print("Model loaded!")
@@ -59,35 +48,28 @@ def bot(
     temp
 ):
     model = MODEL
-
+    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
 
     for user_message, bot_message in history[:-1]:
-
-        tokens.extend(message_tokens)
+        messages.append({"role": "user", "content": user_message})
         if bot_message:
-
-            tokens.extend(message_tokens)
+            messages.append({"role": "assistant", "content": bot_message})
 
     last_user_message = history[-1][0]
-
-
-
-
-
-    generator = model.generate(
-        tokens,
+    messages.append({"role": "user", "content": last_user_message})
+    partial_text = ""
+    for part in model.create_chat_completion(
+        messages,
+        temperature=temp,
         top_k=top_k,
        top_p=top_p,
-
-    )
-
-
-
-
-
-        partial_text += model.detokenize([token]).decode("utf-8", "ignore")
-        history[-1][1] = partial_text
-        yield history
+        stream=True,
+    ):
+        delta = part["choices"][0]["delta"]
+        if "content" in delta:
+            partial_text += delta["content"]
+            history[-1][1] = partial_text
+            yield history
 
 
 with gr.Blocks(
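For reference, a minimal standalone sketch of the new streaming path outside Gradio. The filename and n_ctx value are taken from the diff above; the sampling values and the user message are placeholders, and the GGUF file is assumed to have already been downloaded (see model_url).

from llama_cpp import Llama

SYSTEM_PROMPT = "Ты — Сайга, русскоязычный автоматический ассистент. Ты разговариваешь с людьми и помогаешь им."

# Assumes the GGUF from model_url has already been downloaded into the working directory.
model = Llama(model_path="saiga_nemo_12b.Q4_K_M.gguf", n_ctx=8192)

messages = [
    {"role": "system", "content": SYSTEM_PROMPT},
    {"role": "user", "content": "Hi! Please introduce yourself."},  # placeholder message
]

# create_chat_completion(stream=True) yields OpenAI-style chunks; the generated text
# arrives incrementally in choices[0]["delta"]["content"].
reply = ""
for part in model.create_chat_completion(
    messages,
    temperature=0.5,  # placeholder sampling values
    top_k=40,
    top_p=0.9,
    stream=True,
):
    delta = part["choices"][0]["delta"]
    if "content" in delta:
        reply += delta["content"]
        print(delta["content"], end="", flush=True)

Because create_chat_completion formats the conversation with the chat template stored in the GGUF metadata (or an explicit chat_format passed to Llama), the hand-rolled get_message_tokens / get_system_tokens helpers and the manual generate/detokenize loop removed in this commit are no longer needed.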