import gradio as gr
import os
from huggingface_hub.file_download import http_get
from llama_cpp import Llama
# Default system message placed at the start of every conversation sent to the model.
SYSTEM_PROMPT = (
    "Ты — Сайга, русскоязычный автоматический ассистент. "
    "Ты разговариваешь с людьми и помогаешь им."
)
def load_model(
    directory: str = ".",
    model_name: str = "saiga_nemo_12b.Q4_K_M.gguf",
    model_url: str = "https://huggingface.co/IlyaGusev/saiga_nemo_12b_gguf/resolve/main/saiga_nemo_12b.Q4_K_M.gguf"
):
    """Download the GGUF model file if it is missing and load it with llama.cpp.

    Args:
        directory: Folder where the model file is stored.
        model_name: File name of the model inside ``directory``.
        model_url: URL the file is fetched from when it is not on disk yet.

    Returns:
        The ``Llama`` instance created from the model file with ``n_ctx=8192``.

    Raises:
        Whatever ``http_get`` or the file operations raise; in that case no
        file is left at the final model path.
    """
    final_model_path = os.path.join(directory, model_name)

    print("Downloading all files...")
    if not os.path.exists(final_model_path):
        # An empty ``directory`` means "current directory"; makedirs("") would fail.
        if directory:
            os.makedirs(directory, exist_ok=True)

        # Download to a temporary name first: if the transfer is interrupted,
        # a truncated file must not sit at the final path, otherwise the next
        # start would skip the download and try to load a corrupt model.
        partial_path = final_model_path + ".part"
        try:
            with open(partial_path, "wb") as f:
                http_get(model_url, f)
        except BaseException:
            # Also covers KeyboardInterrupt so Ctrl+C leaves no debris behind.
            if os.path.exists(partial_path):
                os.remove(partial_path)
            raise
        os.replace(partial_path, final_model_path)
    # NOTE(review): 0o777 makes the model world-writable; kept for backward
    # compatibility, but a tighter mode (e.g. 0o644) is likely sufficient.
    os.chmod(final_model_path, 0o777)
    print("Files downloaded!")

    model = Llama(
        model_path=final_model_path,
        n_ctx=8192
    )

    print("Model loaded!")
    return model
# Load the model once at import time (downloading it first if needed);
# the chat handler below reads this module-level instance.
MODEL = load_model()
def user(message, history):
    """Append the submitted message to the chat as a turn awaiting a reply.

    Args:
        message: Text the user has just submitted.
        history: Existing chat turns as ``[user_message, bot_message]`` pairs.

    Returns:
        A tuple of an empty string and a new history list that ends with
        ``[message, None]``. The input ``history`` is not modified.
    """
    pending_turn = [message, None]
    cleared_input = ""
    return cleared_input, history + [pending_turn]
def bot(
    history,
    system_prompt,
    top_p,
    top_k,
    temp
):
    """Stream the assistant's reply to the latest user message.

    Args:
        history: Chat turns as ``[user_message, bot_message]`` pairs. The last
            pair holds the pending user message; its reply slot is filled in
            place as text arrives.
        system_prompt: System message for this conversation. When empty or
            ``None`` the module-level ``SYSTEM_PROMPT`` is used instead.
        top_p: Forwarded to ``create_chat_completion`` as ``top_p``.
        top_k: Forwarded to ``create_chat_completion`` as ``top_k``.
        temp: Forwarded to ``create_chat_completion`` as ``temperature``.

    Yields:
        The ``history`` list after every streamed chunk, with the last turn's
        reply holding all text received so far.
    """
    # Nothing to answer: avoid IndexError on history[-1] below.
    if not history:
        yield history
        return

    model = MODEL

    # Honour the prompt supplied by the caller; previously it was ignored and
    # the global default was always sent.
    effective_system_prompt = system_prompt or SYSTEM_PROMPT
    messages = [{"role": "system", "content": effective_system_prompt}]

    for user_message, bot_message in history[:-1]:
        messages.append({"role": "user", "content": user_message})
        if bot_message:
            messages.append({"role": "assistant", "content": bot_message})

    last_user_message = history[-1][0]
    messages.append({"role": "user", "content": last_user_message})

    partial_text = ""
    for part in model.create_chat_completion(
        messages,
        temperature=temp,
        top_k=top_k,
        top_p=top_p,
        stream=True,
    ):
        delta = part["choices"][0]["delta"]
        # Some chunks carry only a role or are empty; skip missing/None content.
        chunk_text = delta.get("content")
        if chunk_text:
            partial_text += chunk_text
        history[-1][1] = partial_text
        yield history
with gr.Blocks(
theme=gr.themes.Soft()
) as demo:
favicon = ''
gr.Markdown(
f"""