chat-ggml

Runtime error

File size: 7,608 Bytes

from pathlib import Path
from urllib.parse import urlparse

import gradio as gr
import psutil
from ctransformers import AutoModelForCausalLM
from huggingface_hub import hf_hub_download


# Direct download link to the GGUF weights. The Hub repo id and the weights
# file name are both derived from this URL, so switching models only requires
# changing this one line.
URL = "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q2_K.gguf"

# Parse once; only the path component is needed, which keeps any query string
# or fragment on the URL out of the derived names.
_url_path = urlparse(URL).path

# First two path components: "<owner>/<repo>".
repo_id = "/".join(_url_path.strip("/").split("/")[:2])

# Last path component: the weights file name.
model_file = Path(_url_path).name

# Fetch the weights into ./models; the call returns the local file path.
model_path = hf_hub_download(
    repo_id=repo_id,
    revision="main",
    filename=model_file,
    local_dir="models",
)

# psutil.cpu_count() may return None when the count cannot be determined
# (physical-core detection in particular), so fall back to the logical count
# and finally to a single thread instead of passing None through.
_thread_count = psutil.cpu_count(logical=False) or psutil.cpu_count() or 1

llm = AutoModelForCausalLM.from_pretrained(
    model_path,
    model_type="llama",
    threads=_thread_count,
)

# HTML heading rendered at the top of the page; shows the model file name.
TITLE = f"""<h2 align="center"> chat-ggml ({model_file})</h2>"""
# Speaker labels used when the conversation is rendered into a text prompt.
USER_NAME = "User"
BOT_NAME = "Assistant"
# Default preamble placed ahead of the conversation turns in the prompt.
DEFAULT_INSTRUCTIONS = """The following is a conversation between a highly knowledgeable and intelligent AI assistant and a human User. In the following interactions, User and Assistant will converse and Assistant will answer User's questions.
"""
# Sentinel message meaning "regenerate the reply to the last user message".
RETRY_COMMAND = "/retry"
# Generation is stopped once the model begins writing the next user turn.
STOP_STR = f"\n{USER_NAME}:"
# Streamed chunks that could be a piece of STOP_STR; the streaming loop holds
# these back until a later chunk arrives. Built from USER_NAME so it stays in
# sync with STOP_STR if the speaker label is ever changed.
STOP_SUSPECT_LIST = [":", "\n", USER_NAME]


def chat_accordion():
    """Create the collapsed "Parameters" accordion with the sampling sliders.

    Returns:
        A ``(temperature, top_p)`` tuple of the two ``gr.Slider`` components,
        in that order.
    """
    with gr.Accordion("Parameters", open=False):
        # Temperature first, then nucleus-sampling p: creation order is the
        # order in which the sliders appear inside the accordion.
        temperature_slider = gr.Slider(
            label="Temperature",
            value=0.8,
            minimum=0.1,
            maximum=2.0,
            step=0.1,
            interactive=True,
        )
        nucleus_slider = gr.Slider(
            label="p (nucleus sampling)",
            value=0.9,
            minimum=0.1,
            maximum=0.99,
            step=0.01,
            interactive=True,
        )
    return temperature_slider, nucleus_slider


def format_chat_prompt(message: str, chat_history, instructions: str) -> str:
    """Render the instructions, past turns and the new message as one prompt.

    The result has one line per utterance, each prefixed with the speaker
    label, and ends with an open ``BOT_NAME`` line for the model to complete.

    Args:
        message: The new user message to be answered.
        chat_history: Iterable of ``(user_message, bot_message)`` pairs for
            the turns that came before ``message``.
        instructions: Preamble placed before the conversation; surrounding
            spaces and newlines are removed.

    Returns:
        The prompt text.
    """
    # A single strip over both characters removes any interleaving of spaces
    # and newlines; two chained single-character strips would leave e.g. the
    # trailing space of "text \n" in place.
    lines = [instructions.strip(" \n")]
    for user_message, bot_message in chat_history:
        lines.append(f"{USER_NAME}: {user_message}")
        lines.append(f"{BOT_NAME}: {bot_message}")
    lines.append(f"{USER_NAME}: {message}")
    # Left open on purpose: the model continues from here.
    lines.append(f"{BOT_NAME}:")
    return "\n".join(lines)


def chat():
    """Lay out the chat interface and connect its event handlers.

    Creates, in order: the chatbot display and the input textbox, the
    Retry / Undo / Clear buttons, a list of clickable example prompts, and the
    collapsed "Parameters" and "Instructions" accordions. The nested handlers
    defined below are then bound to the textbox and the buttons.

    Returns nothing; the components are created for their side effect on the
    enclosing Gradio layout.
    """
    with gr.Column(elem_id="chat_container"):
        with gr.Row():
            chatbot = gr.Chatbot(elem_id="chatbot")
        with gr.Row():
            inputs = gr.Textbox(
                placeholder=f"Hello {BOT_NAME} !!",
                label="Type an input and press Enter",
                max_lines=3,
            )

    with gr.Row(elem_id="button_container"):
        with gr.Column():
            retry_button = gr.Button("♻️ Retry")
        with gr.Column():
            delete_turn_button = gr.Button("✨ Undo")
        with gr.Column():
            clear_chat_button = gr.Button("🧽 Clear")

    gr.Examples(
        [
            ["Hey! Any recommendations for my holidays"],
            ["What's the Everett interpretation of quantum mechanics?"],
            [
                "Give me a list of the top 10 dive sites you would recommend around the world."
            ],
            ["Can you tell me more about deep-water soloing?"],
        ],
        inputs=inputs,
        label="Click on any example and press Enter in the input textbox!",
    )

    with gr.Row(elem_id="param_container"):
        with gr.Column():
            temperature, top_p = chat_accordion()
        with gr.Column():
            with gr.Accordion("Instructions", open=False):
                instructions = gr.Textbox(
                    placeholder="LLM instructions",
                    value=DEFAULT_INSTRUCTIONS,
                    lines=3,
                    interactive=True,
                    label="Instructions",
                    max_lines=10,
                    show_label=False,
                )

    def run_chat(
        message: str, chat_history, instructions: str, temperature: float, top_p: float
    ):
        """Stream the model's reply to ``message``.

        Generator: yields the full chat history (a list of
        ``[user_message, bot_message]`` pairs) each time new reply text is
        appended, so the chatbot can be updated incrementally.

        Args:
            message: The user's text, or ``RETRY_COMMAND`` to regenerate the
                reply to the last user message.
            chat_history: Turns so far; never mutated by this function.
            instructions: Preamble for the prompt.
            temperature: Sampling temperature passed to the model.
            top_p: Nucleus-sampling threshold passed to the model.
        """
        # Work on a copy so the caller's list is left untouched; also
        # tolerates a missing (None) history.
        history = list(chat_history or [])

        # Nothing to do for an empty message or a retry with no prior turn.
        if not message or (message == RETRY_COMMAND and not history):
            yield history
            return

        if message == RETRY_COMMAND:
            # Re-ask the last user message and discard its previous reply.
            message, _ = history.pop()

        prompt = format_chat_prompt(message, history, instructions)
        stream = llm(
            prompt,
            max_new_tokens=1024,
            stop=[STOP_STR, "<|endoftext|>"],
            temperature=temperature,
            top_p=top_p,
            stream=True,
        )

        reply = ""
        held_back = ""
        for idx, text_token in enumerate(stream):
            # Chunks that might be the beginning of the stop string are held
            # back and only shown once a later, ordinary chunk arrives.
            # NOTE(review): anything still held back when the stream ends is
            # never shown, so a reply ending in ":" / "\n" / the user label
            # loses that tail -- presumably intended to hide a partial stop
            # string; confirm.
            if text_token in STOP_SUSPECT_LIST:
                held_back += text_token
                continue

            # Drop the single leading space of the very first chunk.
            if idx == 0 and text_token.startswith(" "):
                text_token = text_token[1:]

            reply += held_back + text_token
            held_back = ""
            yield history + [[message, reply]]

    def delete_last_turn(chat_history):
        """Remove the most recent turn, if any, and refresh the chatbot."""
        # Slice instead of pop so the incoming list is not modified.
        remaining = chat_history[:-1] if chat_history else chat_history
        return {chatbot: gr.update(value=remaining)}

    def run_retry(
        message: str, chat_history, instructions: str, temperature: float, top_p: float
    ):
        """Regenerate the last reply; ``message`` is accepted but ignored."""
        yield from run_chat(
            RETRY_COMMAND, chat_history, instructions, temperature, top_p
        )

    def clear_chat():
        """Reset the chatbot to an empty conversation."""
        return []

    # Pressing Enter both starts generation and (separately) empties the box.
    inputs.submit(
        run_chat,
        [inputs, chatbot, instructions, temperature, top_p],
        outputs=[chatbot],
        show_progress="minimal",
    )
    inputs.submit(lambda: "", inputs=None, outputs=inputs)
    delete_turn_button.click(delete_last_turn, inputs=[chatbot], outputs=[chatbot])
    retry_button.click(
        run_retry,
        [inputs, chatbot, instructions, temperature, top_p],
        outputs=[chatbot],
        show_progress="minimal",
    )
    clear_chat_button.click(clear_chat, [], chatbot)



def get_demo():
    """Assemble the complete Gradio app and return it without launching it.

    Returns:
        The ``gr.Blocks`` instance holding the title, the tagline and the
        chat interface built by ``chat``.
    """
    # Inline stylesheet for the element with elem_id "chatbot".
    chatbot_css = (
        "#chatbot {\n"
        "    font-size: 14px;\n"
        "    min-height: 300px;\n"
        "}"
    )

    with gr.Blocks(css=chatbot_css) as demo:
        gr.HTML(TITLE)

        with gr.Row():
            with gr.Column():
                gr.Markdown(
                    """**Chat, brainstorm ideas, discuss your holiday plans, and more!**
                    """
                )

        chat()

    return demo


if __name__ == "__main__":
    # Settings are collected up front so the three steps below read as
    # build -> enable queueing -> serve.
    # NOTE(review): `concurrency_count` is presumably tied to a specific
    # Gradio major version -- confirm against the installed release before
    # upgrading.
    queue_settings = {"max_size": 64, "concurrency_count": 8}
    # Listen on all interfaces, port 7860.
    launch_settings = {"server_name": "0.0.0.0", "server_port": 7860}

    demo = get_demo()
    demo.queue(**queue_settings)
    demo.launch(**launch_settings)