Spaces:

artificialguybr
/

LongWriter-glm4-9b-ZERO

Running on Zero

File size: 3,513 Bytes

f6fc9d2
b58bfab
 
f6fc9d2
b58bfab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f6fc9d2
b58bfab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
be3c447
b58bfab
 
be3c447
b58bfab
 
 
 
be3c447
 
 
 
b58bfab
 
 
 
 
 
 
 
 
 
f6fc9d2
b58bfab
 
 
 
f6fc9d2
b58bfab
 
 
 
 
 
 
f6fc9d2
b58bfab
 
 
 
 
 
 
f6fc9d2
b58bfab
 
 
 
 
 
 
f6fc9d2
b58bfab
 
 
 
 
 
 
f6fc9d2
b58bfab

import spaces
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
import gradio as gr
from threading import Thread

# Hugging Face Hub repository that both the tokenizer and the weights come from.
MODEL = "THUDM/LongWriter-glm4-9b"

# Heading rendered through gr.HTML at the top of the page.
TITLE = "<h1><center>LongWriter-glm4-9b</center></h1>"

# Greeting handed to gr.Chatbot as its placeholder.
PLACEHOLDER = """
<center>
<p>Hi! I'm LongWriter-glm4-9b, capable of generating 10,000+ words. How can I assist you today?</p>
</center>
"""

# Extra stylesheet passed to gr.Blocks; styles the duplicate button and h3 headings.
CSS = """
.duplicate-button {
    margin: auto !important;
    color: white !important;
    background: black !important;
    border-radius: 100vh !important;
}
h3 {
    text-align: center;
}
"""

# Preferred device name. Nothing else in the visible file reads it; the model
# is placed through device_map="auto" below.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

# trust_remote_code=True lets the repository's own Python code be used for
# the tokenizer and model classes.
tokenizer = AutoTokenizer.from_pretrained(MODEL, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
).eval()

@spaces.GPU
def stream_chat(
    message: str,
    history: list,
    system_prompt: str,
    temperature: float = 0.5,
    max_new_tokens: int = 32768,
    top_p: float = 1.0,
    top_k: int = 50,
):
    """Generate a reply to ``message`` with the module-level model.

    Despite the name, the reply is produced by one blocking ``model.chat``
    call and yielded exactly once; no partial output is emitted.

    Args:
        message: The latest user message.
        history: Earlier turns, iterated as ``(prompt, answer)`` pairs.
        system_prompt: Received from the UI but not forwarded to
            ``model.chat`` by this function.
        temperature: Sampling temperature. Values below 0.01 (notably the 0
            that the UI slider allows) are raised to 0.01.
        max_new_tokens: Upper bound on generated tokens, passed through.
        top_p: Nucleus-sampling threshold, passed through.
        top_k: Top-k sampling cutoff, passed through.

    Yields:
        The complete response returned by ``model.chat``.
    """
    print(f'message: {message}')
    print(f'history: {history}')

    # The Temperature slider goes down to 0, but Transformers' temperature
    # warper only accepts a strictly positive float. Coerce to float and apply
    # a small floor so the lowest slider setting gives near-greedy decoding
    # instead of an error.
    min_temperature = 0.01
    temperature = max(float(temperature), min_temperature)

    # Normalise each turn to a tuple, whatever sequence type the UI supplied.
    chat_history = [(prompt, answer) for prompt, answer in history]

    # The second element of the returned pair is not needed here.
    response, _ = model.chat(
        tokenizer,
        message,
        history=chat_history,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        top_k=top_k,
        temperature=temperature,
    )

    yield response


# Chat window handed to the ChatInterface below.
chatbot = gr.Chatbot(height=600, placeholder=PLACEHOLDER)

with gr.Blocks(css=CSS, theme="soft") as demo:
    gr.HTML(TITLE)
    gr.DuplicateButton(value="Duplicate Space for private use", elem_classes="duplicate-button")

    # Every parameter control is created with render=False and passed to the
    # ChatInterface further down.
    parameters_panel = gr.Accordion(label="⚙️ Parameters", open=False, render=False)
    system_prompt_box = gr.Textbox(
        value="You are a helpful assistant capable of generating long-form content.",
        label="System Prompt",
        render=False,
    )
    temperature_slider = gr.Slider(
        minimum=0, maximum=1, step=0.1, value=0.5, label="Temperature", render=False
    )
    max_tokens_slider = gr.Slider(
        minimum=1024, maximum=32768, step=1024, value=32768, label="Max new tokens", render=False
    )
    top_p_slider = gr.Slider(
        minimum=0.0, maximum=1.0, step=0.1, value=1.0, label="Top p", render=False
    )
    top_k_slider = gr.Slider(
        minimum=1, maximum=100, step=1, value=50, label="Top k", render=False
    )

    # One single-element row per example prompt.
    example_prompts = [
        ["Write a 10000-word comprehensive guide on artificial intelligence and its applications."],
        ["Create a detailed 5000-word business plan for a space tourism company."],
        ["Compose a 3000-word short story about time travel and its consequences."],
        ["Develop a 7000-word research proposal on the potential of quantum computing in cryptography."],
    ]

    gr.ChatInterface(
        fn=stream_chat,
        chatbot=chatbot,
        fill_height=True,
        additional_inputs_accordion=parameters_panel,
        # Listed in the same order as stream_chat's parameters after
        # (message, history).
        additional_inputs=[
            system_prompt_box,
            temperature_slider,
            max_tokens_slider,
            top_p_slider,
            top_k_slider,
        ],
        examples=example_prompts,
        cache_examples=False,
    )

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()