Update main.py
main.py CHANGED

@@ -1,6 +1,9 @@
 from ctransformers import AutoModelForCausalLM
-from fastapi import FastAPI
-from pydantic import BaseModel
+import gradio as gr
+
+greety = """
+A special thanks to [TheBloke](https://huggingface.co/TheBloke) for the quantized model and [Gathnex](https://medium.com/@gathnex) for his excellent tutorial.
+"""

 #Model loading
 llm = AutoModelForCausalLM.from_pretrained("dolphin-2.0-mistral-7b.Q4_K_S.gguf",
@@ -8,27 +11,46 @@ model_type='mistral',
 max_new_tokens = 1096,
 threads = 3,
 )
-
-
-
-class validation(BaseModel):
-    prompt: str
-
-#Fast API
-app = FastAPI()
-
-#Zephyr completion
-@app.post("/llm_on_cpu")
-async def stream(item: validation):
-    system_prompt = 'Below is an instruction that describes a task. Write a response that appropriately completes the request.'
+
+def stream(prompt,UL):
+    system_prompt = 'You are a helpful AI assistant. Below is an instruction that describes a task. Write a response that appropriately completes the request.'
     start,end = "<|im_start|>", "<|im_end|>"
-    prompt = f"<|im_start|>system\n{system_prompt}{end}\n{start}user\n{item.prompt}{end}\n"
+    prompt = f"<|im_start|>system\n{system_prompt}{end}\n{start}user\n{prompt.strip()}{end}\n"
     return llm(prompt)

+css = """
+h1{
+    text-align: center;
+}
+
+#duplicate-button{
+    margin: auto;
+    color: whitesmoke;
+    background: #1565c0;
+}
+
+.contain{
+    max-width: 900px;
+    margin: auto;
+    padding-top: 1.5rem;
+}
+"""
+
+chat_interface = gr.ChatInterface(
+    fn = stream,
+    stop_btn=None,
+    examples = [
+        "what are 'Large Language Models'?",
+        "Explain OCEAN personality types"
+    ],
+)

+with gr.Blocks(css=css) as demo:
+    gr.HTML("<h1><center>Dolphin2.0_x_Mistral Demo</center></h1>")
+    gr.DuplicateButton(value="Duplicate Space for private use",elem_id="duplicate-button")
+    chat_interface.render()
+    gr.Markdown(greety)


-
-
-
-
+if __name__ == "__main__":
+    demo.queue(max_size=10).launch()
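One thing worth flagging in the new stream handler: the prompt it builds stops after the user turn. Dolphin 2.0 is a ChatML-tuned model, and a ChatML generation prompt conventionally ends with an opening assistant header so the model replies in the assistant role. A minimal sketch of the fuller template (the build_prompt helper and the assistant line are illustrative additions, not part of this commit):

# Sketch only: standard ChatML layout, assuming dolphin-2.0-mistral-7b
# follows the usual <|im_start|>role ... <|im_end|> convention.
def build_prompt(user_message: str, system_prompt: str) -> str:
    start, end = "<|im_start|>", "<|im_end|>"
    return (
        f"{start}system\n{system_prompt}{end}\n"
        f"{start}user\n{user_message.strip()}{end}\n"
        f"{start}assistant\n"  # cue the model to answer as the assistant
    )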
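The handler is also named stream but returns a single finished string. ctransformers can emit tokens incrementally when called with stream=True, and gr.ChatInterface accepts a generator function, so a streaming variant could look like the sketch below. It assumes the llm object loaded in this commit is in scope; accumulating into a partial buffer is my choice, not something the commit does.

# Sketch only: llm(prompt, stream=True) yields text chunks; yielding the
# growing buffer lets ChatInterface repaint the reply as it streams.
def stream(prompt, UL):
    system_prompt = 'You are a helpful AI assistant. Below is an instruction that describes a task. Write a response that appropriately completes the request.'
    start, end = "<|im_start|>", "<|im_end|>"
    full_prompt = f"{start}system\n{system_prompt}{end}\n{start}user\n{prompt.strip()}{end}\n{start}assistant\n"
    partial = ""
    for chunk in llm(full_prompt, stream=True):
        partial += chunk
        yield partial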