import os

import gradio as gr
from ollama import Client

# The Ollama server's address and the model to serve are configured here;
# PUBLIC_IP and PORT must be set in the environment before launch.
public_ip = os.environ['PUBLIC_IP']
port = os.environ['PORT']
model = 'llama3.1'

client = Client(host=f'http://{public_ip}:{port}')
def format_history(msg: str, history: list[list[str]], system_prompt: str):
    """Convert Gradio's chat history into the list of messages Ollama expects."""
    chat_history = [{"role": "system", "content": system_prompt}]
    for query, response in history:
        chat_history.append({"role": "user", "content": query})
        chat_history.append({"role": "assistant", "content": response})
    chat_history.append({"role": "user", "content": msg})
    return chat_history
def generate_response(msg: str, history: list[list[str]], system_prompt: str,
                      top_k: int, top_p: float, temperature: float):
    """Stream the model's reply, yielding the accumulated text as each token arrives."""
    chat_history = format_history(msg, history, system_prompt)
    response = client.chat(model=model,
                           stream=True,
                           messages=chat_history,
                           options={'top_k': top_k, 'top_p': top_p, 'temperature': temperature})
    message = ""
    for partial_resp in response:
        token = partial_resp["message"]["content"]
        message += token
        yield message
# Build the Gradio chat UI; the additional inputs are passed through to generate_response.
chatbot = gr.ChatInterface(
    generate_response,
    chatbot=gr.Chatbot(
        avatar_images=["user.png", "chatbot.png"],
        height="64vh"
    ),
    additional_inputs=[
        gr.Textbox("You are a helpful assistant and always try to answer user queries to the best of your ability.", label="System Prompt"),
        gr.Slider(0.0, 100.0, label="top_k", value=40, info="Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)"),
        gr.Slider(0.0, 1.0, label="top_p", value=0.9, info="Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)"),
        gr.Slider(0.0, 2.0, label="temperature", value=0.4, info="The temperature of the model. Increasing the temperature will make the model answer more creatively. (Default: 0.8)"),
    ],
    title="Trashcan AI",
    description="Llama 3.1 hosted on a 2013 \"Trashcan\" Mac Pro with ollama",
    theme="finlaymacklon/smooth_slate",
    submit_btn="Send",
    retry_btn="🔄 Regenerate Response",
    undo_btn="↩ Delete Previous",
    clear_btn="🗑️ Clear Chat"
)
chatbot.queue().launch()
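# A minimal way to run this app locally (not part of the original file; the host,
# port, and file name below are assumptions): start an Ollama server, pull the model,
# export the environment variables the script reads, then launch it, e.g.
#
#   ollama pull llama3.1
#   PUBLIC_IP=127.0.0.1 PORT=11434 python app.py
#
# 11434 is Ollama's default port; PUBLIC_IP and PORT can point at any reachable Ollama host.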