"""Gradio streaming chat front-end for a llama3.1 model served by Ollama.

The Ollama server address is taken from the PUBLIC_IP and PORT environment
variables. Responses are streamed token-by-token into the chat window.
"""
import os

import gradio as gr
from ollama import Client

# Required connection settings -- fail fast with a KeyError if either is
# missing, since the app cannot function without a reachable Ollama host.
public_ip = os.environ['PUBLIC_IP']
port = os.environ['PORT']
model = 'llama3.1'

client = Client(host=f'http://{public_ip}:{port}')


def format_history(msg: str, history: list[tuple[str, str]],
                   system_prompt: str) -> list[dict[str, str]]:
    """Convert Gradio chat history into an Ollama role/content message list.

    Args:
        msg: The new user message to append last.
        history: Prior turns as (user_query, assistant_response) pairs.
        system_prompt: Instruction placed first with the "system" role.

    Returns:
        Messages in the order Ollama expects: system prompt, alternating
        user/assistant turns, then the new user message.
    """
    chat_history = [{"role": "system", "content": system_prompt}]
    for query, response in history:
        chat_history.append({"role": "user", "content": query})
        chat_history.append({"role": "assistant", "content": response})
    chat_history.append({"role": "user", "content": msg})
    return chat_history


def generate_response(msg: str, history: list[tuple[str, str]],
                      system_prompt: str, top_k: int, top_p: float,
                      temperature: float):
    """Stream a chat completion from the Ollama server.

    Yields the accumulated assistant message after every received token so
    Gradio can render the reply incrementally.
    """
    chat_history = format_history(msg, history, system_prompt)
    response = client.chat(
        model=model,
        stream=True,
        messages=chat_history,
        options={'top_k': top_k, 'top_p': top_p, 'temperature': temperature},
    )
    message = ""
    for partial_resp in response:
        token = partial_resp["message"]["content"]
        message += token
        yield message


chatbot = gr.ChatInterface(
    generate_response,
    chatbot=gr.Chatbot(
        avatar_images=["user.png", "chatbot.png"],
        height="64vh",
    ),
    additional_inputs=[
        gr.Textbox("You are a helpful assistant and always try to answer user queries to the best of your ability.", label="System Prompt"),
        gr.Slider(0.0, 100.0, label="top_k", value=40, info="Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)"),
        gr.Slider(0.0, 1.0, label="top_p", value=0.9, info=" Works together with top-k. A higher value (e.g., 0.95) will lead to more diverse text, while a lower value (e.g., 0.5) will generate more focused and conservative text. (Default: 0.9)"),
        # NOTE: the original source had a raw (unescaped) newline inside this
        # string literal, which is a SyntaxError in Python -- fixed here.
        gr.Slider(0.0, 2.0, label="temperature", value=0.4, info="The temperature of the model. Increasing the temperature will make the model answer more creatively. (Default: 0.8)"),
    ],
    title="Trashcan AI",
    description="LLama3.1 hosted on a 2013 \"Trashcan\" Mac Pro with ollama",
    theme="finlaymacklon/smooth_slate",
    submit_btn="Send",
    retry_btn="🔄 Regenerate Response",
    undo_btn="↩ Delete Previous",
    clear_btn="🗑️ Clear Chat",
)

if __name__ == "__main__":
    # Queueing is required for generator (streaming) handlers in Gradio.
    chatbot.queue().launch()