import gradio as gr
import os
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""

# Read the Hugging Face access token from the environment
# (set HF_TOKEN as a secret in the Space settings).
access_token = os.getenv("HF_TOKEN")
repo_id = "Mikhil-jivus/Llama-32-3B-FineTuned"

# Load the tokenizer from the Hugging Face repository. It is kept local only;
# InferenceClient does not accept a tokenizer argument, so it is not passed on.
tokenizer = AutoTokenizer.from_pretrained(repo_id, token=access_token)

# Hosted inference client for the fine-tuned model.
client = InferenceClient(model=repo_id, token=access_token)
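
# Optional local-inference alternative (a minimal sketch, commented out so the
# hosted client above stays the default path). It assumes the Space has enough
# memory for the 3B model; `generate_locally` is a hypothetical helper, not
# part of the original app.
#
# import torch
# from transformers import AutoModelForCausalLM
#
# model = AutoModelForCausalLM.from_pretrained(
#     repo_id,
#     token=access_token,
#     torch_dtype=torch.bfloat16,
#     device_map="auto",
# )
#
# def generate_locally(messages, max_new_tokens=512, temperature=0.7, top_p=0.95):
#     # apply_chat_template renders the messages list into the model's prompt format.
#     input_ids = tokenizer.apply_chat_template(
#         messages, add_generation_prompt=True, return_tensors="pt"
#     ).to(model.device)
#     output_ids = model.generate(
#         input_ids,
#         max_new_tokens=max_new_tokens,
#         do_sample=True,
#         temperature=temperature,
#         top_p=top_p,
#     )
#     # Decode only the newly generated tokens, not the prompt.
#     return tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True)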


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    # Rebuild the full chat transcript: system prompt first, then alternating
    # user/assistant turns from the Gradio history, then the new user message.
    messages = [{"role": "system", "content": system_message}]

    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})

    messages.append({"role": "user", "content": message})

    response = ""

    # Stream the completion chunk by chunk, yielding the accumulated partial
    # response so Gradio can update the chat window incrementally.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        if token:  # the final streamed chunk may carry no content
            response += token
        yield response
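
# Quick sanity check outside the UI (a hedged example; the argument values
# below mirror the ChatInterface defaults rather than anything defined
# elsewhere in this file). Uncommenting it will call the hosted API.
#
# for partial in respond(
#     "Hello!",
#     history=[("Hi", "Hello, how can I help?")],
#     system_message="You are a friendly Chatbot.",
#     max_tokens=128,
#     temperature=0.7,
#     top_p=0.95,
# ):
#     pass
# print(partial)  # the final accumulated response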

"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)
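
# If explicit request queuing is wanted for the streaming generator (recent
# Gradio versions enable a queue by default, so this is optional), it can be
# turned on before launching:
#
# demo.queue()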


if __name__ == "__main__":
    demo.launch()