# Hugging Face Space: Mikhil-jivus/EndpointTesting
# Space status when this copy was captured: Runtime error
# File size: 2,064 bytes

import gradio as gr
import os
from huggingface_hub import InferenceClient
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
# Hugging Face repository that hosts the fine-tuned chat model.
repo_id = "Mikhil-jivus/Llama-32-3B-FineTuned"
# Access token read from the environment; this is None when HF_TOKEN is unset.
access_token = os.getenv('HF_TOKEN')
# Load the tokenizer from the Hugging Face repository (no model weights are
# loaded here). Kept as a module-level name for backward compatibility.
# NOTE(review): trust_remote_code=True lets code shipped in the repository run
# locally -- keep it only while the repository is trusted.
tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True, token=access_token)


# Remote inference client for the same repository. InferenceClient has no
# `tokenizer` parameter (passing one raises TypeError), so only the model id
# and the token are supplied.
client = InferenceClient(model=repo_id, token=access_token)


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream the assistant's reply to ``message`` as a growing string.

    Args:
        message: The latest user message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs; a
            falsy entry in a pair is skipped.
        system_message: Content of the leading ``system`` message.
        max_tokens: Forwarded to ``client.chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.

    Yields:
        The reply accumulated so far, once per received stream chunk that
        carries a choice.
    """
    messages = [{"role": "system", "content": system_message}]

    for user_text, assistant_text in history:
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})

    messages.append({"role": "user", "content": message})

    response = ""

    # The loop variable is deliberately not called `message`, so it does not
    # shadow the function parameter of the same name.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # Skip chunks that carry no choice, which would otherwise raise
        # IndexError on the lookup below.
        if not chunk.choices:
            continue

        token = chunk.choices[0].delta.content

        # `content` may be None (for instance on a closing chunk); appending
        # None to a str would raise TypeError.
        if token:
            response += token
        yield response

"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
def _build_demo():
    """Create the chat UI wired to ``respond`` with its four extra controls."""
    # The widgets are created in the same order as respond()'s parameters
    # after `message` and `history`: system_message, max_tokens, temperature,
    # top_p.
    system_box = gr.Textbox(label="System message", value="You are a friendly Chatbot.")
    max_tokens_slider = gr.Slider(
        label="Max new tokens", minimum=1, maximum=2048, step=1, value=512
    )
    temperature_slider = gr.Slider(
        label="Temperature", minimum=0.1, maximum=4.0, step=0.1, value=0.7
    )
    top_p_slider = gr.Slider(
        label="Top-p (nucleus sampling)", minimum=0.1, maximum=1.0, step=0.05, value=0.95
    )

    extra_controls = [system_box, max_tokens_slider, temperature_slider, top_p_slider]
    return gr.ChatInterface(fn=respond, additional_inputs=extra_controls)


demo = _build_demo()


# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()