Spaces:
Runtime error
Runtime error
File size: 2,062 Bytes
a445827 c994f1f 8f3eaf7 bbfe136 91b03f9 e3f498d f5504b1 e3f498d bbfe136 9b150ae bbfe136 a9caed4 a445827 b17ecc2 a445827 e3f498d a445827 e3f498d a445827 e3f498d a445827 e3f498d 721cdc9 e3f498d b17ecc2 e3f498d a445827 e3f498d a445827 e3f498d a445827 e3f498d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
import gradio as gr
import os
from huggingface_hub import InferenceClient
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
# Hub repository that hosts the fine-tuned model this app talks to.
repo_id = "Mikhil-jivus/Llama-32-3B-FineTuned"
# Access token read from the environment; None when HF_TOKEN is not set.
access_token = os.getenv('HF_TOKEN')
# Load the tokenizer from the Hugging Face repository.
# NOTE(review): nothing else in this file uses `tokenizer`; it is kept so the
# module-level name stays available — confirm whether it can be dropped.
tokenizer = AutoTokenizer.from_pretrained(repo_id, trust_remote_code=True, token=access_token)
# InferenceClient's constructor has no `tokenizer` parameter, so passing one
# raises TypeError at import time. Only the model id and the token are given.
client = InferenceClient(model=repo_id, token=access_token)
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream the assistant's reply to ``message`` as it is generated.

    Builds a role/content message list from the system prompt, the prior
    turns in ``history`` and the new user message, then sends it to the
    module-level ``client`` with streaming enabled.

    Args:
        message: The new user message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs;
            empty entries within a pair are skipped.
        system_message: Content of the leading ``system`` message.
        max_tokens: Forwarded to ``client.chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.

    Yields:
        The reply accumulated so far, once per streamed chunk that carries
        text.
    """
    messages = [{"role": "system", "content": system_message}]
    for turn in history:
        user_text, assistant_text = turn[0], turn[1]
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})
    messages.append({"role": "user", "content": message})

    response = ""
    # The loop variable is named `chunk` so it does not shadow the `message`
    # parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # A streamed chunk may carry no choices or a delta without text
        # (content is None); concatenating None to a str would raise
        # TypeError, so such chunks are skipped.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        if token is None:
            continue
        response += token
        yield response
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# Chat UI wired to `respond`. The additional inputs are listed in the same
# order as respond's parameters after (message, history): system message,
# max tokens, temperature, top-p.
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(
            value="You are a friendly Chatbot.",
            label="System message",
        ),
        gr.Slider(
            minimum=1,
            maximum=2048,
            value=512,
            step=1,
            label="Max new tokens",
        ),
        gr.Slider(
            minimum=0.1,
            maximum=4.0,
            value=0.7,
            step=0.1,
            label="Temperature",
        ),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)
# Start the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()