Spaces:
Runtime error
Runtime error
File size: 2,734 Bytes
a445827 0fafb5e bbfe136 6ac164c 91b03f9 e3f498d 6ac164c bbfe136 6ac164c a445827 6ac164c b17ecc2 6ac164c a445827 6ac164c a445827 6ac164c e3f498d 6ac164c e3f498d 6ac164c 721cdc9 e3f498d b17ecc2 a445827 6ac164c 604284f 6ac164c e3f498d 6ac164c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
import gradio as gr
import os
import torch
from transformers import (
AutoTokenizer,
AutoModelForCausalLM,
TextIteratorStreamer,
pipeline,
)
from threading import Thread
# Hugging Face access token taken from the environment (None when unset).
access_token = os.environ.get("HF_TOKEN")

# Hub repository id of the fine-tuned model.
checkpoint = "Mikhil-jivus/Llama-32-3B-FineTuned"

# Keyword options shared by the tokenizer and model loaders.
_hub_options = {"trust_remote_code": True, "token": access_token}

# Download and load the tokenizer and the model.
tokenizer = AutoTokenizer.from_pretrained(checkpoint, **_hub_options)
model = AutoModelForCausalLM.from_pretrained(
    checkpoint,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    **_hub_options,
)

# Text generation pipeline built on the objects loaded above. The tokenizer's
# EOS token id is supplied as both the padding id and the end-of-sequence id.
_end_of_text_id = tokenizer.eos_token_id
phi2 = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    eos_token_id=_end_of_text_id,
    pad_token_id=_end_of_text_id,
    device_map="auto",
)
# Private helpers for generate(): prompt assembly and reply clean-up.
def _as_text(value):
    """Return ``value`` as a string, mapping ``None`` to the empty string."""
    return "" if value is None else str(value)


def _build_prompt(message, chat_history):
    """Assemble the plain-text prompt for one chat turn.

    Args:
        message: The new user message.
        chat_history: Earlier turns, either as ``(sent, received)`` pairs or
            as dicts carrying ``"role"`` and ``"content"`` keys. ``None`` or
            an empty history contributes no lines.

    Returns:
        The instruction line, one ``User:``/``Assistant:`` line per earlier
        turn, the new ``User:`` line and a trailing ``Output:`` marker,
        joined by newlines.
    """
    instruction = "You are a helpful assistant to 'User'. You do not respond as 'User' or pretend to be 'User'. You only respond once as 'Assistant'."
    speakers = {"user": "User", "assistant": "Assistant"}

    lines = [f"Instruction: {instruction}"]
    for turn in chat_history or []:
        if isinstance(turn, dict):
            # Role/content style history: one line per message; roles other
            # than user/assistant are left out of the prompt.
            speaker = speakers.get(turn.get("role"))
            if speaker is not None:
                lines.append(f"{speaker}: {_as_text(turn.get('content'))}")
        else:
            # Pair style history: (user message, assistant reply).
            sent, received = turn
            lines.append(f"User: {_as_text(sent)}")
            lines.append(f"Assistant: {_as_text(received)}")
    lines.append(f"User: {_as_text(message)}")
    lines.append("Output:")
    return "\n".join(lines)


def _extract_reply(generated_text):
    """Trim speaker markers from the text generated so far.

    Everything from the first ``User:`` onward is dropped; if an
    ``Assistant:`` marker remains, only the text after the first marker (and
    before any second one) is kept.
    """
    reply = generated_text.strip()
    if "User:" in reply:
        reply = reply.split("User:")[0].strip()
    if "Assistant:" in reply:
        reply = reply.split("Assistant:")[1].strip()
    return reply


# Function that accepts a prompt and generates text using the phi2 pipeline
def generate(message, chat_history, max_new_tokens):
    """Stream the assistant's reply to ``message``.

    The pipeline runs in a background thread and feeds a
    ``TextIteratorStreamer``; this generator consumes the streamer and yields
    the cleaned-up reply accumulated so far after every received chunk.

    Args:
        message: The new user message.
        chat_history: Earlier turns (pairs or role/content dicts).
        max_new_tokens: Generation budget; coerced to ``int`` before use.

    Yields:
        The reply text accumulated so far, with speaker markers trimmed.

    Raises:
        Exception: Whatever the pipeline raised in the background thread,
            re-raised here once the stream has ended.
    """
    final_prompt = _build_prompt(message, chat_history)
    # Convert up front so a bad value fails in the caller, not in the worker.
    token_budget = int(max_new_tokens)

    # Streamer
    streamer = TextIteratorStreamer(
        tokenizer=tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=300.0
    )

    worker_errors = []

    def run_pipeline():
        try:
            phi2(
                text_inputs=final_prompt,
                max_new_tokens=token_budget,
                streamer=streamer,
            )
        except Exception as exc:
            # Without this the consumer below would keep waiting on the
            # streamer until its timeout; end the stream and hand the error
            # over so it can be re-raised in the calling thread.
            worker_errors.append(exc)
            streamer.end()

    thread = Thread(target=run_pipeline)
    thread.start()

    generated_text = ""
    for word in streamer:
        generated_text += word
        yield _extract_reply(generated_text)

    if worker_errors:
        raise worker_errors[0]
# Chat interface with gradio
with gr.Blocks() as demo:
    # Page heading and a one-line description of the demo.
    gr.Markdown(
        """
# Jivus AI Chatbot Demo
This chatbot was created using Llama 3 billion parameter Transformer model.
"""
    )

    # Slider (8 to 512, initial value 256) whose value is handed to
    # generate() as its additional input after message and history.
    tokens_slider = gr.Slider(
        8,
        512,
        value=256,
        label="Maximum new tokens",
        info="A larger `max_new_tokens` parameter value gives you longer text responses but at the cost of a slower response time.",
    )

    # Chat UI driven by the streaming generate() function.
    chatbot = gr.ChatInterface(
        fn=generate,
        additional_inputs=[tokens_slider],
        stop_btn=None,
        examples=[["Who is Leonhard Euler?"]],
    )

# Enable request queuing, then start the app.
demo.queue().launch()