Spaces:

Mikhil-jivus
/

EndpointTesting

Runtime error

File size: 3,037 Bytes

a445827
0fafb5e
bbfe136
6ac164c
 
 
 
 
 
 
91b03f9
e3f498d
6ac164c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bbfe136
 
6ac164c
 
5a7f128
6ac164c
a445827
6ac164c
 
 
b17ecc2
6ac164c
 
a445827
6ac164c
 
 
 
 
 
 
 
 
 
 
 
 
a445827
6ac164c
 
 
 
e3f498d
6ac164c
 
e3f498d
6ac164c
 
721cdc9
e3f498d
b17ecc2
a445827
6ac164c
 
 
 
 
 
 
 
 
 
5a7f128
 
604284f
5a7f128
6ac164c
 
 
 
 
 
 
 
e3f498d
6ac164c

import gradio as gr
import os
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TextIteratorStreamer,
    pipeline,
)
from threading import Thread

# Hugging Face access token, read from the HF_TOKEN environment variable.
# os.getenv returns None when the variable is unset.
access_token = os.getenv('HF_TOKEN')

# The Hugging Face model id of the fine-tuned model
checkpoint = "Mikhil-jivus/Llama-32-3B-FineTuned"

# Download and load the model and tokenizer; both calls pass the access token.
# NOTE(review): trust_remote_code=True permits code shipped in the model repo
# to run locally — confirm this checkpoint actually needs it.
tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True,token=access_token)
model = AutoModelForCausalLM.from_pretrained(
    checkpoint, torch_dtype=torch.bfloat16, device_map="auto", trust_remote_code=True,token=access_token
)

# Text generation pipeline wrapping the model and tokenizer loaded above.
# NOTE(review): the name `phi2` looks like a leftover from an earlier demo —
# the checkpoint id above names a Llama fine-tune. `generate` uses this name
# as its thread target, so any rename has to change both places together.
phi2 = pipeline(
    "text-generation",
    tokenizer=tokenizer,
    model=model,
    # The EOS token id is used both for padding and as the end-of-sequence id.
    pad_token_id=tokenizer.eos_token_id,
    eos_token_id=tokenizer.eos_token_id,
    # NOTE(review): `model` was already loaded with device_map="auto" above;
    # confirm this second device_map is needed for an instantiated model.
    device_map="auto",
)


# Persona instruction that opens every prompt sent to the model.
_INSTRUCTION = "You are Abhishek Rai, Lead sales At PerfectSprout. Here is your Personality: He is energetic smart and has 17 years of sales experience. You are to strictly follow your personality and respond like Abhishek Rai on a cold Call. Be quick to judge the relevance of the pitch and proceed only if the product/pitch promises to add some value to your work. You possess comprehensive real-world knowledge, enabling you to scrutinize and challenge any implausible claims, inaccuracies, or conventional fallacies presented by a sales rep during a cold call."

# Prompt labels for the roles of dict-style history entries; other roles are
# left out of the prompt.
_ROLE_LABELS = {"user": "User", "assistant": "Assistant"}


def _build_prompt(message, chat_history):
    """Return the full text prompt for *message* preceded by *chat_history*.

    Each history entry may be either a ``(sent, received)`` pair or a dict
    with ``"role"`` and ``"content"`` keys.  Turns whose text is not a string
    (for example a reply that is still ``None``) are skipped instead of
    raising ``TypeError``.
    """
    lines = [f"Instruction: {_INSTRUCTION}"]

    for entry in chat_history or ():
        if isinstance(entry, dict):
            turns = [(_ROLE_LABELS.get(entry.get("role")), entry.get("content"))]
        else:
            sent, received = entry
            turns = [("User", sent), ("Assistant", received)]

        lines.extend(
            f"{label}: {text}"
            for label, text in turns
            if label is not None and isinstance(text, str)
        )

    lines.append(f"User: {message}")
    # NOTE(review): history turns are labelled "Assistant:" but the final cue
    # is "Output:".  Kept as-is because the format the checkpoint was
    # fine-tuned on is not visible here — confirm which label it expects.
    lines.append("Output:")
    return "\n".join(lines)


def _clean_response(text):
    """Return *text* trimmed to the assistant's own reply.

    Anything from the first ``"User:"`` marker onwards is dropped (the model
    has started writing the next user turn), and when an ``"Assistant:"``
    label is present only the segment right after its first occurrence is
    kept.
    """
    response = text.strip()

    if "User:" in response:
        response = response.split("User:")[0].strip()

    if "Assistant:" in response:
        response = response.split("Assistant:")[1].strip()

    return response


def generate(message, chat_history, max_new_tokens):
    """Stream the model's reply to *message* as progressively longer strings.

    Args:
        message: The latest user message.
        chat_history: Earlier turns, either as ``(sent, received)`` pairs or
            as dicts with ``"role"`` / ``"content"`` keys.
        max_new_tokens: Upper bound on generated tokens; coerced to ``int``
            because it arrives from a UI slider.

    Yields:
        The cleaned reply accumulated so far, once per chunk received from
        the streamer.
    """
    final_prompt = _build_prompt(message, chat_history)

    # The pipeline call blocks until generation finishes, so it runs in a
    # worker thread while this generator consumes the streamer.
    streamer = TextIteratorStreamer(
        tokenizer=tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=300.0
    )
    thread = Thread(
        target=phi2,
        kwargs={
            "text_inputs": final_prompt,
            "max_new_tokens": int(max_new_tokens),
            "streamer": streamer,
        },
    )
    thread.start()

    generated_text = ""
    for word in streamer:
        generated_text += word
        yield _clean_response(generated_text)


# Chat interface with gradio.
# The slider's initial value is shared with the example row below so that the
# example supplies a value for every input `generate` takes.
DEFAULT_MAX_NEW_TOKENS = 256

with gr.Blocks() as demo:
    gr.Markdown(
        """
  # Jivus AI Chatbot Demo
  This chatbot was created using Llama 3 billion parameter Transformer model. 
  """
    )

    # Passed to `generate` as `max_new_tokens`; step=1 keeps the value a whole
    # number of tokens.
    tokens_slider = gr.Slider(
        minimum=8,
        maximum=512,
        value=DEFAULT_MAX_NEW_TOKENS,
        step=1,
        label="Maximum new tokens",
    )

    chatbot = gr.ChatInterface(
        fn=generate,
        additional_inputs=[tokens_slider],
        stop_btn=None,
        # With additional_inputs, each example row is the message followed by
        # one value per additional input (here: the token limit).  A row with
        # only the message can lead to `generate` being called without
        # `max_new_tokens` when examples are run or cached.
        examples=[["Who is Leonhard Euler?", DEFAULT_MAX_NEW_TOKENS]],
    )

# Enable request queuing (needed for streaming generator output) and start
# the server.
demo.queue().launch()