File size: 2,725 Bytes
a445827
6ac164c
bbfe136
6ac164c
 
 
 
 
 
 
91b03f9
e3f498d
6ac164c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bbfe136
 
6ac164c
 
 
 
a445827
6ac164c
 
 
b17ecc2
6ac164c
 
a445827
6ac164c
 
 
 
 
 
 
 
 
 
 
 
 
a445827
6ac164c
 
 
 
e3f498d
6ac164c
 
e3f498d
6ac164c
 
721cdc9
e3f498d
b17ecc2
a445827
6ac164c
 
 
 
 
 
 
 
 
 
 
604284f
 
6ac164c
 
 
 
 
 
 
 
 
 
e3f498d
6ac164c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import gradio as gr

import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TextIteratorStreamer,
    pipeline,
)
from threading import Thread

access_token = os.getenv('HF_TOKEN')

# The huggingface model id for Finetuned model
checkpoint = "Mikhil-jivus/Llama-32-3B-FineTuned"

# Download and load model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True,token=access_token)
model = AutoModelForCausalLM.from_pretrained(
    checkpoint, torch_dtype=torch.bfloat16, device_map="auto", trust_remote_code=True,token=access_token
)

# Text generation pipeline
phi2 = pipeline(
    "text-generation",
    tokenizer=tokenizer,
    model=model,
    pad_token_id=tokenizer.eos_token_id,
    eos_token_id=tokenizer.eos_token_id,
    device_map="auto",
)


# Function that accepts a prompt and generates text using the phi2 pipeline
def generate(message, chat_history, max_new_tokens):
    instruction = "You are a helpful assistant to 'User'. You do not respond as 'User' or pretend to be 'User'. You only respond once as 'Assistant'."
    final_prompt = f"Instruction: {instruction}\n"

    for sent, received in chat_history:
        final_prompt += "User: " + sent + "\n"
        final_prompt += "Assistant: " + received + "\n"

    final_prompt += "User: " + message + "\n"
    final_prompt += "Output:"

    # Streamer
    streamer = TextIteratorStreamer(
        tokenizer=tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=300.0
    )
    thread = Thread(
        target=phi2,
        kwargs={
            "text_inputs": final_prompt,
            "max_new_tokens": max_new_tokens,
            "streamer": streamer,
        },
    )
    thread.start()

    generated_text = ""
    for word in streamer:
        generated_text += word
        response = generated_text.strip()

        if "User:" in response:
            response = response.split("User:")[0].strip()

        if "Assistant:" in response:
            response = response.split("Assistant:")[1].strip()

        yield response


# Chat interface with gradio
with gr.Blocks() as demo:
    gr.Markdown(
        """
  # Jivus AI Chatbot Demo
  This chatbot was created using Llama 3 billion parameter Transformer model. 
  """
    )

    tokens_slider = gr.Slider(
        8,
        512,
        value=256,
        label="Maximum new tokens",
        info="A larger `max_new_tokens` parameter value gives you longer text responses but at the cost of a slower response time.",
    )

    chatbot = gr.ChatInterface(
        fn=generate,
        additional_inputs=[tokens_slider],
        stop_btn=None,
        examples=[["Who is Leonhard Euler?"]],
    )

demo.queue().launch()