import gradio as gr
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from langchain.prompts import PromptTemplate

def initialize_model_and_tokenizer(model_name="KvrParaskevi/Llama-2-7b-Hotel-Booking-Model"):
    model = AutoModelForCausalLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return model, tokenizer

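# Note (assumption, not in the original app): the 7B checkpoint needs roughly
# 28 GB of RAM in full float32 precision. If the Space runs out of memory,
# `from_pretrained` also accepts `torch_dtype` and `device_map` arguments
# (the latter requires the `accelerate` package), e.g.:
#     model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, device_map="auto")
# This is a sketch only and would additionally need `import torch`.
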
def load_pipeline():
    model, tokenizer = initialize_model_and_tokenizer()
    # Wrap the model in a transformers text-generation pipeline with
    # conservative decoding settings (short outputs, mild beam search)
    pipe = pipeline("text-generation",
                    model=model,
                    tokenizer=tokenizer,
                    max_new_tokens=20,
                    top_k=30,
                    early_stopping=True,
                    num_beams=2,
                    temperature=0.1,
                    repetition_penalty=1.03)
    # Expose the transformers pipeline to LangChain as an LLM
    llm = HuggingFacePipeline(pipeline=pipe)
    return llm

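# Note (assumption, not in the original app): `import spaces` is only useful on
# a ZeroGPU Space, where the function that performs generation is wrapped with
# the `@spaces.GPU` decorator, e.g.:
#     @spaces.GPU
#     def generate(text):
#         return llm.invoke(text)
# `generate` is a hypothetical helper shown for illustration only.
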
def chat_interface(question, history):
    # gr.Interface passes the two textbox values as positional arguments.
    # `llm_chain` is the ConversationChain instance defined below.
    result = llm_chain.invoke({"input": question, "history": history})
    return result["response"]

llm = load_pipeline()

template = """<<SYS>>
You are an AI having a conversation with a human. Below is an instruction that describes a task.
Write a response that appropriately completes the request.
Reply with the most helpful and logical answer. During the conversation you need to ask the user
the following questions to complete the hotel booking task.
1) Where would you like to stay and when?
2) How many people are staying in the room?
3) Do you prefer any amenities like breakfast included or a gym?
4) What is your name, your email address and phone number?
Make sure you receive a logical answer from the user for every question to complete the hotel
booking process.
<</SYS>>
Previous conversation:
{history}
Human: {input}
AI:"""

prompt = PromptTemplate(template=template, input_variables=["history", "input"])
# ConversationBufferMemory only needs the memory key; it stores the raw turns
# and injects them into the prompt's {history} slot on every call.
memory = ConversationBufferMemory(memory_key="history")
llm_chain = ConversationChain(prompt=prompt, llm=llm, memory=memory)

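# Quick sanity check (illustrative only; the question below is a made-up example):
# print(llm_chain.invoke({"input": "I would like to book a double room in Athens for two nights."})["response"])
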
with gr.Blocks() as demo:
    gr.Markdown("Hotel Booking Assistant Chat 🤗")
    chatbot = gr.Chatbot(label="Chat history")
    message = gr.Textbox(label="Ask me a question!")
    clear = gr.Button("Clear")

    def respond(query, chat_history):
        # Convert the Gradio chat history to a list of (human, ai) tuples
        chat_history_tuples = [(human, ai) for human, ai in chat_history]
        result = llm_chain.invoke({"input": query, "history": chat_history_tuples})
        chat_history.append((query, result["response"]))
        return "", chat_history

    message.submit(respond, [message, chatbot], [message, chatbot])
    clear.click(lambda: None, None, chatbot, queue=False)

iface = gr.Interface(
    fn=chat_interface,
    inputs=[
        gr.Textbox(lines=1, label="Question"),
        gr.Textbox(lines=5, label="Chat History"),
    ],
    outputs="text"
)

iface.launch()