|
import logging

import gradio as gr
import pandas as pd
from datasets import load_dataset
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
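
# Load the bank FAQ dataset from the Hugging Face Hub. The "train" split keeps
# each FAQ row in a single "text" column, alternating "Q: ..." and "A: ..." lines.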
ds = load_dataset("maxpro291/bankfaqs_dataset")
train_ds = ds['train']
data = train_ds[:]
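
# Split the alternating lines into parallel question/answer lists.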
questions = []
answers = []
for entry in data['text']:
    if entry.startswith("Q:"):
        questions.append(entry)
    elif entry.startswith("A:"):
        answers.append(entry)

Bank_Data = pd.DataFrame({'question': questions, 'answer': answers})
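
# Merge each Q/A pair into a single passage so one retrieved document
# carries both the question and its answer.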
context_data = [
    f"Question: {q} Answer: {a}"
    for q, a in zip(Bank_Data['question'], Bank_Data['answer'])
]
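
# Embed the passages and index them in a persistent Chroma vector store.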
embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

vectorstore = Chroma.from_texts(
    texts=context_data,
    embedding=embed_model,
    persist_directory="./chroma_db_bank"
)

retriever = vectorstore.as_retriever()
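
# Optional sanity check: log what the retriever returns for a sample query
# (the query string here is purely illustrative).
for doc in retriever.invoke("How do I open a savings account?"):
    logging.info("Retrieved: %s", doc.page_content)

# Load a small causal LM to run locally. gpt2 keeps the demo lightweight;
# an instruction-tuned model would give noticeably better answers.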
model_name = "gpt2"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,      # bound the completion; max_length would count prompt tokens too
    do_sample=True,          # temperature/top_p only take effect when sampling is enabled
    temperature=0.7,
    top_p=0.95,
    repetition_penalty=1.15,
    return_full_text=False,  # return only the completion, not the echoed prompt
)

huggingface_model = HuggingFacePipeline(pipeline=pipe)
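
# Prompt for the RAG chain: {context} receives the retrieved FAQ passages,
# {question} the user's message.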
template = (
    "You are a helpful banking assistant. "
    "Use the provided context if it is relevant to answer the question. "
    "If not, answer using your general banking knowledge.\n"
    "Context: {context}\n"
    "Question: {question}\n"
    "Answer:"
)
rag_prompt = PromptTemplate.from_template(template)
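
# Build the RAG chain: retrieve passages, render the prompt, generate, and
# parse the output to a string. format_docs (a small helper added here) joins
# the retriever's Document objects into plain text for the prompt.
def format_docs(docs):
    return "\n".join(doc.page_content for doc in docs)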

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | rag_prompt
    | huggingface_model
    | StrOutputParser()
)
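
# Optional smoke test of the full chain (the question is illustrative).
logging.info(rag_chain.invoke("What is a savings account?"))

# Stream the chain's output incrementally so the chat window updates live.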
def rag_memory_stream(message, history):
    partial_text = ""
    for new_text in rag_chain.stream(message):
        partial_text += new_text
        yield partial_text
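
# Example prompts surfaced in the chat UI.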
examples = [
    "I want to open an account",
    "What is a savings account?",
    "How do I use an ATM?",
    "How can I resolve a bank account issue?"
]
|
title = "Your Personal Banking Assistant 💬"
description = (
    "Welcome! I’m here to answer your questions about banking and related topics. "
    "Ask me anything, and I’ll do my best to assist you."
)
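
# Wire the streaming generator into a Gradio chat interface.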
demo = gr.ChatInterface(
    fn=rag_memory_stream,
    title=title,
    description=description,
    examples=examples,
    theme="glass",
)
|
if __name__ == "__main__":
    demo.launch(share=True)