import streamlit as st
import openai
from openai import OpenAI
from brain import get_index_for_documents
from langchain.chains import RetrievalQA
from langchain_community.chat_models import ChatOpenAI
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from dotenv import load_dotenv
import os
from evaluation_module import RAGEvaluator
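
# Note: get_index_for_documents (brain.py) and RAGEvaluator (evaluation_module.py)
# are local modules of this app that are not shown in this listing. They are assumed
# to build a FAISS vector index over the supplied documents and to expose an
# evaluate_all(generated_text, reference_text) -> dict-of-metric-scores method, respectively.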

# Set the title for the Streamlit app
st.title("DocuChat with Evaluation")

# Load variables from .env before creating the OpenAI client,
# so that OPENAI_API_KEY is available when the client is instantiated
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

# Set up the OpenAI client (reads OPENAI_API_KEY from the environment)
client = OpenAI()

# Initialize the evaluator
evaluator = RAGEvaluator()

# Function to create a vector database from the uploaded files and raw text
def create_vectordb(files, filenames, raw_texts):
    with st.spinner("Creating vector database..."):
        vectordb = get_index_for_documents(
            [file.getvalue() for file in files if file.type == "application/pdf"],
            filenames,
            [line for line in raw_texts.splitlines() if line.strip()],
            openai.api_key,
        )
    return vectordb

# Upload files using Streamlit's file uploader
uploaded_files = st.file_uploader("Upload your documents (PDF or TXT)", type=["pdf", "txt"], accept_multiple_files=True, label_visibility="hidden")

# Text area for raw text input
raw_text = st.text_area("Or enter your raw text here:", height=150)

# If files are uploaded or raw text is provided, create the vectordb and store it in the session state
if uploaded_files or raw_text:
    file_names = [file.name for file in uploaded_files] if uploaded_files else []
    st.session_state["vectordb"] = create_vectordb(uploaded_files or [], file_names, raw_text)

# Define the system prompt template for the chatbot
prompt_template = """
You are a helpful assistant who answers users' questions based on multiple contexts given to you.
Keep your answer short and to the point.
The evidence is the context of the document extract with metadata.
Carefully focus on the metadata, especially 'filename' and 'page', whenever answering.
Make sure to add the filename and page number at the end of each sentence you cite.
Also be able to give a summary based on the document extract given to you, but do not hallucinate.
Reply "Not applicable" if the text is irrelevant.
The document content is:
{doc_extract}
"""

# Get the current prompt from the session state or set a default value
prompt = st.session_state.get("prompt", [{"role": "system", "content": "none"}])

# Display previous chat messages (skip the system message)
for message in prompt:
    if message["role"] != "system":
        with st.chat_message(message["role"]):
            st.write(message["content"])

# Get the user's question using Streamlit's chat input
question = st.chat_input("Ask anything")

# Handle the user's question
if question:
    vectordb = st.session_state.get("vectordb", None)
    if not vectordb:
        with st.chat_message("assistant"):
            st.write("You need to provide a PDF, a TXT file, or raw text first.")
            st.stop()

    # Search the vectordb for content similar to the user's question
    search_results = vectordb.similarity_search(question, k=3)
    doc_extract = "\n".join([result.page_content for result in search_results])

    # Update the system message with the document extract
    prompt[0] = {
        "role": "system",
        "content": prompt_template.format(doc_extract=doc_extract),
    }

    # Add the user's question to the prompt and display it
    prompt.append({"role": "user", "content": question})
    with st.chat_message("user"):
        st.write(question)

    # Display an empty assistant message while waiting for the response
    with st.chat_message("assistant"):
        botmsg = st.empty()

    # Call ChatGPT with streaming and display the response as it comes in
    response = []
    result = ""
    for chunk in client.chat.completions.create(
        model="gpt-3.5-turbo", messages=prompt, stream=True
    ):
        text = chunk.choices[0].delta.content
        if text is not None:
            response.append(text)
            result = "".join(response).strip()
            botmsg.write(result)

    # Add the assistant's response to the prompt, then store the chat history
    # plus the latest answer and retrieved context in the session state, so the
    # evaluation section below can still access them after the button-triggered rerun
    prompt.append({"role": "assistant", "content": result})
    st.session_state["prompt"] = prompt
    st.session_state["last_result"] = result
    st.session_state["last_doc_extract"] = doc_extract

# Evaluation Section
st.write("## Evaluation Results")
if st.button("Evaluate Response"):
    # The button click triggers a rerun, so fetch the latest answer and retrieved
    # context from the session state rather than from local variables
    result = st.session_state.get("last_result")
    doc_extract = st.session_state.get("last_doc_extract")
    if doc_extract and result:
        # Perform evaluation
        metrics = evaluator.evaluate_all(result, doc_extract)
        # Display metrics with explanations
        st.write(f"**BLEU Score**: {metrics['BLEU']:.2f}")
        st.write("BLEU measures n-gram overlap between the generated output and the reference text. Range: 0-100. Higher scores indicate a closer match.")
        st.write(f"**ROUGE-1 Score**: {metrics['ROUGE-1']:.2f}")
        st.write("ROUGE-1 measures unigram overlap between the generated output and the reference text. Range: 0-1. Higher scores indicate a closer match.")
        st.write(f"**BERT Precision**: {metrics['BERT P']:.2f}")
        st.write(f"**BERT Recall**: {metrics['BERT R']:.2f}")
        st.write(f"**BERT F1 Score**: {metrics['BERT F1']:.2f}")
        st.write("BERTScore evaluates semantic similarity between the generated output and the reference text using BERT embeddings. Range: 0-1. Higher scores indicate better semantic similarity.")
        st.write(f"**Perplexity**: {metrics['Perplexity']:.2f}")
        st.write("Perplexity measures how well a language model predicts the text. Range: 1 to ∞. Lower values indicate better fluency and coherence.")
        st.write(f"**Diversity**: {metrics['Diversity']:.2f}")
        st.write("Diversity measures the proportion of unique bigrams in the generated output. Range: 0-1. Higher values indicate more varied output.")
    else:
        st.write("Ask a question first so there is a response to evaluate.")