amine-01 committed
Commit d66126a · verified · 1 Parent(s): b6ba543

Update app.py

Files changed (1)
  app.py +58 -115
app.py CHANGED
@@ -1,136 +1,79 @@
 import streamlit as st
-from langchain.prompts import PromptTemplate
-from langchain.chains.question_answering import load_qa_chain
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain_community.vectorstores import Chroma
-from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
-from dotenv import load_dotenv
-import PyPDF2
-import os
-import io
 from langchain.document_loaders import PyPDFDirectoryLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.vectorstores import Chroma
 from langchain.embeddings import SentenceTransformerEmbeddings
+from langchain import hub
 from langchain_core.output_parsers import StrOutputParser
 from langchain_core.runnables import RunnablePassthrough
+from langchain_google_genai import ChatGoogleGenerativeAI
+import os
 
-# Define SPEAKER_TYPES to distinguish between user and bot roles
-SPEAKER_TYPES = {
-    "USER": "user",
-    "BOT": "bot"
-}
-
-# Define the initial prompt to show when the app starts
-initial_prompt = {
-    'role': SPEAKER_TYPES["BOT"],
-    'content': "Hello! I am your Gemini Pro RAG chatbot. You can ask me questions after uploading a PDF."
-}
-
-# --- Your RAG chatbot logic ---
-source_data_folder = "MyData"
-text_splitter = RecursiveCharacterTextSplitter(
-    separators=["\n\n", "\n", ". ", " ", ""],
-    chunk_size=2000,
-    chunk_overlap=200
-)
+# Set up the directories for data and vector DB
+DATA_DIR = "/content/MyData"
+DB_DIR = "/content/VectorDB"
+
+# Create directories if they don't exist
+os.makedirs(DATA_DIR, exist_ok=True)
+os.makedirs(DB_DIR, exist_ok=True)
+
+# Initialize the embeddings model
 embeddings_model = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
-path_db = "/content/VectorDB"
-llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro", google_api_key="AIzaSyAnsIVS4x_7lJLe9AYXGLV8FRwUTQkB-1w")
 
-# --- Streamlit app starts here ---
-# Set up the Streamlit app configuration
-st.set_page_config(
-    page_title="Gemini Pro RAG App",
-    page_icon="🔍",
-    layout="wide",
-    initial_sidebar_state="expanded",
-)
-
-# Initialize session state for chat history and vectorstore (PDF context)
-if 'chat_history' not in st.session_state:
-    st.session_state.chat_history = [initial_prompt]
-if 'vectorstore' not in st.session_state:
-    st.session_state.vectorstore = None
-
-# Function to clear chat history
-def clear_chat_history():
-    st.session_state.chat_history = [initial_prompt]
-
-# Extract text from PDF
-def extract_text_from_pdf(pdf_bytes):
-    pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_bytes))
-    text = ""
-    for page in pdf_reader.pages:
-        text += page.extract_text()
-    return text
-
-# Initialize vectorstore
-def initialize_vector_index(text):
-    docs = [{'page_content': text}]
-    splits = text_splitter.split_documents(docs)
-    vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings_model, persist_directory=path_db)
-    return vectorstore
-
-# Sidebar configuration
-with st.sidebar:
-    st.title('🔍 Gemini RAG Chatbot')
-    st.write('This chatbot uses the Gemini Pro API with RAG capabilities.')
-    st.button('Clear Chat History', on_click=clear_chat_history, type='primary')
-    uploaded_file = st.file_uploader("Upload a PDF file", type=["pdf"], help="Upload your PDF file here to start the analysis.")
-    if uploaded_file is not None:
-        st.success("PDF File Uploaded Successfully!")
-        text = extract_text_from_pdf(uploaded_file.read())
-        vectorstore = initialize_vector_index(text)
-        st.session_state.vectorstore = vectorstore
-
-# Main interface
-st.header('Gemini Pro RAG Chatbot')
-st.subheader('Upload a PDF and ask questions about its content!')
-
-# Display the welcome prompt if chat history is only the initial prompt
-if len(st.session_state.chat_history) == 1:
-    with st.chat_message(SPEAKER_TYPES["BOT"], avatar="🔍"):
-        st.write(initial_prompt['content'])
-
-# Get user input
-prompt = st.chat_input("Ask a question about the PDF content:", key="user_input")
-
-# Function to get a response from RAG chain
-def get_rag_response(prompt):
-    retriever = st.session_state.vectorstore.as_retriever()  # Use the stored vectorstore retriever
-    rag_chain = (
-        {"context": retriever | format_docs, "question": RunnablePassthrough()}
-        | prompt
-        | llm
-        | StrOutputParser()
-    )
-    response = rag_chain.invoke(prompt)
-    return response
-
-# Handle the user prompt and generate response
-if prompt:
-    # Add user prompt to chat history
-    st.session_state.chat_history.append({'role': SPEAKER_TYPES["USER"], 'content': prompt})
-
-    # Display chat messages from the chat history
-    for message in st.session_state.chat_history[1:]:
-        with st.chat_message(message["role"], avatar="👤" if message['role'] == SPEAKER_TYPES["USER"] else "🔍"):
-            st.write(message["content"])
-
-    # Get the response using the RAG chain
-    with st.spinner(text='Generating response...'):
-        response_text = get_rag_response(prompt)
-        st.session_state.chat_history.append({'role': SPEAKER_TYPES["BOT"], 'content': response_text})
-
-    # Display the bot response
-    with st.chat_message(SPEAKER_TYPES["BOT"], avatar="🔍"):
-        st.write(response_text)
-
-# Add footer for additional information or credits
-st.markdown("""
-<hr>
-<div style="text-align: center;">
-    <small>Powered by Gemini Pro API | Developed by Christian Thomas BADOLO</small>
-</div>
-""", unsafe_allow_html=True)
+# Load and process PDF documents
+def load_data():
+    loader = PyPDFDirectoryLoader(DATA_DIR)
+    data_on_pdf = loader.load()
+    text_splitter = RecursiveCharacterTextSplitter(
+        separators=["\n\n", "\n", ". ", " ", ""],
+        chunk_size=1000,
+        chunk_overlap=200
+    )
+    splits = text_splitter.split_documents(data_on_pdf)
+    vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings_model, persist_directory=DB_DIR)
+    return vectorstore
+
+# Set up the generative AI model
+llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro", google_api_key="YOUR_GOOGLE_API_KEY")
+
+# Load vector store
+vectorstore = load_data()
+
+# Streamlit interface
+st.title("RAG App: Question-Answering with PDFs")
+
+# File uploader for PDF documents
+uploaded_files = st.file_uploader("Upload PDF files", accept_multiple_files=True, type=["pdf"])
+
+if uploaded_files:
+    for uploaded_file in uploaded_files:
+        with open(os.path.join(DATA_DIR, uploaded_file.name), "wb") as f:
+            f.write(uploaded_file.getbuffer())
+    st.success("PDF files uploaded successfully!")
+
+    # Reload vector store after uploading new files
+    vectorstore = load_data()
+
+# User input for question
+question = st.text_input("Ask a question about the documents:")
+
+if st.button("Submit"):
+    if question:
+        retriever = vectorstore.as_retriever()
+        prompt = hub.pull("rlm/rag-prompt")
+
+        def format_docs(docs):
+            return "\n\n".join(doc.page_content for doc in docs)
+
+        rag_chain = (
+            {"context": retriever | format_docs, "question": RunnablePassthrough()}
+            | prompt
+            | llm
+            | StrOutputParser()
+        )
+
+        response = rag_chain.invoke(question)
+        st.markdown(response)
+    else:
+        st.warning("Please enter a question.")
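
Note on the API key: the previous revision hardcoded a Google API key in app.py, and the new revision ships the placeholder string "YOUR_GOOGLE_API_KEY". A minimal sketch of reading the key from the environment instead, assuming a GOOGLE_API_KEY variable is exported before launching the app with streamlit run app.py:

import os
from langchain_google_genai import ChatGoogleGenerativeAI

# Read the key from the environment so it never lands in version control.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    google_api_key=os.environ["GOOGLE_API_KEY"],
)

A key that has already appeared in a public commit should be treated as compromised and rotated.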
 
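A related performance note: the new app calls load_data() at import time and again after each upload, so every Streamlit rerun re-embeds all PDFs under /content/MyData. One possible mitigation, sketched here on the assumption that load_data() stays as defined above (get_vectorstore is a hypothetical helper, not part of the commit), is to cache the store with st.cache_resource:

import streamlit as st

# Cache the vector store across reruns; Streamlit rebuilds it only when the
# cache is cleared explicitly.
@st.cache_resource
def get_vectorstore():
    return load_data()

vectorstore = get_vectorstore()

# After writing newly uploaded PDFs to DATA_DIR, invalidate the cache so the
# next access re-indexes the new files:
# get_vectorstore.clear()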
 
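For readers new to LCEL, the rag_chain pipeline composes the retriever, the rlm/rag-prompt template pulled from the LangChain hub, the Gemini model, and a string parser. Roughly, and assuming a langchain-core version where retrievers are runnables, it behaves like the explicit sketch below; this reuses the retriever, prompt, and llm objects from app.py, and answer is a hypothetical name, not code from the commit:

from langchain_core.output_parsers import StrOutputParser

def answer(question: str) -> str:
    docs = retriever.invoke(question)  # fetch the most relevant chunks
    context = "\n\n".join(doc.page_content for doc in docs)  # what format_docs does
    messages = prompt.invoke({"context": context, "question": question})  # fill the template
    reply = llm.invoke(messages)  # call Gemini
    return StrOutputParser().invoke(reply)  # extract plain text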