import os

import streamlit as st
# The community package hosts the loaders, vector stores, and embeddings that
# older releases exposed under `langchain.*` (those paths are deprecated and
# removed in current LangChain).
from langchain_community.document_loaders import PyPDFDirectoryLoader
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain import hub
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_google_genai import ChatGoogleGenerativeAI
# Set up the directories for data and vector DB
DATA_DIR = "/content/MyData"
DB_DIR = "/content/VectorDB"
# Create directories if they don't exist
os.makedirs(DATA_DIR, exist_ok=True)
os.makedirs(DB_DIR, exist_ok=True)
# Initialize the embeddings model
embeddings_model = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
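# Note: all-MiniLM-L6-v2 is a compact sentence-transformers model that maps each
# text chunk to a 384-dimensional vector; its weights are downloaded from the
# Hugging Face Hub the first time the app runs.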
# Load and process PDF documents
def load_data():
    """Load every PDF in DATA_DIR, chunk it, and index the chunks in Chroma."""
    loader = PyPDFDirectoryLoader(DATA_DIR)
    data_on_pdf = loader.load()
    # Prefer paragraph and sentence boundaries when splitting; the 200-character
    # overlap keeps context that straddles a chunk boundary retrievable.
    text_splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n", "\n", ". ", " ", ""],
        chunk_size=1000,
        chunk_overlap=200,
    )
    splits = text_splitter.split_documents(data_on_pdf)
    vectorstore = Chroma.from_documents(
        documents=splits,
        embedding=embeddings_model,
        persist_directory=DB_DIR,
    )
    return vectorstore
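
# Sketch (an assumption, not what the code above does): because Chroma persists
# to DB_DIR, an existing index could be reopened without re-embedding every
# document, e.g.:
#   vectorstore = Chroma(persist_directory=DB_DIR, embedding_function=embeddings_model)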
# Set up the generative AI model; read the key from the environment instead of
# hardcoding it in source
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro", google_api_key=os.environ.get("GOOGLE_API_KEY"))
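# (Assumption: other Gemini chat models, such as "gemini-1.5-flash", can be
# swapped in through the same `model` argument for lower latency and cost.)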
# Load vector store
vectorstore = load_data()
# Streamlit interface
st.title("RAG App: Question-Answering with PDFs")
# File uploader for PDF documents
uploaded_files = st.file_uploader("Upload PDF files", accept_multiple_files=True, type=["pdf"])
if uploaded_files:
    for uploaded_file in uploaded_files:
        with open(os.path.join(DATA_DIR, uploaded_file.name), "wb") as f:
            f.write(uploaded_file.getbuffer())
    st.success("PDF files uploaded successfully!")
    # Reload vector store after uploading new files
    vectorstore = load_data()
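# Note: load_data() re-reads and re-embeds every PDF in DATA_DIR, so the cost of
# an upload grows with the whole corpus, not just the new files.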
# User input for question
question = st.text_input("Ask a question about the documents:")
if st.button("Submit"):
    if question:
        retriever = vectorstore.as_retriever()
        # Pull a standard RAG prompt from the LangChain Hub
        prompt = hub.pull("rlm/rag-prompt")

        def format_docs(docs):
            return "\n\n".join(doc.page_content for doc in docs)

        # LCEL pipeline: retrieve chunks -> stuff them into the prompt -> LLM -> plain text
        rag_chain = (
            {"context": retriever | format_docs, "question": RunnablePassthrough()}
            | prompt
            | llm
            | StrOutputParser()
        )
        response = rag_chain.invoke(question)
        st.markdown(response)
    else:
        st.warning("Please enter a question.")
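
# To run locally: `streamlit run app.py`. A rough sketch of the dependencies
# (assumed, not pinned by this file): streamlit, langchain, langchain-community,
# langchain-core, langchain-google-genai, chromadb, sentence-transformers, and
# pypdf, plus langchainhub for hub.pull() on older LangChain releases.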