DexterSptizu committed on
Commit
77ea126
1 Parent(s): 9105364

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -42
app.py CHANGED
@@ -1,19 +1,23 @@
1
  import gradio as gr
2
- from langchain_chroma import Chroma
3
- from langchain_openai import OpenAIEmbeddings
4
- from langchain_core.documents import Document
5
- from langchain_openai import ChatOpenAI
6
- from langchain_core.prompts import ChatPromptTemplate
7
- from langchain_core.runnables import RunnablePassthrough
8
  from PyPDF2 import PdfReader
9
  import os
10
 
11
  # Function to process the uploaded PDF and convert it to documents
12
  def pdf_to_documents(pdf_file):
13
- reader = PdfReader(pdf_file.name)
14
- pages = [page.extract_text() for page in reader.pages]
15
- documents = [Document(page_content=page, metadata={"page_number": idx + 1}) for idx, page in enumerate(pages)]
16
- return documents
 
 
 
 
 
17
 
18
  # Initialize vector store
19
  def initialize_vectorstore(documents, api_key):
@@ -22,50 +26,64 @@ def initialize_vectorstore(documents, api_key):
22
  vectorstore = Chroma.from_documents(documents, embedding=embeddings)
23
  return vectorstore
24
 
25
- # RAG retrieval and LLM chain
26
  def rag_from_pdf(question, pdf_file, api_key):
27
- documents = pdf_to_documents(pdf_file)
28
- vectorstore = initialize_vectorstore(documents, api_key)
29
-
30
- retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 2}) # Retrieve top 2 relevant sections
31
-
32
- # Initialize the LLM
33
- llm = ChatOpenAI(model="gpt-3.5-turbo")
34
-
35
- # Create a prompt template for combining context and question
36
- prompt_template = """
37
- Answer this question using the provided context only.
38
 
39
- {question}
40
-
41
- Context:
42
- {context}
43
- """
44
-
45
- prompt = ChatPromptTemplate.from_messages([("human", prompt_template)])
46
-
47
- # Create a RAG chain combining retriever and LLM
48
- rag_chain = {"context": retriever, "question": RunnablePassthrough()} | prompt | llm
49
-
50
- # Perform retrieval and return LLM's answer
51
- response = rag_chain.invoke(question)
52
- return response.content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  # Gradio interface
55
  with gr.Blocks() as app:
56
- gr.Markdown("## PDF-based Question Answering with RAG")
57
-
58
  # Input for OpenAI API Key
59
- api_key_input = gr.Textbox(label="Enter your OpenAI API Key", type="password")
60
 
61
  # File upload for the PDF
62
- pdf_file_input = gr.File(label="Upload your PDF document")
63
 
64
  # Question input
65
- question_input = gr.Textbox(label="Ask a question related to the PDF")
66
 
67
  # Output for the RAG response
68
- rag_output = gr.Textbox(label="Generated Response", lines=10)
69
 
70
  # Button to run RAG chain
71
  rag_button = gr.Button("Ask Question")
 
1
  import gradio as gr
2
+ from langchain.vectorstores import Chroma
3
+ from langchain.embeddings.openai import OpenAIEmbeddings
4
+ from langchain.schema import Document
5
+ from langchain.chat_models import ChatOpenAI
6
+ from langchain.prompts.chat import ChatPromptTemplate
 
7
  from PyPDF2 import PdfReader
8
  import os
9
 
10
  # Function to process the uploaded PDF and convert it to documents
11
  def pdf_to_documents(pdf_file):
12
+ try:
13
+ reader = PdfReader(pdf_file.name)
14
+ pages = [page.extract_text().strip() for page in reader.pages if page.extract_text()]
15
+ documents = [Document(page_content=page, metadata={"page_number": idx + 1}) for idx, page in enumerate(pages)]
16
+ if not documents:
17
+ raise ValueError("The uploaded PDF is empty or could not be processed.")
18
+ return documents
19
+ except Exception as e:
20
+ raise ValueError(f"Failed to process the PDF: {str(e)}")
21
 
22
  # Initialize vector store
23
  def initialize_vectorstore(documents, api_key):
 
26
  vectorstore = Chroma.from_documents(documents, embedding=embeddings)
27
  return vectorstore
28
 
29
+ # RAG retrieval and LLM chain for FAQ Bot
30
def rag_from_pdf(question, pdf_file, api_key):
    """Answer a question about an uploaded PDF via retrieval-augmented generation.

    Args:
        question: The user's question.
        pdf_file: Uploaded file object with a ``.name`` path attribute.
        api_key: OpenAI API key used for embeddings and the chat model.

    Returns:
        str: The model's answer, or a user-facing validation/error message.
    """
    # Guard clauses: validate every input before doing any expensive work.
    if not question.strip():
        return "Please enter a question."
    if not pdf_file:
        return "Please upload a valid PDF file."
    if not api_key.strip():
        return "Please enter your OpenAI API key."

    try:
        # Process the PDF into documents
        documents = pdf_to_documents(pdf_file)

        # Build the vector store and a top-3 similarity retriever.
        vectorstore = initialize_vectorstore(documents, api_key)
        retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})

        # BUG FIX: pass the user's key explicitly — the original relied on an
        # OPENAI_API_KEY environment variable despite collecting a key in the UI.
        llm = ChatOpenAI(model="gpt-3.5-turbo", openai_api_key=api_key)

        # Prompt template combining retrieved context with the question.
        prompt_template = """
        You are a helpful assistant answering questions based on the provided PDF document.
        Only use the given context to answer the question.
        Question: {question}
        Context: {context}
        """
        prompt = ChatPromptTemplate.from_template(prompt_template)

        # Retrieve relevant documents and join them into one context string.
        retrieved_docs = retriever.get_relevant_documents(question)
        context = "\n".join(doc.page_content for doc in retrieved_docs)

        if not context.strip():
            return "No relevant information found in the document to answer the question."

        # BUG FIX: ChatOpenAI has no `completion=` keyword — the original
        # `llm(completion=formatted_prompt)` raised TypeError on every query
        # (masked by the broad except below). `predict` takes a plain string
        # prompt and returns the answer text, so `.strip()` is valid on it.
        formatted_prompt = prompt.format(question=question, context=context)
        response = llm.predict(formatted_prompt)
        return response.strip()
    except Exception as e:
        # Surface any failure (bad PDF, auth error, network) as a readable message.
        return f"An error occurred: {str(e)}"
71
 
72
  # Gradio interface
73
  with gr.Blocks() as app:
74
+ gr.Markdown("## Smart FAQ Bot - Ask Questions from Your PDF File")
75
+
76
  # Input for OpenAI API Key
77
+ api_key_input = gr.Textbox(label="Enter your OpenAI API Key", type="password", placeholder="sk-...")
78
 
79
  # File upload for the PDF
80
+ pdf_file_input = gr.File(label="Upload your PDF document", file_types=[".pdf"])
81
 
82
  # Question input
83
+ question_input = gr.Textbox(label="Ask a question related to the PDF", placeholder="Type your question here...")
84
 
85
  # Output for the RAG response
86
+ rag_output = gr.Textbox(label="Generated Answer", lines=10, placeholder="Your answer will appear here...")
87
 
88
  # Button to run RAG chain
89
  rag_button = gr.Button("Ask Question")