DexterSptizu committed on
Commit
b77cac2
1 Parent(s): 77ea126

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -61
app.py CHANGED
@@ -1,23 +1,19 @@
1
  import gradio as gr
2
- from langchain.vectorstores import Chroma
3
- from langchain.embeddings.openai import OpenAIEmbeddings
4
- from langchain.schema import Document
5
- from langchain.chat_models import ChatOpenAI
6
- from langchain.prompts.chat import ChatPromptTemplate
 
7
  from PyPDF2 import PdfReader
8
  import os
9
 
10
  # Function to process the uploaded PDF and convert it to documents
11
  def pdf_to_documents(pdf_file):
12
- try:
13
- reader = PdfReader(pdf_file.name)
14
- pages = [page.extract_text().strip() for page in reader.pages if page.extract_text()]
15
- documents = [Document(page_content=page, metadata={"page_number": idx + 1}) for idx, page in enumerate(pages)]
16
- if not documents:
17
- raise ValueError("The uploaded PDF is empty or could not be processed.")
18
- return documents
19
- except Exception as e:
20
- raise ValueError(f"Failed to process the PDF: {str(e)}")
21
 
22
  # Initialize vector store
23
  def initialize_vectorstore(documents, api_key):
@@ -26,64 +22,50 @@ def initialize_vectorstore(documents, api_key):
26
  vectorstore = Chroma.from_documents(documents, embedding=embeddings)
27
  return vectorstore
28
 
29
- # RAG retrieval and LLM chain for FAQ Bot
30
  def rag_from_pdf(question, pdf_file, api_key):
31
- if not question.strip():
32
- return "Please enter a question."
33
- if not pdf_file:
34
- return "Please upload a valid PDF file."
35
- if not api_key.strip():
36
- return "Please enter your OpenAI API key."
 
 
 
 
 
37
 
38
- try:
39
- # Process the PDF into documents
40
- documents = pdf_to_documents(pdf_file)
41
-
42
- # Initialize vectorstore
43
- vectorstore = initialize_vectorstore(documents, api_key)
44
- retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})
45
-
46
- # Initialize the LLM
47
- llm = ChatOpenAI(model="gpt-3.5-turbo")
48
-
49
- # Create a prompt template for combining context and question
50
- prompt_template = """
51
- You are a helpful assistant answering questions based on the provided PDF document.
52
- Only use the given context to answer the question.
53
- Question: {question}
54
- Context: {context}
55
- """
56
- prompt = ChatPromptTemplate.from_template(prompt_template)
57
-
58
- # Retrieve relevant documents
59
- retrieved_docs = retriever.get_relevant_documents(question)
60
- context = "\n".join([doc.page_content for doc in retrieved_docs])
61
-
62
- # Generate response using the LLM
63
- if not context.strip():
64
- return "No relevant information found in the document to answer the question."
65
-
66
- formatted_prompt = prompt.format(question=question, context=context)
67
- response = llm(completion=formatted_prompt)
68
- return response.strip()
69
- except Exception as e:
70
- return f"An error occurred: {str(e)}"
71
 
72
  # Gradio interface
73
  with gr.Blocks() as app:
74
- gr.Markdown("## Smart FAQ Bot - Ask Questions from Your PDF File")
75
-
76
  # Input for OpenAI API Key
77
- api_key_input = gr.Textbox(label="Enter your OpenAI API Key", type="password", placeholder="sk-...")
78
 
79
  # File upload for the PDF
80
- pdf_file_input = gr.File(label="Upload your PDF document", file_types=[".pdf"])
81
 
82
  # Question input
83
- question_input = gr.Textbox(label="Ask a question related to the PDF", placeholder="Type your question here...")
84
 
85
  # Output for the RAG response
86
- rag_output = gr.Textbox(label="Generated Answer", lines=10, placeholder="Your answer will appear here...")
87
 
88
  # Button to run RAG chain
89
  rag_button = gr.Button("Ask Question")
@@ -92,4 +74,4 @@ with gr.Blocks() as app:
92
  rag_button.click(rag_from_pdf, inputs=[question_input, pdf_file_input, api_key_input], outputs=rag_output)
93
 
94
  # Launch Gradio app
95
- app.launch()
 
1
  import gradio as gr
2
+ from langchain_chroma import Chroma
3
+ from langchain_openai import OpenAIEmbeddings
4
+ from langchain_core.documents import Document
5
+ from langchain_openai import ChatOpenAI
6
+ from langchain_core.prompts import ChatPromptTemplate
7
+ from langchain_core.runnables import RunnablePassthrough
8
  from PyPDF2 import PdfReader
9
  import os
10
 
11
  # Function to process the uploaded PDF and convert it to documents
12
  def pdf_to_documents(pdf_file):
13
+ reader = PdfReader(pdf_file.name)
14
+ pages = [page.extract_text() for page in reader.pages]
15
+ documents = [Document(page_content=page, metadata={"page_number": idx + 1}) for idx, page in enumerate(pages)]
16
+ return documents
 
 
 
 
 
17
 
18
  # Initialize vector store
19
  def initialize_vectorstore(documents, api_key):
 
22
  vectorstore = Chroma.from_documents(documents, embedding=embeddings)
23
  return vectorstore
24
 
25
+ # RAG retrieval and LLM chain
26
  def rag_from_pdf(question, pdf_file, api_key):
27
+ documents = pdf_to_documents(pdf_file)
28
+ vectorstore = initialize_vectorstore(documents, api_key)
29
+
30
+ retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 2}) # Retrieve top 2 relevant sections
31
+
32
+ # Initialize the LLM
33
+ llm = ChatOpenAI(model="gpt-3.5-turbo")
34
+
35
+ # Create a prompt template for combining context and question
36
+ prompt_template = """
37
+ Answer this question using the provided context only.
38
 
39
+ {question}
40
+
41
+ Context:
42
+ {context}
43
+ """
44
+
45
+ prompt = ChatPromptTemplate.from_messages([("human", prompt_template)])
46
+
47
+ # Create a RAG chain combining retriever and LLM
48
+ rag_chain = {"context": retriever, "question": RunnablePassthrough()} | prompt | llm
49
+
50
+ # Perform retrieval and return LLM's answer
51
+ response = rag_chain.invoke(question)
52
+ return response.content
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  # Gradio interface
55
  with gr.Blocks() as app:
56
+ gr.Markdown("## PDF-based Question Answering with RAG")
57
+
58
  # Input for OpenAI API Key
59
+ api_key_input = gr.Textbox(label="Enter your OpenAI API Key", type="password")
60
 
61
  # File upload for the PDF
62
+ pdf_file_input = gr.File(label="Upload your PDF document")
63
 
64
  # Question input
65
+ question_input = gr.Textbox(label="Ask a question related to the PDF")
66
 
67
  # Output for the RAG response
68
+ rag_output = gr.Textbox(label="Generated Response", lines=10)
69
 
70
  # Button to run RAG chain
71
  rag_button = gr.Button("Ask Question")
 
74
  rag_button.click(rag_from_pdf, inputs=[question_input, pdf_file_input, api_key_input], outputs=rag_output)
75
 
76
  # Launch Gradio app
77
+ app.launch()