File size: 3,173 Bytes
f31b8a3 37123e5 f31b8a3 37123e5 72390f6 37123e5 69992ee 24412da 72390f6 69992ee 72390f6 37123e5 69992ee 24412da 37123e5 24412da 37123e5 72390f6 69992ee 37123e5 69992ee 37123e5 72390f6 37123e5 72390f6 37123e5 69992ee 37123e5 69992ee 37123e5 69992ee 6dee266 f31b8a3 69992ee f31b8a3 69992ee f31b8a3 69992ee f31b8a3 69992ee f31b8a3 69992ee |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
import gradio as gr
import os
from utils.document_parsing import DocParsing
from utils.retrieval import Retrieval
from utils.llm_generation import LLMGeneration
import json
# Sentence-transformers model used for both document and query embeddings.
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
# Setting up Retriever (module-level singleton shared across requests)
retriever = Retrieval(model_name=embedding_model_name)
# OpenAI chat model used for answer generation.
llm_model_name = "gpt-4o-mini"
# Setting up LLMGenerator — placeholder only: process_inputs builds its own
# local LLMGeneration instance, so this module-level name stays None.
llm_generator = None
def set_api_key(api_key: str) -> None:
    """
    Store the OpenAI API key in the ``OPENAI_API_KEY`` environment variable.

    Parameters:
        api_key (str): The OpenAI API key to be set. Surrounding whitespace
            (e.g. a trailing newline from copy-paste) is trimmed before storing.

    Returns:
        None: This function does not return any value.

    Raises:
        gr.Error: If the provided API key is empty or consists only of
            whitespace characters.
    """
    key = api_key.strip()
    if not key:
        raise gr.Error("Please provide a valid API key")
    # Fix: store the *stripped* key. The original stored the raw string, so a
    # key pasted with a trailing newline passed validation but failed auth.
    os.environ["OPENAI_API_KEY"] = key
def process_inputs(api_key: str, pdf_file, questions: str) -> str:
    """
    Run the full question-answering pipeline over an uploaded PDF.

    Steps: set the API key, validate all inputs, parse the PDF into chunks,
    index the chunks in the vector store, then answer each question with the
    LLM using its top retrieved chunks.

    Parameters:
        api_key (str): The OpenAI API key for accessing the LLM model.
        pdf_file (File): The uploaded PDF file (gradio file object; its
            ``.name`` attribute is the on-disk path).
        questions (str): The list of questions, one per line. Blank lines
            are ignored.

    Returns:
        str: The output in JSON format containing the answers to the questions.

    Raises:
        gr.Error: If the API key, PDF file, or question list is missing
            or invalid.
    """
    # Setup Api KEY
    set_api_key(api_key)

    # Validate all cheap inputs up front, BEFORE the expensive PDF parse and
    # embedding pass (the original only checked questions after indexing).
    if pdf_file is None:
        raise gr.Error("Please upload a pdf file")
    # Fix: strip each line and drop blanks so empty-string queries are never
    # sent to the retriever/LLM ("a\n\nb".split("\n") would keep "").
    questions_list = [q.strip() for q in questions.split("\n") if q.strip()]
    if not questions_list:
        raise gr.Error("Please provide valid set of questions")

    # Parsing the pdf into embedding-ready chunks
    doc_handler = DocParsing(file_path=pdf_file.name, model_name=embedding_model_name)
    docs = doc_handler.process_pdf()

    # Create vector store over the parsed chunks
    retriever.create_vector_store(chunks=docs)

    # LLM Generator — fresh local instance per request (the module-level
    # llm_generator placeholder is intentionally untouched).
    llm_generator = LLMGeneration(llm_model_name=llm_model_name)

    output_dict = {}
    for question in questions_list:
        # Retrieve top 10 similar chunks as context for this question
        similar_chunks = retriever.search(query=question, k=10)
        # Generate the answer grounded in the retrieved context
        output_dict[question] = llm_generator.generate_answer(question, similar_chunks)

    return json.dumps(output_dict, indent=4)
with gr.Blocks() as demo:
    # Page header and usage instructions.
    gr.Markdown("# AskMYPDF Q&A App")
    gr.Markdown(
        "Enter your OPENAI API key, upload a PDF, and list your questions below."
    )

    # Input widgets: API key, PDF upload, and the question list.
    key_box = gr.Textbox(label="API Key", type="password")
    pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
    question_box = gr.Textbox(
        label="List of Questions (one per line)",
        lines=5,
        placeholder="Question 1\nQuestion 2\n...",
    )

    # Trigger button and the JSON answer display.
    run_button = gr.Button("Submit")
    answer_box = gr.Textbox(label="Output")

    # Wire the button to the Q&A pipeline.
    run_button.click(
        fn=process_inputs,
        inputs=[key_box, pdf_upload, question_box],
        outputs=answer_box,
    )

if __name__ == "__main__":
    demo.launch()
|