import json
import os

import gradio as gr

from utils.document_parsing import DocParsing
from utils.llm_generation import LLMGeneration
from utils.retrieval import Retrieval

embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"

# Setting up Retriever
retriever = Retrieval(model_name=embedding_model_name)

llm_model_name = "gpt-4o-mini"

# Setting up LLMGenerator
# Module-level placeholder only: nothing in this file assigns to it.
# process_inputs builds its own generator on every request, after the API key
# supplied by the user has been exported to the environment.
llm_generator = None


def set_api_key(api_key: str) -> None:
    """
    Sets the OpenAI API key as an environment variable.

    Surrounding whitespace (e.g. a trailing newline from copy/paste) is removed
    before the key is stored in ``OPENAI_API_KEY``.

    Parameters:
    api_key (str): The OpenAI API key to be set.

    Returns:
    None: This function does not return any value.

    Raises:
    gr.Error: If the provided API key is None, empty, or consists only of
        whitespace characters.
    """
    cleaned_key = (api_key or "").strip()
    if not cleaned_key:
        raise gr.Error("Please provide a valid API key")
    os.environ["OPENAI_API_KEY"] = cleaned_key


def _parse_questions(questions: str) -> list:
    """Split the raw textbox content into stripped, non-empty questions."""
    # Stripping each line also drops the "\r" left behind by "\r\n" endings;
    # blank lines are skipped so no empty query reaches the retriever or LLM.
    stripped_lines = (line.strip() for line in (questions or "").split("\n"))
    return [line for line in stripped_lines if line]


def process_inputs(api_key: str, pdf_file, questions: str) -> str:
    """
    Answers a list of questions about an uploaded PDF and returns the answers
    as a JSON string.

    All user inputs (API key, PDF, questions) are validated first so that an
    invalid request fails before the PDF is parsed and indexed. The function
    then parses the PDF, rebuilds the vector store of the module-level
    retriever, creates an LLM generator, and for each question retrieves
    similar chunks (k=10) and generates an answer.

    Parameters:
    api_key (str): The OpenAI API key for accessing the LLM model.
    pdf_file (File): The uploaded PDF file, as passed in by the Gradio File
        component (either an object exposing ``.name`` or a plain path string).
    questions (str): The list of questions, one per line. Blank lines are
        ignored and surrounding whitespace is removed from each question.

    Returns:
    str: The output in JSON format mapping each question to its answer.

    Raises:
    gr.Error: If the API key is missing, no PDF was uploaded, or no non-empty
        question was provided.
    """
    # Setup API key
    set_api_key(api_key)

    if pdf_file is None:
        raise gr.Error("Please upload a pdf file")

    # Validate the questions before doing any expensive parsing/indexing work.
    questions_list = _parse_questions(questions)
    if not questions_list:
        raise gr.Error("Please provide valid set of questions")

    # Parsing the pdf. Fall back to the value itself when the upload is
    # already a plain path string rather than a file-like object.
    pdf_path = getattr(pdf_file, "name", pdf_file)
    doc_handler = DocParsing(file_path=pdf_path, model_name=embedding_model_name)
    docs = doc_handler.process_pdf()

    # Create vector store
    retriever.create_vector_store(chunks=docs)

    # LLM generator, created after the API key has been exported
    # (presumably it reads OPENAI_API_KEY -- confirm in utils.llm_generation).
    # Deliberately a local with its own name so it does not shadow the
    # module-level ``llm_generator`` placeholder.
    generator = LLMGeneration(llm_model_name=llm_model_name)

    output_dict = {}
    for question in questions_list:
        # Retrieve top similar chunks
        similar_chunks = retriever.search(query=question, k=10)
        # Generate the answer
        output_dict[question] = generator.generate_answer(question, similar_chunks)

    return json.dumps(output_dict, indent=4)


# UI definition: three inputs (API key, PDF, questions) wired to
# process_inputs, whose JSON string result is shown in the output textbox.
with gr.Blocks() as demo:
    gr.Markdown("# AskMYPDF Q&A App")
    gr.Markdown(
        "Enter your OPENAI API key, upload a PDF, and list your questions below."
    )

    api_key_input = gr.Textbox(label="API Key", type="password")
    pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
    questions_input = gr.Textbox(
        label="List of Questions (one per line)",
        lines=5,
        placeholder="Question 1\nQuestion 2\n...",
    )

    submit_button = gr.Button("Submit")
    output = gr.Textbox(label="Output")

    submit_button.click(
        fn=process_inputs,
        inputs=[api_key_input, pdf_input, questions_input],
        outputs=output,
    )

if __name__ == "__main__":
    demo.launch()