Spaces:

agoyal496
/

AskMyPDF

Sleeping

File size: 3,173 Bytes

import gradio as gr
import os
from utils.document_parsing import DocParsing
from utils.retrieval import Retrieval
from utils.llm_generation import LLMGeneration
import json


# Name of the embedding model. The same value is handed to both the retriever
# below and the PDF parser in process_inputs.
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
# Set up the retriever once at import time; process_inputs reuses this single
# module-level instance on every call.
retriever = Retrieval(model_name=embedding_model_name)


# Name of the chat model passed to LLMGeneration in process_inputs.
llm_model_name = "gpt-4o-mini"
# Module-level placeholder for the LLM generator. process_inputs constructs its
# own LLMGeneration instance locally on each call, so this name is never
# reassigned by the code visible in this file.
llm_generator = None


def set_api_key(api_key: str) -> None:
    """
    Sets the OpenAI API key as an environment variable.

    The key is stored in ``OPENAI_API_KEY`` with leading and trailing
    whitespace removed, so a key pasted together with a stray space or
    newline is not written to the environment verbatim.

    Parameters:
    api_key (str): The OpenAI API key to be set.

    Returns:
    None: This function does not return any value.

    Raises:
    gr.Error: If the provided API key is None, empty, or consists only of
        whitespace characters.
    """
    # Treat None like an empty string instead of failing with AttributeError.
    cleaned_key = api_key.strip() if api_key else ""
    if not cleaned_key:
        raise gr.Error("Please provide a valid API key")
    os.environ["OPENAI_API_KEY"] = cleaned_key


def process_inputs(api_key: str, pdf_file, questions: str) -> str:
    """
    Answers a list of questions about an uploaded PDF and returns the result as JSON.

    Steps, in order: store the API key, validate the PDF and the questions,
    parse the PDF with DocParsing, build the vector store on the shared
    module-level retriever, create an LLMGeneration instance, and for each
    question retrieve similar chunks (k=10) and generate an answer.

    All input validation happens before the PDF is parsed, so invalid input
    is rejected without doing the expensive parsing/indexing work.

    Parameters:
    api_key (str): The OpenAI API key for accessing the LLM model.
    pdf_file (File): The uploaded PDF file. Either an object exposing the
        file path as ``.name`` or a plain path string is accepted.
    questions (str): The list of questions, one per line. Blank lines are
        ignored and surrounding whitespace on each question is removed.

    Returns:
    str: A JSON object (indent=4) mapping each question to its answer. If the
        same question appears more than once, it occupies a single key.

    Raises:
    gr.Error: If the API key is invalid, no PDF was uploaded, or no non-blank
        question was provided.
    """
    # Setup Api KEY
    set_api_key(api_key)

    if pdf_file is None:
        raise gr.Error("Please upload a pdf file")

    # splitlines() handles "\r\n" as well as "\n"; dropping blank lines avoids
    # sending empty queries to the retriever and the LLM.
    questions_list = [
        line.strip() for line in (questions or "").splitlines() if line.strip()
    ]
    if not questions_list:
        raise gr.Error("Please provide valid set of questions")

    # Accept both a file-like object carrying the path in `.name` and a bare
    # path string.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    # Parsing the pdf
    doc_handler = DocParsing(file_path=pdf_path, model_name=embedding_model_name)
    docs = doc_handler.process_pdf()

    # Create vector store
    retriever.create_vector_store(chunks=docs)

    # LLM Generator (created after set_api_key has populated the environment)
    generator = LLMGeneration(llm_model_name=llm_model_name)

    output_dict = {}
    for question in questions_list:
        # Retrieve top similar chunks
        similar_chunks = retriever.search(query=question, k=10)

        # Generate the answer
        output_dict[question] = generator.generate_answer(question, similar_chunks)

    # ensure_ascii=False keeps non-ASCII text readable in the output box
    # instead of rendering it as \uXXXX escapes.
    return json.dumps(output_dict, indent=4, ensure_ascii=False)


# Build the Gradio UI. Components are created in the order they should appear.
with gr.Blocks() as demo:
    # Page title and short usage instructions.
    gr.Markdown("# AskMYPDF Q&A App")
    gr.Markdown(
        "Enter your OPENAI API key, upload a PDF, and list your questions below."
    )

    # Input widgets: API key (password-type textbox), PDF upload limited to
    # the ".pdf" file type, and a multi-line box for the questions.
    api_key_input = gr.Textbox(label="API Key", type="password")
    pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
    questions_input = gr.Textbox(
        label="List of Questions (one per line)",
        lines=5,
        placeholder="Question 1\nQuestion 2\n...",
    )

    submit_button = gr.Button("Submit")
    # Textbox that receives the JSON string returned by process_inputs.
    output = gr.Textbox(label="Output")

    # Wire the button to process_inputs; the inputs are listed in the same
    # order as that function's parameters (api_key, pdf_file, questions).
    submit_button.click(
        fn=process_inputs,
        inputs=[api_key_input, pdf_input, questions_input],
        outputs=output,
    )

# Launch the app only when this file is executed directly.
if __name__ == "__main__":
    demo.launch()