File size: 3,173 Bytes
f31b8a3
37123e5
 
 
 
 
f31b8a3
 
37123e5
 
 
 
 
 
 
72390f6
37123e5
69992ee
24412da
 
 
 
 
 
 
 
 
 
 
 
 
72390f6
69992ee
72390f6
 
37123e5
69992ee
24412da
 
 
 
 
 
 
 
 
 
37123e5
24412da
 
 
37123e5
 
 
 
72390f6
69992ee
37123e5
69992ee
37123e5
 
 
 
 
72390f6
 
37123e5
72390f6
 
37123e5
69992ee
37123e5
69992ee
37123e5
 
 
 
 
69992ee
6dee266
f31b8a3
 
69992ee
f31b8a3
 
69992ee
 
 
 
f31b8a3
 
69992ee
 
 
 
 
f31b8a3
 
 
 
 
 
 
69992ee
f31b8a3
 
 
69992ee
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import gradio as gr
import os
from utils.document_parsing import DocParsing
from utils.retrieval import Retrieval
from utils.llm_generation import LLMGeneration
import json


# Model identifiers used throughout this module.
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
llm_model_name = "gpt-4o-mini"

# The retriever is constructed once, at import time, with the embedding model name.
retriever = Retrieval(model_name=embedding_model_name)

# Module-level placeholder for the LLM generator; it starts out unset.
llm_generator = None


def set_api_key(api_key: str) -> None:
    """
    Sets the OpenAI API key as an environment variable.

    Leading and trailing whitespace (for example a newline picked up while
    pasting) is removed before the key is stored, so the exported value is
    exactly the key itself.

    Parameters:
    api_key (str): The OpenAI API key to be set.

    Returns:
    None: This function does not return any value.

    Raises:
    gr.Error: If the provided API key is None, empty, or consists only of whitespace characters.
    """
    # Treat None the same as an empty string so the caller gets the
    # user-facing gr.Error rather than an AttributeError.
    cleaned_key = api_key.strip() if api_key else ""
    if not cleaned_key:
        raise gr.Error("Please provide a valid API key")

    # Store the cleaned value, not the raw input that was validated.
    os.environ["OPENAI_API_KEY"] = cleaned_key


def _split_questions(questions: str) -> list:
    """Return the non-blank lines of *questions*, each stripped of surrounding whitespace."""
    return [line.strip() for line in (questions or "").splitlines() if line.strip()]


def process_inputs(api_key: str, pdf_file, questions: str) -> str:
    """
    Answers a list of questions about an uploaded PDF and returns the answers as JSON.

    The function sets the API key, validates the PDF file and the questions,
    parses the PDF, creates a vector store from the parsed chunks, and then,
    for each question, retrieves the top similar chunks and generates an answer.
    All input validation happens before the PDF is parsed so that invalid
    requests fail without doing the expensive work.

    Parameters:
    api_key (str): The OpenAI API key for accessing the LLM model.
    pdf_file (File): The uploaded PDF file; either an object exposing the path
        as `.name` or the path itself.
    questions (str): The list of questions, one per line. Blank lines are ignored.

    Returns:
    str: A JSON object (indented by 4 spaces) mapping each question to its answer.

    Raises:
    gr.Error: If the API key is invalid, no PDF file was uploaded, or no
        non-blank question was provided.
    """
    # Setup API key
    set_api_key(api_key)

    if pdf_file is None:
        raise gr.Error("Please upload a pdf file")

    # Validate the questions up front, before parsing and embedding the PDF.
    # splitlines() also copes with "\r\n" line endings, and blank lines are
    # dropped so they are never sent to the retriever or the LLM.
    questions_list = _split_questions(questions)
    if not questions_list:
        raise gr.Error("Please provide valid set of questions")

    # Use `.name` when the upload object provides it; otherwise treat the
    # value itself as the file path.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    # Parsing the pdf
    doc_handler = DocParsing(file_path=pdf_path, model_name=embedding_model_name)
    docs = doc_handler.process_pdf()

    # Create vector store
    retriever.create_vector_store(chunks=docs)

    # LLM generator (local name chosen so it does not shadow the module-level
    # `llm_generator` placeholder)
    answer_generator = LLMGeneration(llm_model_name=llm_model_name)

    output_dict = {}
    for question in questions_list:
        # Retrieve top similar chunks
        similar_chunks = retriever.search(query=question, k=10)

        # Generate the answer
        output_dict[question] = answer_generator.generate_answer(
            question, similar_chunks
        )

    # ensure_ascii=False keeps non-ASCII questions/answers readable in the UI
    # instead of rendering them as \uXXXX escapes.
    return json.dumps(output_dict, indent=4, ensure_ascii=False)


# Build the Gradio UI. Components are created inside the Blocks context in the
# order they are declared here, so the statement order below is significant.
with gr.Blocks() as demo:
    # Page title and short usage instructions.
    gr.Markdown("# AskMYPDF Q&A App")
    gr.Markdown(
        "Enter your OPENAI API key, upload a PDF, and list your questions below."
    )

    # Inputs: API key (password-type textbox), PDF upload restricted to the
    # ".pdf" file type, and a multi-line textbox for the questions.
    api_key_input = gr.Textbox(label="API Key", type="password")
    pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
    questions_input = gr.Textbox(
        label="List of Questions (one per line)",
        lines=5,
        placeholder="Question 1\nQuestion 2\n...",
    )

    # Submit button and the textbox that receives the string returned by
    # process_inputs.
    submit_button = gr.Button("Submit")
    output = gr.Textbox(label="Output")

    # Wire the button: the three input components are passed to process_inputs
    # in this order (api_key, pdf_file, questions) and its return value is
    # written to `output`.
    submit_button.click(
        fn=process_inputs,
        inputs=[api_key_input, pdf_input, questions_input],
        outputs=output,
    )

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()