|
import gradio as gr |
|
import os |
|
from utils.document_parsing import DocParsing |
|
from utils.retrieval import Retrieval |
|
from utils.llm_generation import LLMGeneration |
|
import json |
|
|
|
|
|
# Embedding model name: handed to the retriever here and to DocParsing inside
# process_inputs, so both are configured with the same model.
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
retriever = Retrieval(model_name=embedding_model_name)

# Model name handed to the answer generator used by process_inputs.
llm_model_name = "gpt-4o-mini"
llm_generator = LLMGeneration(llm_model_name=llm_model_name)
|
|
|
def set_api_key(api_key):
    """Publish *api_key* as the ``OPENAI_API_KEY`` environment variable.

    The value is written to ``os.environ``, so it is process-wide and each
    call replaces whatever the previous call stored.
    """
    os.environ.update({"OPENAI_API_KEY": api_key})
|
|
|
def process_inputs(api_key: str, pdf_file, questions: str):
    """Answer each question about an uploaded PDF and return the answers as JSON.

    The PDF is parsed into chunks, the chunks are indexed in the module-level
    ``retriever``, and every question is answered by ``llm_generator`` from
    the 10 chunks the retriever returns for it.

    Args:
        api_key: OpenAI API key; exported via ``set_api_key`` before any
            other work is done.
        pdf_file: Value delivered by the ``gr.File`` input. Either an object
            exposing the file path as ``.name`` or a plain path string.
        questions: Questions separated by newlines. Surrounding whitespace is
            trimmed and blank lines are skipped.

    Returns:
        A JSON object string mapping each question to its generated answer.

    Raises:
        gr.Error: If no PDF was uploaded or no question was entered. It is an
            ``Exception`` subclass whose message Gradio shows to the user.
    """
    set_api_key(api_key)

    if pdf_file is None:
        raise gr.Error("Please upload a PDF file before submitting.")

    # Validate the questions before the PDF is parsed and embedded, so an
    # empty form never triggers indexing or an LLM call on an empty query.
    question_list = [
        line.strip() for line in (questions or "").splitlines() if line.strip()
    ]
    if not question_list:
        raise gr.Error("Please enter at least one question.")

    # Accept both file-like uploads (path in ``.name``) and plain path strings.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    # Parse the PDF into chunks and (re)build the vector store from them.
    doc_handler = DocParsing(file_path=pdf_path, model_name=embedding_model_name)
    docs = doc_handler.process_pdf()
    retriever.create_vector_store(chunks=docs)

    # Retrieve context for each question and generate its answer.
    output_dict = {}
    for question in question_list:
        similar_chunks = retriever.search(query=question, k=10)
        output_dict[question] = llm_generator.generate_answer(question, similar_chunks)

    return json.dumps(output_dict)
|
|
|
# Page layout: heading and instructions, the three inputs, then a submit
# button whose click sends the inputs to process_inputs and shows the result.
with gr.Blocks() as demo:
    gr.Markdown("# AskMYPDF Q&A App")
    gr.Markdown("Enter your OPENAI API key, upload a PDF, and list your questions below.")

    # Inputs, in the order process_inputs receives them.
    api_key_input = gr.Textbox(label="API Key", type="password")
    pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
    questions_input = gr.Textbox(
        label="List of Questions (one per line)",
        lines=5,
        placeholder="Question 1\nQuestion 2\n...",
    )

    submit_button = gr.Button("Submit")
    output = gr.Textbox(label="Output")

    # The string returned by process_inputs is written to the output box.
    submit_button.click(
        fn=process_inputs,
        inputs=[api_key_input, pdf_input, questions_input],
        outputs=output,
    )
|
|
|
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()