add flow to app
app.py CHANGED
@@ -1,15 +1,52 @@
 import gradio as gr
+import os
+from utils.document_parsing import DocParsing
+from utils.retrieval import Retrieval
+from utils.llm_generation import LLMGeneration
+import json
 
-def process_inputs(api_key, pdf_file, questions):
 
-
-
-
-
-
-
+
+embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
+# Setting up Retriever
+retriever = Retrieval(model_name=embedding_model_name)
+
+
+llm_model_name = "gpt-4o-mini"
+# Setting up LLMGenerator
+llm_generator = LLMGeneration(llm_model_name=llm_model_name)
+
+def set_api_key(api_key):
+    os.environ['OPENAI_API_KEY'] = api_key
+
+def process_inputs(api_key: str, pdf_file, questions: str):
+
+    # Set up the API key
+    set_api_key(api_key)
+
+    if pdf_file is None:
+        raise Exception("Please upload a PDF file.")
+
+
+    # Parsing the PDF
+    doc_handler = DocParsing(file_path=pdf_file.name, model_name=embedding_model_name)
+    docs = doc_handler.process_pdf()
+
+    # Create vector store
+    retriever.create_vector_store(chunks=docs)
+
+
+    output_dict = {}
     questions_list = questions.strip().split('\n')
-
+    for question in questions_list:
+
+        # Retrieve top similar chunks
+        similar_chunks = retriever.search(query=question, k=10)
+
+        # Generate the answer
+        output_dict[question] = llm_generator.generate_answer(question, similar_chunks)
+
+
+    response = json.dumps(output_dict)
     return response
 
 with gr.Blocks() as demo:
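
For context, the utils modules imported above are not part of this diff. The sketch below shows roughly the interfaces that app.py assumes, inferred only from how the classes are called in this commit; parameter names, return types, and the placeholder bodies are assumptions, not the actual implementations in utils/.

# Sketch of the interfaces implied by app.py (inferred from usage; not the real code).

class DocParsing:
    def __init__(self, file_path: str, model_name: str):
        # Path to the uploaded PDF and the embedding model name passed from app.py.
        self.file_path = file_path
        self.model_name = model_name

    def process_pdf(self) -> list:
        # Expected to return a list of text chunks extracted from the PDF.
        raise NotImplementedError


class Retrieval:
    def __init__(self, model_name: str):
        # Embedding model used to build and query the vector store.
        self.model_name = model_name

    def create_vector_store(self, chunks: list) -> None:
        # Expected to embed the chunks and index them for similarity search.
        raise NotImplementedError

    def search(self, query: str, k: int = 10) -> list:
        # Expected to return the k chunks most similar to the query.
        raise NotImplementedError


class LLMGeneration:
    def __init__(self, llm_model_name: str):
        # Name of the chat model used for answer generation.
        self.llm_model_name = llm_model_name

    def generate_answer(self, question: str, context_chunks: list) -> str:
        # Expected to prompt the LLM with the question plus the retrieved context.
        raise NotImplementedError

With these interfaces, process_inputs runs the flow end to end: parse the PDF, index the chunks, then retrieve and answer each question, returning the results as a JSON string keyed by question.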