agoyal496 committed on
Commit
37123e5
·
1 Parent(s): b7b9521

add flow to app

Browse files
Files changed (1) hide show
  1. app.py +45 -8
app.py CHANGED
@@ -1,15 +1,52 @@
1
  import gradio as gr
 
 
 
 
 
2
 
3
- def process_inputs(api_key, pdf_file, questions):
4
 
5
- # In this placeholder, we'll simply echo the inputs.
6
- if pdf_file is not None:
7
- pdf_name = pdf_file.name
8
- else:
9
- pdf_name = "No file uploaded"
10
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  questions_list = questions.strip().split('\n')
12
- response = f"API Key: {api_key}\nUploaded PDF: {pdf_name}\nQuestions: {questions_list}"
 
 
 
 
 
 
 
 
 
13
  return response
14
 
15
  with gr.Blocks() as demo:
 
1
  import gradio as gr
2
+ import os
3
+ from utils.document_parsing import DocParsing
4
+ from utils.retrieval import Retrieval
5
+ from utils.llm_generation import LLMGeneration
6
+ import json
7
 
 
8
 
9
+ embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
10
+ # Setting up Retriever
11
+ retriever = Retrieval(model_name=embedding_model_name)
12
+
13
+
14
+ llm_model_name = "gpt-4o-mini"
15
+ # Setting up LLMGenerator
16
+ llm_generator = LLMGeneration(llm_model_name=llm_model_name)
17
+
18
+ def set_api_key(api_key):
19
+ os.environ['OPENAI_API_KEY'] = api_key
20
+
21
+ def process_inputs(api_key:str, pdf_file, questions: str):
22
+
23
+ # Set up the API key
24
+ set_api_key(api_key)
25
+
26
+ if pdf_file is None:
27
+ raise Exception("Blaf")
28
+
29
+
30
+ # Parsing the pdf
31
+ doc_handler = DocParsing(file_path=pdf_file.name,model_name=embedding_model_name)
32
+ docs = doc_handler.process_pdf()
33
+
34
+ # Create vector store
35
+ retriever.create_vector_store(chunks=docs)
36
+
37
+
38
+ output_dict = {}
39
  questions_list = questions.strip().split('\n')
40
+ for question in questions_list:
41
+
42
+ # Retrieve top similar chunks
43
+ similar_chunks = retriever.search(query=question, k=10)
44
+
45
+ # Generate the answer
46
+ output_dict[question] = llm_generator.generate_answer(question, similar_chunks)
47
+
48
+
49
+ response = json.dumps(output_dict)
50
  return response
51
 
52
  with gr.Blocks() as demo: