Spaces:
Runtime error
Runtime error
samarthagarwal23
committed on
Commit
•
d7f0548
1
Parent(s):
b37de3c
Update app.py
Browse files
app.py
CHANGED
@@ -17,11 +17,6 @@ overlap = 15
|
|
17 |
param_top_k_retriver = 15
|
18 |
param_top_k_ranker = 3
|
19 |
|
20 |
-
qa_model = pipeline("question-answering",
|
21 |
-
#model = "deepset/minilm-uncased-squad2")
|
22 |
-
model = "deepset/roberta-base-squad2")
|
23 |
-
|
24 |
-
|
25 |
def read_pdf(file):
|
26 |
text = extract_text(file.name)
|
27 |
# Split text into smaller docs
|
@@ -80,7 +75,7 @@ def print_colored(text, start_idx, end_idx, confidence):
|
|
80 |
cstr_break(conf_str, color='grey')]), color='black')
|
81 |
return a
|
82 |
|
83 |
-
def final_qa_pipeline(file, query):
|
84 |
docs = read_pdf(file)
|
85 |
tokenized_corpus = []
|
86 |
for doc in docs:
|
@@ -91,6 +86,10 @@ def final_qa_pipeline(file, query):
|
|
91 |
top_k_retriver, top_k_ranker = param_top_k_retriver, param_top_k_ranker
|
92 |
lvl1 = retrieval(query, top_k_retriver, docs, bm25)
|
93 |
|
|
|
|
|
|
|
|
|
94 |
if len(lvl1) > 0:
|
95 |
fnl_rank = qa_ranker(query, [l["docs"] for l in lvl1], top_k_ranker)
|
96 |
top1 = print_colored(fnl_rank[0]['doc'], fnl_rank[0]['start'], fnl_rank[0]['end'], str(np.round(100*fnl_rank[0]["score"],1))+"%")
|
@@ -111,11 +110,12 @@ examples = [
|
|
111 |
[os.path.abspath("NASDAQ_AAPL_2020.pdf"), "how much are the outstanding shares ?"],
|
112 |
[os.path.abspath("NASDAQ_AAPL_2020.pdf"), "what is competitors strategy ?"],
|
113 |
[os.path.abspath("NASDAQ_AAPL_2020.pdf"), "who is the chief executive officer ?"],
|
|
|
114 |
]
|
115 |
|
116 |
iface = gr.Interface(
|
117 |
fn = final_qa_pipeline,
|
118 |
-
inputs = [gr.inputs.File(label="input pdf file"), gr.inputs.Textbox(label="Question:")],
|
119 |
outputs = [gr.outputs.HTML(label="Top 1 answer"), gr.outputs.HTML(label="Top 2 answer")],
|
120 |
examples=examples,
|
121 |
theme = "grass",
|
|
|
17 |
param_top_k_retriver = 15
|
18 |
param_top_k_ranker = 3
|
19 |
|
|
|
|
|
|
|
|
|
|
|
20 |
def read_pdf(file):
|
21 |
text = extract_text(file.name)
|
22 |
# Split text into smaller docs
|
|
|
75 |
cstr_break(conf_str, color='grey')]), color='black')
|
76 |
return a
|
77 |
|
78 |
+
def final_qa_pipeline(file, query, model_nm):
|
79 |
docs = read_pdf(file)
|
80 |
tokenized_corpus = []
|
81 |
for doc in docs:
|
|
|
86 |
top_k_retriver, top_k_ranker = param_top_k_retriver, param_top_k_ranker
|
87 |
lvl1 = retrieval(query, top_k_retriver, docs, bm25)
|
88 |
|
89 |
+
qa_model = pipeline("question-answering",
|
90 |
+
#model = "deepset/minilm-uncased-squad2")
|
91 |
+
model = "deepset/"+model_nm)
|
92 |
+
|
93 |
if len(lvl1) > 0:
|
94 |
fnl_rank = qa_ranker(query, [l["docs"] for l in lvl1], top_k_ranker)
|
95 |
top1 = print_colored(fnl_rank[0]['doc'], fnl_rank[0]['start'], fnl_rank[0]['end'], str(np.round(100*fnl_rank[0]["score"],1))+"%")
|
|
|
110 |
[os.path.abspath("NASDAQ_AAPL_2020.pdf"), "how much are the outstanding shares ?"],
|
111 |
[os.path.abspath("NASDAQ_AAPL_2020.pdf"), "what is competitors strategy ?"],
|
112 |
[os.path.abspath("NASDAQ_AAPL_2020.pdf"), "who is the chief executive officer ?"],
|
113 |
+
[os.path.abspath("NASDAQ_MSFT_2020.pdf"), "How much is the guided revenue for next quarter?"],
|
114 |
]
|
115 |
|
116 |
iface = gr.Interface(
|
117 |
fn = final_qa_pipeline,
|
118 |
+
inputs = [gr.inputs.File(label="input pdf file"), gr.inputs.Textbox(label="Question:"), gr.inputs.Dropdown(["minilm-uncased-squad2","roberta-base-squad2"], "minilm-uncased-squad2", label="Model")],
|
119 |
outputs = [gr.outputs.HTML(label="Top 1 answer"), gr.outputs.HTML(label="Top 2 answer")],
|
120 |
examples=examples,
|
121 |
theme = "grass",
|