samarthagarwal23 committed on
Commit
d7f0548
1 Parent(s): b37de3c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -7
app.py CHANGED
@@ -17,11 +17,6 @@ overlap = 15
17
  param_top_k_retriver = 15
18
  param_top_k_ranker = 3
19
 
20
- qa_model = pipeline("question-answering",
21
- #model = "deepset/minilm-uncased-squad2")
22
- model = "deepset/roberta-base-squad2")
23
-
24
-
25
  def read_pdf(file):
26
  text = extract_text(file.name)
27
  # Split text into smaller docs
@@ -80,7 +75,7 @@ def print_colored(text, start_idx, end_idx, confidence):
80
  cstr_break(conf_str, color='grey')]), color='black')
81
  return a
82
 
83
- def final_qa_pipeline(file, query):
84
  docs = read_pdf(file)
85
  tokenized_corpus = []
86
  for doc in docs:
@@ -91,6 +86,10 @@ def final_qa_pipeline(file, query):
91
  top_k_retriver, top_k_ranker = param_top_k_retriver, param_top_k_ranker
92
  lvl1 = retrieval(query, top_k_retriver, docs, bm25)
93
 
 
 
 
 
94
  if len(lvl1) > 0:
95
  fnl_rank = qa_ranker(query, [l["docs"] for l in lvl1], top_k_ranker)
96
  top1 = print_colored(fnl_rank[0]['doc'], fnl_rank[0]['start'], fnl_rank[0]['end'], str(np.round(100*fnl_rank[0]["score"],1))+"%")
@@ -111,11 +110,12 @@ examples = [
111
  [os.path.abspath("NASDAQ_AAPL_2020.pdf"), "how much are the outstanding shares ?"],
112
  [os.path.abspath("NASDAQ_AAPL_2020.pdf"), "what is competitors strategy ?"],
113
  [os.path.abspath("NASDAQ_AAPL_2020.pdf"), "who is the chief executive officer ?"],
 
114
  ]
115
 
116
  iface = gr.Interface(
117
  fn = final_qa_pipeline,
118
- inputs = [gr.inputs.File(label="input pdf file"), gr.inputs.Textbox(label="Question:")],
119
  outputs = [gr.outputs.HTML(label="Top 1 answer"), gr.outputs.HTML(label="Top 2 answer")],
120
  examples=examples,
121
  theme = "grass",
 
17
  param_top_k_retriver = 15
18
  param_top_k_ranker = 3
19
 
 
 
 
 
 
20
  def read_pdf(file):
21
  text = extract_text(file.name)
22
  # Split text into smaller docs
 
75
  cstr_break(conf_str, color='grey')]), color='black')
76
  return a
77
 
78
+ def final_qa_pipeline(file, query, model_nm):
79
  docs = read_pdf(file)
80
  tokenized_corpus = []
81
  for doc in docs:
 
86
  top_k_retriver, top_k_ranker = param_top_k_retriver, param_top_k_ranker
87
  lvl1 = retrieval(query, top_k_retriver, docs, bm25)
88
 
89
+ qa_model = pipeline("question-answering",
90
+ #model = "deepset/minilm-uncased-squad2")
91
+ model = "deepset/"+model_nm)
92
+
93
  if len(lvl1) > 0:
94
  fnl_rank = qa_ranker(query, [l["docs"] for l in lvl1], top_k_ranker)
95
  top1 = print_colored(fnl_rank[0]['doc'], fnl_rank[0]['start'], fnl_rank[0]['end'], str(np.round(100*fnl_rank[0]["score"],1))+"%")
 
110
  [os.path.abspath("NASDAQ_AAPL_2020.pdf"), "how much are the outstanding shares ?"],
111
  [os.path.abspath("NASDAQ_AAPL_2020.pdf"), "what is competitors strategy ?"],
112
  [os.path.abspath("NASDAQ_AAPL_2020.pdf"), "who is the chief executive officer ?"],
113
+ [os.path.abspath("NASDAQ_MSFT_2020.pdf"), "How much is the guided revenue for next quarter?"],
114
  ]
115
 
116
  iface = gr.Interface(
117
  fn = final_qa_pipeline,
118
+ inputs = [gr.inputs.File(label="input pdf file"), gr.inputs.Textbox(label="Question:"), gr.inputs.Dropdown(["minilm-uncased-squad2","roberta-base-squad2"], "minilm-uncased-squad2", label="Model")],
119
  outputs = [gr.outputs.HTML(label="Top 1 answer"), gr.outputs.HTML(label="Top 2 answer")],
120
  examples=examples,
121
  theme = "grass",