samarthagarwal23 committed on
Commit
0e90d70
1 Parent(s): de8c106

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -5
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import gradio as gr
2
  import os
3
  import numpy as np
4
- os.system("pip install pdfminer.six rank_bm25 torch transformers")
5
 
6
  from gradio.mix import Series
7
  import re
@@ -11,7 +11,7 @@ import torch
11
  from transformers import pipeline
12
  import pdfminer
13
  from pdfminer.high_level import extract_text
14
- #from termcolor import colored
15
 
16
  def read_pdf(file):
17
  text = extract_text(file.name)
@@ -60,7 +60,13 @@ def qa_ranker(query, docs_, top_k_ranker):
60
  answer['doc'] = doc
61
  ans.append(answer)
62
  return sorted(ans, key=lambda x: x['score'], reverse=True)[:top_k_ranker]
63
-
 
 
 
 
 
 
64
  def final_qa_pipeline(file, query):
65
  docs = read_pdf(file)
66
  tokenized_corpus = []
@@ -74,7 +80,7 @@ def final_qa_pipeline(file, query):
74
 
75
  if len(lvl1) > 0:
76
  fnl_rank = qa_ranker(query, [l["docs"] for l in lvl1], top_k_ranker)
77
- return (fnl_rank[0]["answer"], np.round(fnl_rank[0]["score"],3), fnl_rank[0]["doc"])
78
  #for fnl_ in fnl_rank:
79
  # print("\n")
80
  # print_colored(fnl_['doc'], fnl_['start'], fnl_['end'])
@@ -93,7 +99,7 @@ examples = [
93
  iface = gr.Interface(
94
  fn = final_qa_pipeline,
95
  inputs = [gr.inputs.File(label="input pdf file"), gr.inputs.Textbox(label="Question:")],
96
- outputs = [gr.outputs.HTML(label="Answer"), gr.outputs.HTML(label="Score"), gr.outputs.HTML(label="Reference text")],
97
  examples=examples,
98
  )
99
  iface.launch()
 
1
  import gradio as gr
2
  import os
3
  import numpy as np
4
+ os.system("pip install pdfminer.six rank_bm25 torch transformers termcolor")
5
 
6
  from gradio.mix import Series
7
  import re
 
11
  from transformers import pipeline
12
  import pdfminer
13
  from pdfminer.high_level import extract_text
14
+ from termcolor import colored
15
 
16
  def read_pdf(file):
17
  text = extract_text(file.name)
 
60
  answer['doc'] = doc
61
  ans.append(answer)
62
  return sorted(ans, key=lambda x: x['score'], reverse=True)[:top_k_ranker]
63
+
64
def print_colored(text, start_idx, end_idx):
    """Return ``text`` as HTML with ``text[start_idx:end_idx]`` highlighted.

    The caller in this file sends the result to an HTML output component,
    so the highlight is an inline-styled ``<span>`` (red text on a yellow
    background) rather than terminal colour escape sequences, which a
    browser would show as stray characters instead of colour.

    Args:
        text: The full reference passage.
        start_idx: Index of the first highlighted character.
        end_idx: Index one past the last highlighted character.

    Returns:
        An HTML string: escaped leading text, the highlighted span, and the
        escaped trailing text.
    """
    # Local import keeps the block self-contained without touching the
    # file-level import list.
    import html

    # Escape each slice separately so characters such as "<" or "&" in the
    # document are displayed literally and cannot break the markup.
    before = html.escape(text[:start_idx])
    highlighted = html.escape(text[start_idx:end_idx])
    after = html.escape(text[end_idx:])

    return (
        before
        + '<span style="color: red; background-color: yellow;">'
        + highlighted
        + "</span>"
        + after
    )
69
+
70
  def final_qa_pipeline(file, query):
71
  docs = read_pdf(file)
72
  tokenized_corpus = []
 
80
 
81
  if len(lvl1) > 0:
82
  fnl_rank = qa_ranker(query, [l["docs"] for l in lvl1], top_k_ranker)
83
+ return (fnl_rank[0]["answer"], np.round(fnl_rank[0]["score"],3), print_colored(fnl_rank[0]['doc'], fnl_rank[0]['start'], fnl_rank[0]['end']))
84
  #for fnl_ in fnl_rank:
85
  # print("\n")
86
  # print_colored(fnl_['doc'], fnl_['start'], fnl_['end'])
 
99
# Build the web UI: a PDF file and a question go in; the answer, its score
# and the reference text come out.
iface = gr.Interface(
    fn=final_qa_pipeline,
    inputs=[
        gr.inputs.File(label="input pdf file"),
        gr.inputs.Textbox(label="Question:"),
    ],
    # The component class is spelled ``Textbox`` (as on the input side);
    # ``TextBox`` is not an attribute of gr.outputs and fails at start-up.
    outputs=[
        gr.outputs.Textbox(label="Answer"),
        gr.outputs.Textbox(label="Score"),
        gr.outputs.HTML(label="Reference text"),
    ],
    examples=examples,
)
iface.launch()