Ankur Goyal committed on
Commit
1af0b6d
1 Parent(s): 2919076

Draw a box over the answer

Browse files
Files changed (1) hide show
  1. app.py +43 -8
app.py CHANGED
@@ -2,6 +2,7 @@ import os
2
 
3
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
4
 
 
5
  import streamlit as st
6
 
7
  import torch
@@ -24,8 +25,28 @@ def construct_pipeline():
24
 
25
 
26
  @st.cache
27
- def run_pipeline(question, document):
28
- return construct_pipeline()(question=question, **document.context)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
 
31
  st.markdown("# DocQuery: Query Documents w/ NLP")
@@ -75,16 +96,30 @@ question = st.text_input("QUESTION", "")
75
  document = st.session_state.document
76
  loading_placeholder = st.empty()
77
  if document is not None:
78
- col1, col2 = st.columns(2)
79
- col1.image(document.preview, use_column_width=True)
80
 
81
- if document is not None and question is not None and len(question) > 0:
82
- predictions = run_pipeline(question=question, document=document)
83
 
 
 
84
  col2.header("Answers")
85
- for p in ensure_list(predictions):
86
- col2.subheader(f"{ p['answer'] }: ({round(p['score'] * 100, 1)}%)")
87
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
  "DocQuery uses LayoutLMv1 fine-tuned on DocVQA, a document visual question answering dataset, as well as SQuAD, which boosts its English-language comprehension. To use it, simply upload an image or PDF, type a question, and click 'submit', or click one of the examples to load them."
90
 
 
2
 
3
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
4
 
5
+ from PIL import ImageDraw
6
  import streamlit as st
7
 
8
  import torch
 
25
 
26
 
27
  @st.cache
28
+ def run_pipeline(question, document, top_k):
29
+ return construct_pipeline()(question=question, **document.context, top_k=top_k)
30
+
31
+
32
+ # TODO: Move into docquery
33
+ # TODO: Support words past the first page (or window?)
34
+ def lift_word_boxes(document):
35
+ return document.context["image"][0][1]
36
+
37
+
38
+ def expand_bbox(word_boxes):
39
+ if len(word_boxes) == 0:
40
+ return None
41
+
42
+ min_x, min_y, max_x, max_y = zip(*[x[1] for x in word_boxes])
43
+ return [min(min_x), min(min_y), max(max_x), max(max_y)]
44
+
45
+
46
+ # LayoutLM boxes are normalized to 0, 1000
47
+ def normalize_bbox(box, width, height):
48
+ pct = [c / 1000 for c in box]
49
+ return [pct[0] * width, pct[1] * height, pct[2] * width, pct[3] * height]
50
 
51
 
52
  st.markdown("# DocQuery: Query Documents w/ NLP")
 
96
  document = st.session_state.document
97
  loading_placeholder = st.empty()
98
  if document is not None:
99
+ col1, col2 = st.columns([3, 1])
100
+ image = document.preview
101
 
 
 
102
 
103
+ colors = ["blue", "red", "green"]
104
+ if document is not None and question is not None and len(question) > 0:
105
  col2.header("Answers")
 
 
106
 
107
+ predictions = run_pipeline(question=question, document=document, top_k=1)
108
+
109
+ word_boxes = lift_word_boxes(document)
110
+ image = image.copy()
111
+ draw = ImageDraw.Draw(image)
112
+ for i, p in enumerate(ensure_list(predictions)):
113
+ col2.markdown(f"#### { p['answer'] }: ({round(p['score'] * 100, 1)}%)")
114
+ x1, y1, x2, y2 = normalize_bbox(
115
+ expand_bbox(word_boxes[p["start"] : p["end"] + 1]),
116
+ image.width,
117
+ image.height,
118
+ )
119
+ draw.rectangle(((x1, y1), (x2, y2)), outline=colors[i])
120
+
121
+ if document is not None:
122
+ col1.image(image, use_column_width=True)
123
 
124
  "DocQuery uses LayoutLMv1 fine-tuned on DocVQA, a document visual question answering dataset, as well as SQuAD, which boosts its English-language comprehension. To use it, simply upload an image or PDF, type a question, and click 'submit', or click one of the examples to load them."
125