Spaces:
Build error
Build error
Ankur Goyal
committed on
Commit
•
47b5f74
1
Parent(s):
98e826c
Add better examples and confidence threshold
Browse files
- acze.png +0 -0
- app.py +22 -13
- contract.jpeg +0 -0
- north_sea.pdf +0 -0
- north_sea.png +0 -0
- statement.pdf +0 -0
- statement.png +0 -0
acze.png
ADDED
app.py
CHANGED
@@ -73,19 +73,21 @@ def normalize_bbox(box, width, height, padding=0.005):
|
|
73 |
EXAMPLES = [
|
74 |
[
|
75 |
"invoice.png",
|
76 |
-
"Invoice
|
77 |
],
|
78 |
[
|
79 |
-
"
|
80 |
-
"
|
81 |
],
|
82 |
[
|
83 |
-
"
|
84 |
-
"
|
85 |
],
|
86 |
]
|
87 |
|
88 |
-
QUESTION_FILES = {
|
|
|
|
|
89 |
|
90 |
FIELDS = {
|
91 |
"Vendor Name": ["Vendor Name - Logo?", "Vendor Name - Address?"],
|
@@ -98,6 +100,8 @@ FIELDS = {
|
|
98 |
"Invoice Total": ["Invoice Total?"],
|
99 |
"Amount Due": ["Amount Due?"],
|
100 |
"Payment Terms": ["Payment Terms?"],
|
|
|
|
|
101 |
}
|
102 |
|
103 |
|
@@ -150,7 +154,7 @@ colors = ["#64A087", "green", "black"]
|
|
150 |
|
151 |
|
152 |
def annotate_page(prediction, pages, document):
|
153 |
-
if "word_ids" in prediction:
|
154 |
image = pages[prediction["page"]]
|
155 |
draw = ImageDraw.Draw(image, "RGBA")
|
156 |
word_boxes = lift_word_boxes(document, prediction["page"])
|
@@ -192,9 +196,14 @@ def process_fields(document, fields, model=list(CHECKPOINTS.keys())[0]):
|
|
192 |
table = []
|
193 |
|
194 |
for (field_name, questions) in fields.items():
|
195 |
-
answers = [
|
|
|
|
|
|
|
|
|
|
|
196 |
answers.sort(key=lambda x: -x.get("score", 0) if x else 0)
|
197 |
-
top = answers[0]
|
198 |
annotate_page(top, pages, document)
|
199 |
ret[field_name] = top
|
200 |
table.append([field_name, top.get("answer") if top is not None else None])
|
@@ -400,8 +409,8 @@ with gr.Blocks(css=CSS) as demo:
|
|
400 |
None, # document
|
401 |
# {**FIELDS}, # fields
|
402 |
gr.update(visible=False, value=None), # output
|
403 |
-
|
404 |
-
|
405 |
None,
|
406 |
None,
|
407 |
None,
|
@@ -414,8 +423,8 @@ with gr.Blocks(css=CSS) as demo:
|
|
414 |
document,
|
415 |
# fields,
|
416 |
output,
|
417 |
-
|
418 |
-
|
419 |
example_image,
|
420 |
upload,
|
421 |
url,
|
|
|
73 |
EXAMPLES = [
|
74 |
[
|
75 |
"invoice.png",
|
76 |
+
"East Repair Invoice",
|
77 |
],
|
78 |
[
|
79 |
+
"acze.png",
|
80 |
+
"ACZE Invoice",
|
81 |
],
|
82 |
[
|
83 |
+
"north_sea.png",
|
84 |
+
"North Sea Invoice",
|
85 |
],
|
86 |
]
|
87 |
|
88 |
+
QUESTION_FILES = {
|
89 |
+
"North Sea Invoice": "north_sea.pdf",
|
90 |
+
}
|
91 |
|
92 |
FIELDS = {
|
93 |
"Vendor Name": ["Vendor Name - Logo?", "Vendor Name - Address?"],
|
|
|
100 |
"Invoice Total": ["Invoice Total?"],
|
101 |
"Amount Due": ["Amount Due?"],
|
102 |
"Payment Terms": ["Payment Terms?"],
|
103 |
+
"Remit To Name": ["Remit To Name?"],
|
104 |
+
"Remit To Address": ["Remit To Address?"],
|
105 |
}
|
106 |
|
107 |
|
|
|
154 |
|
155 |
|
156 |
def annotate_page(prediction, pages, document):
|
157 |
+
if prediction is not None and "word_ids" in prediction:
|
158 |
image = pages[prediction["page"]]
|
159 |
draw = ImageDraw.Draw(image, "RGBA")
|
160 |
word_boxes = lift_word_boxes(document, prediction["page"])
|
|
|
196 |
table = []
|
197 |
|
198 |
for (field_name, questions) in fields.items():
|
199 |
+
answers = [
|
200 |
+
a
|
201 |
+
for q in questions
|
202 |
+
for a in ensure_list(run_pipeline(model, q, document, top_k=1))
|
203 |
+
if a.get("score", 1) > 0.5
|
204 |
+
]
|
205 |
answers.sort(key=lambda x: -x.get("score", 0) if x else 0)
|
206 |
+
top = answers[0] if len(answers) > 0 else None
|
207 |
annotate_page(top, pages, document)
|
208 |
ret[field_name] = top
|
209 |
table.append([field_name, top.get("answer") if top is not None else None])
|
|
|
409 |
None, # document
|
410 |
# {**FIELDS}, # fields
|
411 |
gr.update(visible=False, value=None), # output
|
412 |
+
gr.update(**empty_table(fields.value)), # output_table
|
413 |
+
gr.update(visible=False),
|
414 |
None,
|
415 |
None,
|
416 |
None,
|
|
|
423 |
document,
|
424 |
# fields,
|
425 |
output,
|
426 |
+
output_table,
|
427 |
+
img_clear_button,
|
428 |
example_image,
|
429 |
upload,
|
430 |
url,
|
contract.jpeg
DELETED
Binary file (124 kB)
|
|
north_sea.pdf
ADDED
Binary file (70.9 kB). View file
|
|
north_sea.png
ADDED
statement.pdf
DELETED
The diff for this file is too large to render.
See raw diff
|
|
statement.png
DELETED
Binary file (140 kB)
|
|