Spaces:
Build error
Build error
Upload 4 files
Browse files- app.py +12 -5
- packages.txt +2 -1
- requirements.txt +5 -2
app.py
CHANGED
@@ -14,6 +14,7 @@ from transformers import DetrFeatureExtractor
|
|
14 |
from transformers import TableTransformerForObjectDetection
|
15 |
import torch
|
16 |
import gradio as gr
|
|
|
17 |
|
18 |
|
19 |
def plot_results_detection(
|
@@ -396,7 +397,15 @@ def postprocess_dataframes(result_tables):
|
|
396 |
return res
|
397 |
|
398 |
|
399 |
-
def process_image(image):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
400 |
TD_THRESHOLD = 0.7
|
401 |
TSR_THRESHOLD = 0.8
|
402 |
padd_top = 100
|
@@ -461,13 +470,11 @@ examples = [["image_0.png"]]
|
|
461 |
|
462 |
iface = gr.Interface(
|
463 |
fn=process_image,
|
464 |
-
inputs=gr.Image(type="pil"),
|
465 |
outputs="text",
|
466 |
title=title,
|
467 |
description=description,
|
468 |
article=article,
|
469 |
examples=examples,
|
470 |
)
|
471 |
-
iface.launch(debug=True)
|
472 |
-
|
473 |
-
|
|
|
14 |
from transformers import TableTransformerForObjectDetection
|
15 |
import torch
|
16 |
import gradio as gr
|
17 |
+
import pdf2image
|
18 |
|
19 |
|
20 |
def plot_results_detection(
|
|
|
397 |
return res
|
398 |
|
399 |
|
400 |
+
def process_image(image, pdf):
|
401 |
+
if pdf:
|
402 |
+
path_to_pdf = pdf.name
|
403 |
+
# convert PDF to PIL images (one image per page)
|
404 |
+
first_page=True # we want here only the first page as image
|
405 |
+
if first_page: last_page = 1
|
406 |
+
else: last_page = None
|
407 |
+
imgs = pdf2image.convert_from_path(path_to_pdf, last_page=last_page)
|
408 |
+
image = imgs[0]
|
409 |
TD_THRESHOLD = 0.7
|
410 |
TSR_THRESHOLD = 0.8
|
411 |
padd_top = 100
|
|
|
470 |
|
471 |
iface = gr.Interface(
|
472 |
fn=process_image,
|
473 |
+
inputs=[gr.Image(type="pil"), gr.File(label="PDF")],
|
474 |
outputs="text",
|
475 |
title=title,
|
476 |
description=description,
|
477 |
article=article,
|
478 |
examples=examples,
|
479 |
)
|
480 |
+
iface.launch(debug=True)
|
|
|
|
packages.txt
CHANGED
@@ -1 +1,2 @@
|
|
1 |
-
tesseract-ocr
|
|
|
|
1 |
+
tesseract-ocr
|
2 |
+
poppler-utils
|
requirements.txt
CHANGED
@@ -1,9 +1,12 @@
|
|
1 |
opencv-python
|
2 |
pytesseract
|
3 |
Pillow
|
4 |
-
gradio
|
5 |
timm
|
6 |
transformers
|
7 |
numpy
|
8 |
pandas
|
9 |
-
torch
|
|
|
|
|
|
|
|
1 |
opencv-python
|
2 |
pytesseract
|
3 |
Pillow
|
4 |
+
gradio
|
5 |
timm
|
6 |
transformers
|
7 |
numpy
|
8 |
pandas
|
9 |
+
torch
|
10 |
+
craft_text_detector
|
11 |
+
transformers[sentencepiece]
|
12 |
+
pdf2image
|