deepdoctection

Runtime error

App Files Community

JaMe76 committed on Jul 4, 2023

Commit

b9fe6b2

•

1 Parent(s): 397d15f

update space

Browse files

Files changed (1) hide show

app.py +9 -8

app.py CHANGED Viewed

@@ -1,13 +1,11 @@
 import os
-os.system('pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.9/index.html')
-credentials_kwargs={"aws_access_key_id": os.environ["ACCESS_KEY"],"aws_secret_access_key": os.environ["SECRET_KEY"]}
 # work around: https://discuss.huggingface.co/t/how-to-install-a-specific-version-of-gradio-in-spaces/13552
 os.system("pip uninstall -y gradio")
 os.system("pip install gradio==3.4.1")
 os.system(os.environ["DD_ADDONS"])
 from os import getcwd, path, environ
 import deepdoctection as dd
 from deepdoctection.dataflow.serialize import DataFromList
@@ -16,6 +14,7 @@ from dd_addons.extern import PdfTextDetector, PostProcessor, get_xsl_path
 from dd_addons.pipe.conn import PostProcessorService
 import gradio as gr
 _DD_ONE = "conf_dd_one.yaml"
@@ -97,6 +96,9 @@ d_item = dd.D2FrcnnDetector(item_config_path, item_weights_path, categories_item
 pdf_text = PdfTextDetector(_XSL_PATH)
 # text detector
 tex_text = dd.TextractOcrDetector(**credentials_kwargs)
@@ -161,10 +163,9 @@ def build_gradio_analyzer():
         order = dd.TextOrderService(
             text_container=cfg.TEXT_ORDERING.TEXT_CONTAINER,
-            floating_text_block_names=cfg.TEXT_ORDERING.FLOATING_TEXT_BLOCK,
-            text_block_names=cfg.TEXT_ORDERING.TEXT_BLOCK,
-            text_containers_to_text_block=cfg.TEXT_ORDERING.TEXT_CONTAINER_TO_TEXT_BLOCK
-        )
         pipe_component_list.append(order)
     pipe = dd.DoctectionPipe(pipeline_component_list=pipe_component_list)
@@ -182,7 +183,7 @@ def analyze_image(img, pdf, max_datapoints):
     analyzer = build_gradio_analyzer()
     if img is not None:
-        image = dd.Image(file_name="input.png", location="")
         image.image = img[:, :, ::-1]
         df = DataFromList(lst=[image])

 import os
 # work around: https://discuss.huggingface.co/t/how-to-install-a-specific-version-of-gradio-in-spaces/13552
 os.system("pip uninstall -y gradio")
 os.system("pip install gradio==3.4.1")
 os.system(os.environ["DD_ADDONS"])
+import time
 from os import getcwd, path, environ
 import deepdoctection as dd
 from deepdoctection.dataflow.serialize import DataFromList
 from dd_addons.pipe.conn import PostProcessorService
 import gradio as gr
+from botocore.config import Config
 _DD_ONE = "conf_dd_one.yaml"
 pdf_text = PdfTextDetector(_XSL_PATH)
 # text detector
+credentials_kwargs={"aws_access_key_id": os.environ["ACCESS_KEY"],
+                    "aws_secret_access_key": os.environ["SECRET_KEY"],
+                    "config": Config(region_name=os.environ["REGION"])}
 tex_text = dd.TextractOcrDetector(**credentials_kwargs)
         order = dd.TextOrderService(
             text_container=cfg.TEXT_ORDERING.TEXT_CONTAINER,
+            floating_text_block_categories=cfg.TEXT_ORDERING.FLOATING_TEXT_BLOCK,
+            text_block_categories=cfg.TEXT_ORDERING.TEXT_BLOCK,
+            include_residual_text_container=cfg.TEXT_ORDERING.TEXT_CONTAINER_TO_TEXT_BLOCK)
         pipe_component_list.append(order)
     pipe = dd.DoctectionPipe(pipeline_component_list=pipe_component_list)
     analyzer = build_gradio_analyzer()
     if img is not None:
+        image = dd.Image(file_name=str(time.time()).replace(".","") + ".png", location="")
         image.image = img[:, :, ::-1]
         df = DataFromList(lst=[image])