File size: 3,035 Bytes
d748bf5
 
 
4769339
6c71924
 
 
 
 
ac7b15a
 
4769339
 
 
6c71924
4769339
 
 
 
 
 
 
 
981daf7
 
 
 
 
4769339
 
 
 
 
 
 
 
 
 
 
 
 
 
29c2d45
 
 
 
4769339
 
 
 
 
 
ac7b15a
cfa812c
ac7b15a
4769339
6199455
ac7b15a
4769339
ac7b15a
4769339
ac7b15a
 
 
 
72386ad
6c47f29
72386ad
3b9c7e0
ac7b15a
cfa812c
3b9c7e0
ac7b15a
72386ad
95ea484
ac7b15a
423104f
ac7b15a
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import os
import subprocess
import sys

# Install detectron2 at start-up from the wheel index given in the URL below.
# pip is invoked through the running interpreter (`sys.executable -m pip`) so
# the package is installed into the same environment that performs the imports
# further down, and the command is passed as an argument list so no shell is
# involved. A failed install is reported but is not fatal here, matching the
# previous best-effort behaviour.
_pip_result = subprocess.run(
    [
        sys.executable,
        "-m",
        "pip",
        "install",
        "detectron2",
        "-f",
        "https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.9/index.html",
    ],
    check=False,
)
if _pip_result.returncode != 0:
    print(
        f"WARNING: 'pip install detectron2' exited with code {_pip_result.returncode}",
        file=sys.stderr,
    )

import deepdoctection as dd

from deepdoctection.extern.model import ModelProfile
from deepdoctection.analyzer.dd import build_analyzer, _auto_select_lib_and_device, _maybe_copy_config_to_cache
from deepdoctection.utils.metacfg import set_config_by_yaml

import gradio as gr

# Paths of the dd-one and Tesseract configuration files used when the
# analyzer is set up.
_DD_ONE = "deepdoctection/configs/conf_dd_one.yaml"
_TESSERACT = "deepdoctection/configs/conf_tesseract.yaml"

# Catalog name under which the custom layout model is registered.
_LAYOUT_MODEL_NAME = "layout/model_final_inf_only.pt"

# Layout classes of the model in category-id order; the n-th entry (counting
# from 1) is registered under the string key str(n).
_LAYOUT_CLASSES = (
    dd.LayoutType.text,
    dd.LayoutType.title,
    dd.LayoutType.list,
    dd.LayoutType.table,
    dd.LayoutType.figure,
)

# Profile describing the custom Detectron2 layout model. The Hugging Face
# repository id is read from the HF_REPO environment variable and is None
# when that variable is not set.
_layout_profile = ModelProfile(
    name=_LAYOUT_MODEL_NAME,
    description="Detectron2 layout detection model trained on private datasets",
    config="dd/d2/layout/CASCADE_RCNN_R_50_FPN_GN.yaml",
    size=[274632215],
    tp_model=False,
    hf_repo_id=os.environ.get("HF_REPO"),
    hf_model_name="model_final_inf_only.pt",
    hf_config_file=["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"],
    categories={
        str(category_id): layout_class
        for category_id, layout_class in enumerate(_LAYOUT_CLASSES, start=1)
    },
)

dd.ModelCatalog.register(_LAYOUT_MODEL_NAME, _layout_profile)
        
def get_space_dd_analyzer():
    """Build a deepdoctection analyzer that uses the custom layout model.

    Loads the dd-one configuration, overrides the library/device selection
    and the table, OCR and language settings, points the Detectron2 layout
    weights at the registered custom model, and returns the result of
    `build_analyzer` for that configuration.
    """
    lib, device = _auto_select_lib_and_device()

    # Both configuration files are passed through the cache helper; only the
    # dd-one path is needed afterwards.
    config_path = _maybe_copy_config_to_cache(_DD_ONE)
    _maybe_copy_config_to_cache(_TESSERACT)

    cfg = set_config_by_yaml(config_path)

    # Top-level settings to override, applied in this order.
    top_level_overrides = {
        "LIB": lib,
        "DEVICE": device,
        "TAB": True,
        "TAB_REF": True,
        "OCR": True,
        "LANG": None,
    }

    # Unfreeze the configuration for editing, then freeze it again.
    cfg.freeze(freezed=False)
    for key, value in top_level_overrides.items():
        setattr(cfg, key, value)
    cfg.WEIGHTS.D2LAYOUT = "layout/model_final_inf_only.pt"
    cfg.freeze()

    return build_analyzer(cfg)
    
    

# Analyzer shared by all calls to `analyze_image`; created on first use so
# that start-up is not slowed down and the analyzer is not rebuilt per request.
_ANALYZER = None


def _get_analyzer():
    """Return the shared analyzer, building it on the first call."""
    global _ANALYZER
    if _ANALYZER is None:
        _ANALYZER = get_space_dd_analyzer()
    return _ANALYZER


def analyze_image(img):
    """Run the analyzer on a single image and return its results.

    Args:
        img: Image as a numpy array with a channel axis in last position
            (the interface below requests ``type='numpy'``).

    Returns:
        A tuple ``(visualisation, result)`` where ``visualisation`` is what
        ``dp.viz(show_table_structure=False)`` returns and ``result`` is the
        page's ``as_dict()`` output without its ``"image"`` entry.

    Raises:
        ValueError: If no image was supplied.
    """
    if img is None:
        raise ValueError("No image provided")

    # Create an image object and pass it to the analyzer by using dataflows.
    image = dd.Image(file_name="input.png", location="")
    # Reverse the channel axis (presumably RGB -> BGR as expected by the
    # analyzer -- TODO confirm).
    image.image = img[:, :, ::-1]

    df = dd.DataFromList(lst=[image])

    # NOTE(review): the analyzer instance is reused across calls; this assumes
    # requests are not processed by the same analyzer concurrently -- confirm
    # before enabling parallel request handling.
    df = _get_analyzer().analyze(dataset_dataflow=df)
    df.reset_state()
    dp = next(iter(df))

    out = dp.as_dict()
    # Drop the "image" entry from the returned dict; tolerate its absence.
    out.pop("image", None)

    return dp.viz(show_table_structure=False), out

# Interface components. The image components are created through the
# top-level `gr.Image` class, consistent with the top-level `gr.JSON`
# component used for the second output, instead of the deprecated
# `gr.inputs` / `gr.outputs` namespaces.
inputs = [gr.Image(type="numpy", label="Original Image")]
outputs = [gr.Image(type="numpy", label="Output Image"), gr.JSON()]

# Texts shown in the interface.
title = "Deepdoctection - A Document AI Package"
description = "Demonstration of layout analysis and output of a document page. This demo uses the deepdoctection analyzer with Tesseract's OCR engine. Models detect text, titles, tables, figures and lists as well as table cells. Based on the layout it determines reading order and generates an JSON output."

# Example inputs offered in the interface, one list per example.
examples = [['sample_1.jpg'], ['sample_2.png']]

# Build the interface around `analyze_image` and start serving it.
gr.Interface(
    analyze_image,
    inputs,
    outputs,
    title=title,
    description=description,
    examples=examples,
).launch()