import os os.system('pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.9/index.html') # work around: https://discuss.huggingface.co/t/how-to-install-a-specific-version-of-gradio-in-spaces/13552 os.system("pip uninstall -y gradio") os.system("pip install gradio==3.4.1") from os import getcwd, path, environ import deepdoctection as dd from deepdoctection.dataflow.serialize import DataFromList import gradio as gr _DD_ONE = "conf_dd_one.yaml" _DETECTIONS = ["table", "ocr"] dd.ModelCatalog.register("layout/model_final_inf_only.pt",dd.ModelProfile( name="layout/model_final_inf_only.pt", description="Detectron2 layout detection model trained on private datasets", config="dd/d2/layout/CASCADE_RCNN_R_50_FPN_GN.yaml", size=[274632215], tp_model=False, hf_repo_id=environ.get("HF_REPO"), hf_model_name="model_final_inf_only.pt", hf_config_file=["Base-RCNN-FPN.yaml", "CASCADE_RCNN_R_50_FPN_GN.yaml"], categories={"1": dd.LayoutType.text, "2": dd.LayoutType.title, "3": dd.LayoutType.list, "4": dd.LayoutType.table, "5": dd.LayoutType.figure}, )) # Set up of the configuration and logging. Models are globally defined, so that they are not re-loaded once the input # updates cfg = dd.set_config_by_yaml(path.join(getcwd(),_DD_ONE)) cfg.freeze(freezed=False) cfg.DEVICE = "cpu" cfg.freeze() # layout detector layout_config_path = dd.ModelCatalog.get_full_path_configs(cfg.CONFIG.D2LAYOUT) layout_weights_path = dd.ModelDownloadManager.maybe_download_weights_and_configs(cfg.WEIGHTS.D2LAYOUT) categories_layout = dd.ModelCatalog.get_profile(cfg.WEIGHTS.D2LAYOUT).categories assert categories_layout is not None assert layout_weights_path is not None d_layout = dd.D2FrcnnDetector(layout_config_path, layout_weights_path, categories_layout, device=cfg.DEVICE) # cell detector cell_config_path = dd.ModelCatalog.get_full_path_configs(cfg.CONFIG.D2CELL) cell_weights_path = dd.ModelDownloadManager.maybe_download_weights_and_configs(cfg.WEIGHTS.D2CELL) categories_cell = dd.ModelCatalog.get_profile(cfg.WEIGHTS.D2CELL).categories assert categories_cell is not None d_cell = dd.D2FrcnnDetector(cell_config_path, cell_weights_path, categories_cell, device=cfg.DEVICE) # row/column detector item_config_path = dd.ModelCatalog.get_full_path_configs(cfg.CONFIG.D2ITEM) item_weights_path = dd.ModelDownloadManager.maybe_download_weights_and_configs(cfg.WEIGHTS.D2ITEM) categories_item = dd.ModelCatalog.get_profile(cfg.WEIGHTS.D2ITEM).categories assert categories_item is not None d_item = dd.D2FrcnnDetector(item_config_path, item_weights_path, categories_item, device=cfg.DEVICE) # word detector det = dd.DoctrTextlineDetector() # text recognizer rec = dd.DoctrTextRecognizer() def build_gradio_analyzer(table, table_ref, ocr): """Building the Detectron2/DocTr analyzer based on the given config""" cfg.freeze(freezed=False) cfg.TAB = table cfg.TAB_REF = table_ref cfg.OCR = ocr cfg.freeze() pipe_component_list = [] layout = dd.ImageLayoutService(d_layout, to_image=True, crop_image=True) pipe_component_list.append(layout) if cfg.TAB: cell = dd.SubImageLayoutService(d_cell, dd.LayoutType.table, {1: 6}, True) pipe_component_list.append(cell) item = dd.SubImageLayoutService(d_item, dd.LayoutType.table, {1: 7, 2: 8}, True) pipe_component_list.append(item) table_segmentation = dd.TableSegmentationService( cfg.SEGMENTATION.ASSIGNMENT_RULE, cfg.SEGMENTATION.IOU_THRESHOLD_ROWS if cfg.SEGMENTATION.ASSIGNMENT_RULE in ["iou"] else cfg.SEGMENTATION.IOA_THRESHOLD_ROWS, cfg.SEGMENTATION.IOU_THRESHOLD_COLS if cfg.SEGMENTATION.ASSIGNMENT_RULE in ["iou"] else cfg.SEGMENTATION.IOA_THRESHOLD_COLS, cfg.SEGMENTATION.FULL_TABLE_TILING, cfg.SEGMENTATION.REMOVE_IOU_THRESHOLD_ROWS, cfg.SEGMENTATION.REMOVE_IOU_THRESHOLD_COLS, ) pipe_component_list.append(table_segmentation) if cfg.TAB_REF: table_segmentation_refinement = dd.TableSegmentationRefinementService() pipe_component_list.append(table_segmentation_refinement) if cfg.OCR: d_layout_text = dd.ImageLayoutService(det, to_image=True, crop_image=True) pipe_component_list.append(d_layout_text) d_text = dd.TextExtractionService(rec, extract_from_roi="WORD") pipe_component_list.append(d_text) match = dd.MatchingService( parent_categories=cfg.WORD_MATCHING.PARENTAL_CATEGORIES, child_categories=dd.LayoutType.word, matching_rule=cfg.WORD_MATCHING.RULE, threshold=cfg.WORD_MATCHING.IOU_THRESHOLD if cfg.WORD_MATCHING.RULE in ["iou"] else cfg.WORD_MATCHING.IOA_THRESHOLD, ) pipe_component_list.append(match) order = dd.TextOrderService( text_container=dd.LayoutType.word, floating_text_block_names=[dd.LayoutType.title, dd.LayoutType.text, dd.LayoutType.list], text_block_names=[ dd.LayoutType.title, dd.LayoutType.text, dd.LayoutType.list, dd.LayoutType.cell, dd.CellType.header, dd.CellType.body, ], ) pipe_component_list.append(order) pipe = dd.DoctectionPipe(pipeline_component_list=pipe_component_list) return pipe def prepare_output(dp, add_table, add_ocr): out = dp.as_dict() out.pop("_image") layout_items = dp.items if add_ocr: layout_items.sort(key=lambda x: x.reading_order) layout_items_str = "" for item in layout_items: layout_items_str += f"\n {item.layout_type}: {item.text}" if add_table: html_list = [table.html for table in dp.tables] if html_list: html = html_list[0] else: html = None else: html = None return dp.viz(show_table_structure=False), layout_items_str, html, out def analyze_image(img, pdf, attributes): # creating an image object and passing to the analyzer by using dataflows add_table = _DETECTIONS[0] in attributes add_ocr = _DETECTIONS[1] in attributes analyzer = build_gradio_analyzer(add_table, add_table, add_ocr) if img is not None: image = dd.Image(file_name="input.png", location="") image.image = img[:, :, ::-1] df = DataFromList(lst=[image]) df = analyzer.analyze(dataset_dataflow=df) elif pdf: df = analyzer.analyze(path=pdf.name, max_datapoints=3, output="image") else: raise ValueError df.reset_state() df_iter = iter(df) dp = next(df_iter) return prepare_output(dp, add_table, add_ocr) demo = gr.Blocks(css="scrollbar.css") with demo: with gr.Box(): gr.Markdown("