Add model implementation and training output

Browse files

Files changed (12) hide show

.gitignore +178 -0
Detectron2_DocLayNet.ipynb +0 -0
README.md +41 -0
config.yml +328 -0
load_the_model.ipynb +0 -0
metadata.json +1 -0
output/events.out.tfevents.1724410384.Legion.1993.0 +3 -0
output/events.out.tfevents.1725745992.Legion.508902.0 +3 -0
output/last_checkpoint +1 -0
output/metrics.json +0 -0
output/model_final.pth +3 -0
requirements.txt +180 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,178 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+# output folders
+#  Ignore everything inside /output (not the directory itself) so that the
+#  published artifacts can be re-included by the negations at the end of this file.
+/output/*
+/output_first_try
+/output_second_try
+/output_publaynet
+# datasets
+/FUNSD
+# pictures
+*.jpg
+*.png
+# configs
+*.json
+# published artifacts that the README and config.yml rely on
+#  These negations must stay last: the last matching pattern wins.
+!/metadata.json
+!/output/events.out.tfevents.*
+!/output/last_checkpoint
+!/output/metrics.json
+!/output/model_final.pth

Detectron2_DocLayNet.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

README.md ADDED Viewed

	@@ -0,0 +1,41 @@

+# Detectron2 + DocLayNet
+A Detectron2 Faster R-CNN (ResNet-101 + FPN) detector trained on DocLayNet for document layout analysis.
+## Load the model
+First install the required dependencies:
+```bash
+# detectron2 is built from source and needs torch at build time, so install torch first.
+pip install torch==2.4.0 torchvision==0.19.0
+# If pip still reports "No module named 'torch'" while building detectron2,
+# re-run the next command with --no-build-isolation.
+pip install -r requirements.txt
+```
+In a `.py` or `.ipynb` file:
+```python
+import cv2
+import json
+import matplotlib.pyplot as plt
+from detectron2.utils.visualizer import Visualizer
+from detectron2.data import Metadata
+from detectron2.config import get_cfg
+from detectron2.engine import DefaultPredictor
+cfg = get_cfg()
+cfg.merge_from_file("config.yml")
+cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7  # set the testing threshold for this model
+with open("metadata.json", "r") as f:
+    metadata_dict = json.load(f)
+predictor = DefaultPredictor(cfg)
+metadata = Metadata()
+metadata.set(thing_classes=metadata_dict["thing_classes"])
+im = cv2.imread("image.jpg")
+output = predictor(im)
+v = Visualizer(im[:, :, ::-1], metadata=metadata, scale=0.8)
+v = v.draw_instance_predictions(output["instances"].to("cpu"))
+plt.figure(figsize=(14,10))
+plt.imshow(cv2.cvtColor(v.get_image()[:, :, ::-1], cv2.COLOR_BGR2RGB))
+plt.show()
+```

config.yml ADDED Viewed

	@@ -0,0 +1,328 @@

+CUDNN_BENCHMARK: false
+DATALOADER:
+  ASPECT_RATIO_GROUPING: true
+  FILTER_EMPTY_ANNOTATIONS: true
+  NUM_WORKERS: 2
+  REPEAT_SQRT: true
+  REPEAT_THRESHOLD: 0.0
+  SAMPLER_TRAIN: TrainingSampler
+DATASETS:
+  PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
+  PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
+  PROPOSAL_FILES_TEST: []
+  PROPOSAL_FILES_TRAIN: []
+  TEST:
+  - test  # a dataset name, not a path; NOTE(review): presumably registered in the training notebook - confirm
+  TRAIN:
+  - train  # same value as the "name" field in metadata.json
+GLOBAL:
+  HACK: 1.0
+INPUT:
+  CROP:
+    ENABLED: false
+    SIZE:
+    - 0.9
+    - 0.9
+    TYPE: relative_range
+  FORMAT: BGR
+  MASK_FORMAT: polygon
+  MAX_SIZE_TEST: 1333
+  MAX_SIZE_TRAIN: 1333
+  MIN_SIZE_TEST: 800
+  MIN_SIZE_TRAIN:
+  - 640
+  - 672
+  - 704
+  - 736
+  - 768
+  - 800
+  MIN_SIZE_TRAIN_SAMPLING: choice
+  RANDOM_FLIP: horizontal
+MODEL:
+  ANCHOR_GENERATOR:
+    ANGLES:
+    - - -90
+      - 0
+      - 90
+    ASPECT_RATIOS:
+    - - 0.5
+      - 1.0
+      - 2.0
+    NAME: DefaultAnchorGenerator
+    OFFSET: 0.0
+    SIZES:
+    - - 32
+    - - 64
+    - - 128
+    - - 256
+    - - 512
+  BACKBONE:
+    FREEZE_AT: 2
+    NAME: build_resnet_fpn_backbone
+  DEVICE: cuda  # hard-coded to GPU; override with cfg.MODEL.DEVICE = "cpu" after merge_from_file on CPU-only machines
+  FPN:
+    FUSE_TYPE: sum
+    IN_FEATURES:
+    - res2
+    - res3
+    - res4
+    - res5
+    NORM: ''
+    OUT_CHANNELS: 256
+  KEYPOINT_ON: false
+  LOAD_PROPOSALS: false
+  MASK_ON: false
+  META_ARCHITECTURE: GeneralizedRCNN
+  PANOPTIC_FPN:
+    COMBINE:
+      ENABLED: true
+      INSTANCES_CONFIDENCE_THRESH: 0.5
+      OVERLAP_THRESH: 0.5
+      STUFF_AREA_LIMIT: 4096
+    INSTANCE_LOSS_WEIGHT: 1.0
+  PIXEL_MEAN:
+  - 103.53
+  - 116.28
+  - 123.675
+  PIXEL_STD:
+  - 1.0
+  - 1.0
+  - 1.0
+  PROPOSAL_GENERATOR:
+    MIN_SIZE: 0
+    NAME: RPN
+  RESNETS:
+    DEFORM_MODULATED: false
+    DEFORM_NUM_GROUPS: 1
+    DEFORM_ON_PER_STAGE:
+    - false
+    - false
+    - false
+    - false
+    DEPTH: 101
+    NORM: FrozenBN
+    NUM_GROUPS: 1
+    OUT_FEATURES:
+    - res2
+    - res3
+    - res4
+    - res5
+    RES2_OUT_CHANNELS: 256
+    RES5_DILATION: 1
+    STEM_OUT_CHANNELS: 64
+    STRIDE_IN_1X1: true
+    WIDTH_PER_GROUP: 64
+  RETINANET:
+    BBOX_REG_LOSS_TYPE: smooth_l1
+    BBOX_REG_WEIGHTS: &id002
+    - 1.0
+    - 1.0
+    - 1.0
+    - 1.0
+    FOCAL_LOSS_ALPHA: 0.25
+    FOCAL_LOSS_GAMMA: 2.0
+    IN_FEATURES:
+    - p3
+    - p4
+    - p5
+    - p6
+    - p7
+    IOU_LABELS:
+    - 0
+    - -1
+    - 1
+    IOU_THRESHOLDS:
+    - 0.4
+    - 0.5
+    NMS_THRESH_TEST: 0.5
+    NORM: ''
+    NUM_CLASSES: 80
+    NUM_CONVS: 4
+    PRIOR_PROB: 0.01
+    SCORE_THRESH_TEST: 0.05
+    SMOOTH_L1_LOSS_BETA: 0.1
+    TOPK_CANDIDATES_TEST: 1000
+  ROI_BOX_CASCADE_HEAD:
+    BBOX_REG_WEIGHTS:
+    - &id001
+      - 10.0
+      - 10.0
+      - 5.0
+      - 5.0
+    - - 20.0
+      - 20.0
+      - 10.0
+      - 10.0
+    - - 30.0
+      - 30.0
+      - 15.0
+      - 15.0
+    IOUS:
+    - 0.5
+    - 0.6
+    - 0.7
+  ROI_BOX_HEAD:
+    BBOX_REG_LOSS_TYPE: smooth_l1
+    BBOX_REG_LOSS_WEIGHT: 1.0
+    BBOX_REG_WEIGHTS: *id001
+    CLS_AGNOSTIC_BBOX_REG: false
+    CONV_DIM: 256
+    FC_DIM: 1024
+    FED_LOSS_FREQ_WEIGHT_POWER: 0.5
+    FED_LOSS_NUM_CLASSES: 50
+    NAME: FastRCNNConvFCHead
+    NORM: ''
+    NUM_CONV: 0
+    NUM_FC: 2
+    POOLER_RESOLUTION: 7
+    POOLER_SAMPLING_RATIO: 0
+    POOLER_TYPE: ROIAlignV2
+    SMOOTH_L1_BETA: 0.0
+    TRAIN_ON_PRED_BOXES: false
+    USE_FED_LOSS: false
+    USE_SIGMOID_CE: false
+  ROI_HEADS:
+    BATCH_SIZE_PER_IMAGE: 128
+    IN_FEATURES:
+    - p2
+    - p3
+    - p4
+    - p5
+    IOU_LABELS:
+    - 0
+    - 1
+    IOU_THRESHOLDS:
+    - 0.5
+    NAME: StandardROIHeads
+    NMS_THRESH_TEST: 0.5
+    NUM_CLASSES: 11  # equals the number of entries in "thing_classes" in metadata.json
+    POSITIVE_FRACTION: 0.25
+    PROPOSAL_APPEND_GT: true
+    SCORE_THRESH_TEST: 0.7  # same value the README snippet sets explicitly after loading this file
+  ROI_KEYPOINT_HEAD:
+    CONV_DIMS:
+    - 512
+    - 512
+    - 512
+    - 512
+    - 512
+    - 512
+    - 512
+    - 512
+    LOSS_WEIGHT: 1.0
+    MIN_KEYPOINTS_PER_IMAGE: 1
+    NAME: KRCNNConvDeconvUpsampleHead
+    NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS: true
+    NUM_KEYPOINTS: 17
+    POOLER_RESOLUTION: 14
+    POOLER_SAMPLING_RATIO: 0
+    POOLER_TYPE: ROIAlignV2
+  ROI_MASK_HEAD:
+    CLS_AGNOSTIC_MASK: false
+    CONV_DIM: 256
+    NAME: MaskRCNNConvUpsampleHead
+    NORM: ''
+    NUM_CONV: 4
+    POOLER_RESOLUTION: 14
+    POOLER_SAMPLING_RATIO: 0
+    POOLER_TYPE: ROIAlignV2
+  RPN:
+    BATCH_SIZE_PER_IMAGE: 256
+    BBOX_REG_LOSS_TYPE: smooth_l1
+    BBOX_REG_LOSS_WEIGHT: 1.0
+    BBOX_REG_WEIGHTS: *id002
+    BOUNDARY_THRESH: -1
+    CONV_DIMS:
+    - -1
+    HEAD_NAME: StandardRPNHead
+    IN_FEATURES:
+    - p2
+    - p3
+    - p4
+    - p5
+    - p6
+    IOU_LABELS:
+    - 0
+    - -1
+    - 1
+    IOU_THRESHOLDS:
+    - 0.3
+    - 0.7
+    LOSS_WEIGHT: 1.0
+    NMS_THRESH: 0.7
+    POSITIVE_FRACTION: 0.5
+    POST_NMS_TOPK_TEST: 1000
+    POST_NMS_TOPK_TRAIN: 1000
+    PRE_NMS_TOPK_TEST: 1000
+    PRE_NMS_TOPK_TRAIN: 2000
+    SMOOTH_L1_BETA: 0.0
+  SEM_SEG_HEAD:
+    COMMON_STRIDE: 4
+    CONVS_DIM: 128
+    IGNORE_VALUE: 255
+    IN_FEATURES:
+    - p2
+    - p3
+    - p4
+    - p5
+    LOSS_WEIGHT: 1.0
+    NAME: SemSegFPNHead
+    NORM: GN
+    NUM_CLASSES: 54
+  WEIGHTS: ./output/model_final.pth  # relative path to the checkpoint added in this commit (stored via Git LFS)
+OUTPUT_DIR: ./output
+SEED: -1
+SOLVER:
+  AMP:
+    ENABLED: false
+  BASE_LR: 1.0e-05
+  BASE_LR_END: 0.0
+  BIAS_LR_FACTOR: 1.0
+  CHECKPOINT_PERIOD: 5000
+  CLIP_GRADIENTS:
+    CLIP_TYPE: value
+    CLIP_VALUE: 1.0
+    ENABLED: false  # clipping is off, so the CLIP_* values above are not applied
+    NORM_TYPE: 2.0
+  GAMMA: 0.1
+  IMS_PER_BATCH: 2
+  LR_SCHEDULER_NAME: WarmupMultiStepLR
+  MAX_ITER: 85000
+  MOMENTUM: 0.9
+  NESTEROV: false
+  NUM_DECAYS: 3
+  REFERENCE_WORLD_SIZE: 0
+  RESCALE_INTERVAL: false
+  STEPS:
+  - 210000  # NOTE(review): larger than MAX_ITER (85000), so presumably never reached in this run - confirm
+  - 250000  # NOTE(review): also larger than MAX_ITER - confirm whether LR decay was intended
+  WARMUP_FACTOR: 0.001
+  WARMUP_ITERS: 1000
+  WARMUP_METHOD: linear
+  WEIGHT_DECAY: 0.0001
+  WEIGHT_DECAY_BIAS: null
+  WEIGHT_DECAY_NORM: 0.0
+TEST:
+  AUG:
+    ENABLED: false
+    FLIP: true
+    MAX_SIZE: 4000
+    MIN_SIZES:
+    - 400
+    - 500
+    - 600
+    - 700
+    - 800
+    - 900
+    - 1000
+    - 1100
+    - 1200
+  DETECTIONS_PER_IMAGE: 100
+  EVAL_PERIOD: 0
+  EXPECTED_RESULTS: []
+  KEYPOINT_OKS_SIGMAS: []
+  PRECISE_BN:
+    ENABLED: false
+    NUM_ITER: 200
+VERSION: 2
+VIS_PERIOD: 0

load_the_model.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

metadata.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"name": "train", "thing_classes": ["Caption", "Footnote", "Formula", "List-item", "Page-footer", "Page-header", "Picture", "Section-header", "Table", "Text", "Title"]}

output/events.out.tfevents.1724410384.Legion.1993.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d34d88c2bc95494c1e75c1e46bb2d5d7123d30f189cea548a9291c48adb02222
+size 3934313

output/events.out.tfevents.1725745992.Legion.508902.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1ab5ae006e63f68b45f2706cf1c815c502c15257acb92872e19db70a1e551440
+size 171412

output/last_checkpoint ADDED Viewed

	@@ -0,0 +1 @@


1	+ model_final.pth

output/metrics.json ADDED Viewed

The diff for this file is too large to render. See raw diff

output/model_final.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1c645471f4eede20a57da47df492c71e8b21e6f956ef84dee801ec15a262df1c
+size 351189924

requirements.txt ADDED Viewed

	@@ -0,0 +1,180 @@

+# NOTE: entries marked "OS-managed" come from the Ubuntu system Python rather than from
+# this project. Several cannot be installed from a package index at the pinned version
+# (e.g. the "+ubuntu" local versions), so they are commented out to keep
+# `pip install -r requirements.txt` working. None of them is needed to run the model.
+absl-py==2.1.0
+aiohappyeyeballs==2.4.0
+aiohttp==3.10.5
+aiosignal==1.3.1
+antlr4-python3-runtime==4.9.3
+anyio==3.5.0
+asttokens==2.4.1
+async-timeout==4.0.3
+attrs==24.2.0
+Babel==2.8.0
+beautifulsoup4==4.10.0
+black==24.8.0
+blinker==1.4
+certifi==2024.7.4
+chardet==4.0.0
+charset-normalizer==3.3.2
+click==8.1.7
+cloudpickle==3.0.0
+colorama==0.4.4
+comm==0.2.2
+# command-not-found==0.3  # OS-managed
+commonmark==0.9.1
+contourpy==1.2.1
+cov-core==1.15.0
+coverage==6.2
+cryptography==3.4.8
+cycler==0.12.1
+datasets==2.21.0
+# dbus-python==1.2.18  # OS-managed
+debugpy==1.8.5
+decorator==5.1.1
+detectron2 @ git+https://github.com/facebookresearch/detectron2.git@bcfd464d0c810f0442d91a349c0f6df945467143
+dill==0.3.8
+distro==1.7.0
+# distro-info==1.1+ubuntu0.2  # OS-managed
+exceptiongroup==1.2.2
+execnet==1.9.0
+executing==2.0.1
+filelock==3.15.4
+flake8==4.0.1
+fonttools==4.53.1
+frozenlist==1.4.1
+fsspec==2024.6.1
+fvcore==0.1.5.post20221221
+grpcio==1.65.5
+h11==0.13.0
+html5lib==1.1
+httpcore==0.14.5
+httplib2==0.20.2
+httpx==0.22.0
+huggingface-hub==0.24.6
+hydra-core==1.3.2
+idna==3.7
+importlib-metadata==4.6.4
+iniconfig==1.1.1
+iopath==0.1.9
+ipykernel==6.29.5
+ipython==8.26.0
+isort==5.6.4
+jedi==0.19.1
+jeepney==0.7.1
+Jinja2==3.1.4
+jupyter_client==8.6.2
+jupyter_core==5.7.2
+keyring==23.5.0
+kiwisolver==1.4.5
+launchpadlib==1.10.16
+lazr.restfulclient==0.14.4
+lazr.uri==1.0.6
+livereload==2.6.3
+lxml==4.8.0
+Markdown==3.7
+MarkupSafe==2.1.5
+matplotlib==3.9.2
+matplotlib-inline==0.1.7
+mccabe==0.6.1
+mercurial==6.1.1
+mkdocs==1.1.2
+more-itertools==8.10.0
+mpmath==1.3.0
+multidict==6.0.5
+multiprocess==0.70.16
+mypy==0.942
+mypy-extensions==1.0.0
+# nala==0.11.1  # OS-managed
+nest-asyncio==1.6.0
+netifaces==0.11.0
+networkx==3.3
+nose2==0.9.2
+numpy==2.1.0
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu12==9.1.0.70
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-nccl-cu12==2.20.5
+nvidia-nvjitlink-cu12==12.6.20
+nvidia-nvtx-cu12==12.1.105
+oauthlib==3.2.0
+omegaconf==2.3.0
+opencv-python==4.10.0.84
+opencv-python-headless==4.10.0.84
+packaging==24.1
+pandas==2.2.2
+parso==0.8.4
+pathspec==0.12.1
+pexpect==4.9.0
+pillow==10.4.0
+platformdirs==4.2.2
+pluggy==0.13.0
+portalocker==2.10.1
+prompt_toolkit==3.0.47
+protobuf==5.27.3
+psutil==6.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+py==1.10.0
+pyarrow==17.0.0
+pycocotools==2.0.8
+pycodestyle==2.8.0
+pyflakes==2.4.0
+Pygments==2.18.0
+# PyGObject==3.42.1  # OS-managed
+pyinotify==0.9.6
+PyJWT==2.3.0
+pyparsing==3.1.2
+pytest==6.2.5
+pytest-cov==3.0.0
+pytest-forked==1.4.0
+pytest-sugar==0.9.4
+pytest-xdist==2.5.0
+# python-apt==2.4.0+ubuntu3  # OS-managed
+python-dateutil==2.9.0.post0
+pytz==2024.1
+PyYAML==6.0.2
+pyzmq==26.1.1
+requests==2.32.3
+rfc3986==1.5.0
+rich==11.2.0
+SecretStorage==3.3.1
+shellingham==1.4.0
+six==1.16.0
+sniffio==1.2.0
+socksio==1.0.0
+soupsieve==2.3.1
+stack-data==0.6.3
+sympy==1.13.2
+# systemd-python==234  # OS-managed
+tabulate==0.9.0
+tensorboard==2.17.1
+tensorboard-data-server==0.7.2
+termcolor==2.4.0
+toml==0.10.2
+tomli==2.0.1
+torch==2.4.0
+torchvision==0.19.0
+tornado==6.4.1
+tqdm==4.66.5
+traitlets==5.14.3
+triton==3.0.0
+typed-ast==1.4.3
+typer==0.4.0
+typing_extensions==4.12.2
+tzdata==2024.1
+# ubuntu-pro-client==8001  # OS-managed
+# ufw==0.36.1  # OS-managed
+# unattended-upgrades==0.1  # OS-managed
+urllib3==2.2.2
+wadllib==1.3.6
+wcwidth==0.2.13
+webencodings==0.5.1
+Werkzeug==3.0.3
+xxhash==3.5.0
+yacs==0.1.8
+yarl==1.9.4
+zipp==1.0.0